From e077c4e0365aac33d7d0a0cbae87144f3ebb9a97 Mon Sep 17 00:00:00 2001
From: Luke Hubmayer-Werner
Date: Fri, 12 Jul 2024 01:20:15 +0930
Subject: [PATCH] [WIP] Audio shader... converting stuff to packed ints because
 floats are hitting gles2 precision limits

---
Review notes (text between "---" and the first "diff --git" is ignored by git am):
 * xFF000000 is now written as a float literal; 0xFF000000 (4278190080) is
   outside the signed 32-bit int range, so float(0xFF000000) was not reliable.
 * unpack_int32 rounds each 16-bit half (floor(v + 0.5)) before the int cast
   instead of truncating, so a sampled value a hair under N no longer decodes
   as N-1.
 * Dropped the redundant int(...) wrappers around unpack_int32(), which already
   returns int.
 * Line breaks and indentation in this patch were rebuilt from the hunk line
   counts. NOTE(review): confirm whitespace and blank-line placement against
   the working tree before applying. The blob ids on the "index" lines predate
   these edits.

 shaders/audio_renderer.gdshader | 76 +++++++++++++++++++--------------
 test/audio_renderer.gd          |  4 +-
 test/audio_system.gd            | 38 +++++------------
 3 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/shaders/audio_renderer.gdshader b/shaders/audio_renderer.gdshader
index ba349d0..02eeef2 100644
--- a/shaders/audio_renderer.gdshader
+++ b/shaders/audio_renderer.gdshader
@@ -3,6 +3,7 @@
 // Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
 shader_type canvas_item;
 render_mode blend_premul_alpha;
+const int INT_TEX_SIZE = 4096;
 const float TEX_SIZE = 4096.0;
 const float UV_QUANTIZE = TEX_SIZE;
 // I feel like these magic numbers are a bit more intuitive in hex
@@ -13,15 +14,31 @@ const float x8000 = float(0x8000); // 32768.0
 const float xFF00 = float(0xFF00); // 65280.0
 const float xFFFF = float(0xFFFF); // 65535.0
 const float x10000 = float(0x10000); // 65536.0
+const float x00FF0000 = float(0x00FF0000);
+const float xFF000000 = 4278190080.0; // 255 * 2^24, as a float literal: 0xFF000000 does not fit a signed 32-bit int literal
 
 const vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
 const vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
+const vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);
 
 float unpack_uint16(vec2 uint16) {
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32
 	return dot(uint16, INT16_DOT_LE);
 }
 
+float unpack_uint32_to_float(vec4 uint32) {
+	// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value [0, 0xFFFFFFFF] in float32
+	// NOTE: THIS WILL LOSE PRECISION ON NUMBERS ABOVE 24BIT SIGNIFICANCE
+	// The top-byte weight (xFF000000) is declared as a float literal because 0xFF000000 overflows a signed int literal
+	return dot(uint32, INT32_DOT_LE);
+}
+
+int unpack_int32(vec4 int32) {
+	// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value
+	// return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) << 16);
+	return int(floor(unpack_uint16(int32.xy) + 0.5)) + (int(floor(unpack_uint16(int32.zw) + 0.5)) * 0x10000); // round each half: int() truncates, so a value a hair under N would decode as N-1
+}
+
 float unpack_int16(vec2 int16) {
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
 	float unsigned = dot(int16, INT16_DOT_LE);
@@ -126,12 +143,8 @@ float get_instrument_sample(float instrument_index, float pitch_scale, float t,
 const int NUM_CHANNELS = 8;
 const int MAX_CHANNEL_NOTE_EVENTS = 2048;
 const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS)
-uniform sampler2D midi_events;
+uniform sampler2D midi_events : hint_normal;
 uniform vec2 midi_events_size = vec2(2048.0, 16.0);
-// SDR rendering only gives us [0.0, 1.0] from the sampler2D so we need to rescale it.
-uniform float sdr_scale = 128.0; //1024.0;
-// uniform float t_scale = 524.0; // Change this if we need longer than 8min44sec.
-// ^ Other things will also need changing, since 4096x4096 = 8MSamples is barely over 524 seconds at 32kHz.
 vec4 get_midi_texel(float x, float y) {
 	return texture(midi_events, vec2(x, y)/midi_events_size).xyzw;
 }
@@ -142,46 +155,40 @@ vec2 unpack_float(float f) {
 	float y = fract(a) * 1024.0 / 1023.0;
 	return vec2(x, y);
 }
-vec4 render_song(float sample_progress) {
-	// Each texel rendered is a stereo S16LE frame representing 1/32000 of a second
-	// BGM sequences should be relatively small so it should be fine to use RGBAF (4x f32s per texel) as our data texture
+vec4 render_song(int smp) {
+	// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
 	// 2048 is an established safe texture dimension so may as well go 2048 wide
-	float t = sample_progress/output_mixrate;
+
+	float t = float(smp)/output_mixrate;
 	vec2 downmixed_stereo = vec2(0.0);
 
 	// Binary search the channels
 	for (int channel = 0; channel < 1; channel++) {
 	// for (int channel = 0; channel < NUM_CHANNELS; channel++) {
-		float row = float(channel * 2);
+		float row = float(channel * 4);
 		float event_idx = 0.0;
+		int smp_start;
 		for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
 			float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
-			vec4 note_event = get_midi_texel(event_idx + step_size, row) * sdr_scale;
-			float t_start = note_event.x;
-			event_idx += (t >= t_start) ? step_size : 0.0;
+			smp_start = unpack_int32(get_midi_texel(event_idx + step_size, row));
+			event_idx += (smp >= smp_start) ? step_size : 0.0;
 		}
-		vec4 note_event = get_midi_texel(event_idx, row) * sdr_scale; // scaled to [0.0, 1024.0]
-		vec4 note_event_supplement = get_midi_texel(event_idx, row+1.0); // left as [0.0, 1.0]
-		float t_start = note_event.x;
-		float t_end = note_event.y;
+		smp_start = unpack_int32(get_midi_texel(event_idx, row));
+		int smp_end = unpack_int32(get_midi_texel(event_idx, row+1.0));
+		vec4 note_event_supplement = get_midi_texel(event_idx, row+2.0); // left as [0.0, 1.0]
+		float instrument_idx = note_event_supplement.x * 255.0;
+		float pitch_idx = note_event_supplement.y * 255.0;
+		float velocity = note_event_supplement.z;
+		float pan = note_event_supplement.w;
+		vec4 adsr = get_midi_texel(event_idx, row+3.0); // left as [0.0, 1.0]
 		// ====================At some point I'll look back into packing floats====================
-		// vec2 instrument_and_pitch = unpack_float(note_event.z);
-		// float instrument_idx = instrument_and_pitch.x * 1023.0;
-		// float pitch_idx = instrument_and_pitch.y * 1023.0; // TODO: Maybe rescale this for fine tuning? Don't use it raw because 2^(127-71) is MASSIVE, keep the power-of-2 calcs in shader.
-		// vec2 velocity_and_pan = unpack_float(note_event.w); // Can leave these as [0.0, 1.0] and then mix appropriately
-		// float velocity = velocity_and_pan.x;
-		// float pan = velocity_and_pan.y;
-		// vec2 attack_and_decay = unpack_float(note_event_supplement.x);
-		// vec2 sustain_and_release = unpack_float(note_event_supplement.y);
 		// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
 		// ====================At some point I'll look back into packing floats====================
-		float instrument_idx = note_event.z;
-		float pitch_idx = note_event.w;
-		float velocity = note_event_supplement.x;
-		float pan = note_event_supplement.y;
 
 		// For now, just branch this
-		if (t < t_end) {
+		if (smp < smp_end) {
+			float t_start = float(smp_start)/output_mixrate;
+			float t_end = float(smp_end)/output_mixrate;
 			float samp = get_instrument_sample(instrument_idx, get_pitch_scale(pitch_idx), t-t_start, t_end-t_start);
 			samp *= velocity;
 			// TODO: do some ADSR here?
@@ -190,7 +197,11 @@ vec4 render_song(float sample_progress) {
 	}
 	// Convert the stereo float audio to S16LE
 	// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
-	return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	vec2 isuv = vec2(mod(float(smp), instrument_samples_size.x), trunc(float(smp)/instrument_samples_size.x))/instrument_samples_size;
+	// float ins = rescale_int16(unpack_int16(texture(instrument_samples, isuv).xw));
+	// return vec4(pack_float_to_int16(ins), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	return vec4(texture(instrument_samples, isuv).xw, pack_float_to_int16(mod(t, 2.0) - 1.0));
 	// return vec4(pack_float_to_int16((t/10.0) - 1.0), pack_float_to_int16(mod(t, 2.0) - 1.0));
 }
 
@@ -199,7 +210,8 @@ void fragment() {
 	vec2 uv = vec2(UV.x, 1.0-UV.y);
 	// uv = (trunc(uv*UV_QUANTIZE)+0.5)/UV_QUANTIZE;
 	// COLOR.xyzw = test_writeback(TEXTURE, uv);
-	COLOR.xyzw = render_song(dot(trunc(uv*TEX_SIZE), vec2(1.0, TEX_SIZE)));
+	ivec2 xy = ivec2(trunc(uv*TEX_SIZE));
+	COLOR.xyzw = render_song(xy.x + (xy.y*INT_TEX_SIZE));
 }
 
 // const int MAX_TEMPO_EVENTS = 256;
diff --git a/test/audio_renderer.gd b/test/audio_renderer.gd
index 603929a..713a49c 100644
--- a/test/audio_renderer.gd
+++ b/test/audio_renderer.gd
@@ -1,8 +1,8 @@
 extends Control
 
 const INPUT_TEX_WIDTH := 2048
-const INPUT_FORMAT := Image.FORMAT_RGBAF # Image.FORMAT_LA8
-const INPUT_BYTES_PER_TEXEL := 16 # 2
+const INPUT_FORMAT := Image.FORMAT_RGBA8 # Image.FORMAT_LA8
+const INPUT_BYTES_PER_TEXEL := 4 # 2
 const OUTPUT_WIDTH := 4096
 var viewport: Viewport
 var render_queue: Array # of Images
diff --git a/test/audio_system.gd b/test/audio_system.gd
index 9c0d289..3d6b063 100644
--- a/test/audio_system.gd
+++ b/test/audio_system.gd
@@ -135,35 +135,19 @@ func test_rendering() -> void:
 
 	var midi_events_bytes := StreamPeerBuffer.new()
 	var midi_events_bytes2 := StreamPeerBuffer.new()
-	var divisor = 128.0 #1024.0 # See sdr_scale in audio_renderer.gdshader
+	var midi_events_bytes3 := StreamPeerBuffer.new()
+	var midi_events_bytes4 := StreamPeerBuffer.new()
 	for i in 2048:
 		var t = i * 10.0
-		midi_events_bytes.put_float(t/divisor) # t_start
-		midi_events_bytes.put_float((t+3.0)/divisor) # t_end
-		# Try repacking these later
-		midi_events_bytes.put_float((i%35)/divisor) # instrument
-		midi_events_bytes.put_float(71/divisor) # pitch_idx
-		# midi_events_bytes.put_float((35 + (i%40))/divisor) # pitch_idx
-		midi_events_bytes2.put_float(1.0) # velocity
-		midi_events_bytes2.put_float((i%101)/100.0) # pan
-		midi_events_bytes2.put_float(0.0) # TBD
-		midi_events_bytes2.put_float(0.0) # TBD
-		# midi_events_bytes.put_float(((i%35) + 71/1024.0)/1023.0) # instrument_and_pitch
-		# midi_events_bytes.put_float((1.0 + (i*4)/1024.0)/1023.0) # velocity_and_pan
-		# midi_events_bytes2.put_float(0.0) # attack_and_decay
-		# midi_events_bytes2.put_float(0.0) # sustain_and_relase
-		# midi_events_bytes2.put_float(0.0) # TBD
-		# midi_events_bytes2.put_float(0.0) # TBD
-	# for i in 2048-256:
-		# midi_events_bytes.put_float(0.0)
-		# midi_events_bytes.put_float(0.0)
-		# midi_events_bytes.put_float(0.0)
-		# midi_events_bytes.put_float(0.0)
-		# midi_events_bytes2.put_float(0.0)
-		# midi_events_bytes2.put_float(0.0)
-		# midi_events_bytes2.put_float(0.0)
-		# midi_events_bytes2.put_float(0.0)
-	var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array
+		midi_events_bytes.put_32(t*32000) # t_start
+		midi_events_bytes2.put_32((t+3.0)*32000) # t_end
+		midi_events_bytes3.put_u8((i%35)) # instrument
+		midi_events_bytes3.put_u8(71) # pitch_idx
+		# midi_events_bytes.put_float((35 + (i%40))) # pitch_idx
+		midi_events_bytes3.put_u8(255) # velocity
+		midi_events_bytes3.put_u8(i%256) # pan
+		midi_events_bytes4.put_32(0) # ADSR
+	var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array + midi_events_bytes3.data_array + midi_events_bytes4.data_array
 	audio_renderer.push_bytes(channel_data) # + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data)
 
 	# var test_payload := PoolByteArray()