From 72dcc1194543466ca3119538d11a9422d40ba953 Mon Sep 17 00:00:00 2001 From: Luke Hubmayer-Werner Date: Tue, 16 Jul 2024 20:26:18 +0930 Subject: [PATCH] [BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" --- shaders/audio_renderer.gdshader | 170 ++++++++++++++++---------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/shaders/audio_renderer.gdshader b/shaders/audio_renderer.gdshader index 62e079f..ea5602a 100644 --- a/shaders/audio_renderer.gdshader +++ b/shaders/audio_renderer.gdshader @@ -3,66 +3,66 @@ // Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges. shader_type canvas_item; render_mode blend_premul_alpha; -uniform sampler2D instrument_samples; -uniform vec2 instrument_samples_size = vec2(2048.0, 128.0); -uniform int INT_OUTPUT_WIDTH = 4096; -uniform vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0); -uniform float reference_note = 71.0; // [0, 255], possibly [0, 127] -uniform float output_mixrate = 32000.0; // SNES SPC output is 32kHz -uniform vec2 midi_events_size = vec2(2048.0, 32.0); -uniform int tempo_scale_thousandths = 1000; -const int TEMPO_SCALE_MULTIPLIER = 1000; +uniform highp sampler2D instrument_samples; +uniform highp vec2 instrument_samples_size = vec2(2048.0, 128.0); +uniform highp int INT_OUTPUT_WIDTH = 4096; +uniform highp vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0); +uniform highp float reference_note = 71.0; // [0, 255], possibly [0, 127] +uniform highp float output_mixrate = 32000.0; // SNES SPC output is 32kHz +uniform highp vec2 midi_events_size = vec2(2048.0, 32.0); +uniform highp int tempo_scale_thousandths = 1000; +const highp int TEMPO_SCALE_MULTIPLIER = 1000; // I feel like these magic numbers are a bit more intuitive in hex -const float x00FF = float(0x00FF); // 255.0 -const float x0100 = float(0x0100); // 256.0 -const float x7FFF = float(0x7FFF); // 32767.0 -const float x8000 = float(0x8000); // 32768.0 -const float xFF00 = float(0xFF00); // 65280.0 -const float xFFFF = float(0xFFFF); // 65535.0 -const float x10000 = float(0x10000); // 65536.0 -const float x00FF0000 = float(0x00FF0000); -const float xFF000000 = float(0xFF000000); +const highp float x00FF = float(0x00FF); // 255.0 +const highp float x0100 = float(0x0100); // 256.0 +const highp float x7FFF = float(0x7FFF); // 32767.0 +const highp float x8000 = float(0x8000); // 32768.0 +const highp float xFF00 = float(0xFF00); // 65280.0 +const highp float xFFFF = float(0xFFFF); // 65535.0 +const highp float x10000 = float(0x10000); // 65536.0 +const highp float x00FF0000 = float(0x00FF0000); +const highp float xFF000000 = float(0xFF000000); -const vec2 INT16_DOT_BE = vec2(xFF00, x00FF); -const vec2 INT16_DOT_LE = vec2(x00FF, xFF00); -const vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000); +const highp vec2 INT16_DOT_BE = vec2(xFF00, x00FF); +const highp vec2 INT16_DOT_LE = vec2(x00FF, xFF00); +const highp vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000); -float unpack_uint16(vec2 uint16) { +highp float unpack_uint16(highp vec2 uint16) { // Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32 return dot(uint16, INT16_DOT_LE); } -float unpack_uint32_to_float(vec4 uint32) { +highp float unpack_uint32_to_float(highp vec4 uint32) { // Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value [0, 0xFFFFFFFF] in float32 // NOTE: THIS WILL LOSE PRECISION ON NUMBERS ABOVE 24BIT SIGNIFICANCE // I CAN'T EVEN GUARANTEE THE 0xFF000000 CONSTANT WILL SURVIVE ROUNDING return dot(uint32, INT32_DOT_LE); } -int unpack_int32(vec4 int32) { +highp int unpack_int32(highp vec4 int32) { // Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value // return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) << 16); return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) * 0x10000); } -float unpack_int16(vec2 int16) { +highp float unpack_int16(highp vec2 int16) { // Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32 - float unsigned = dot(int16, INT16_DOT_LE); + highp float unsigned = dot(int16, INT16_DOT_LE); return unsigned - (unsigned < x7FFF ? 0.0 : x10000); } -float rescale_int16(float int16) { +highp float rescale_int16(highp float int16) { // Rescale from [-32768, 32767] to [-1.0, 1.0) return int16 / x8000; } -vec2 pack_float_to_int16(float value) { +highp vec2 pack_float_to_int16(highp float value) { // Convert a float in range [-1.0, 1.0) to a signed 2byte integer [-32768, 32767] packed into two [0.0, 1.0] floats - float scaled = value * x8000; - float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0); - float unsigned_div_256 = unsigned / x0100; - float MSB = trunc(unsigned_div_256) / x00FF; - float LSB = fract(unsigned_div_256) * x0100 / x00FF; + highp float scaled = value * x8000; + highp float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0); + highp float unsigned_div_256 = unsigned / x0100; + highp float MSB = trunc(unsigned_div_256) / x00FF; + highp float LSB = fract(unsigned_div_256) * x0100 / x00FF; return vec2(LSB, MSB); } @@ -100,50 +100,50 @@ vec2 pack_float_to_int16(float value) { // With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload, // leaving 261121 texels for the sample data. -const float HEADER_LENGTH_TEXELS = 5.0; -const int INSTRUMENT_SAMPLES_WIDTH = 2048; -float sinc(float x) { +const highp float HEADER_LENGTH_TEXELS = 5.0; +const highp int INSTRUMENT_SAMPLES_WIDTH = 2048; +highp float sinc(highp float x) { x = abs(x) + 0.00000000000001; // Avoid division by zero return min(sin(x)/x, 1.0); } -float get_pitch_scale(float note) { +highp float get_pitch_scale(highp float note) { return exp2((note - reference_note)/12.0); } -vec2 get_inst_texel(vec2 xy) { +highp vec2 get_inst_texel(highp vec2 xy) { return texture(instrument_samples, (xy+0.5)/instrument_samples_size).xw; } -float get_inst_texel_int16(int smp) { - int x = smp % INSTRUMENT_SAMPLES_WIDTH; - int y = smp / INSTRUMENT_SAMPLES_WIDTH; +highp float get_inst_texel_int16(highp int smp) { + highp int x = smp % INSTRUMENT_SAMPLES_WIDTH; + highp int y = smp / INSTRUMENT_SAMPLES_WIDTH; return unpack_int16(texture(instrument_samples, (vec2(float(x), float(y)) + 0.5)/instrument_samples_size).xw); } -float get_instrument_sample(float instrument_index, float note, float t) { - float header_offset = instrument_index * HEADER_LENGTH_TEXELS; - int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0)))); // The true start, after the prepended frames of silence - float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // padded past the true loop point for filter - float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0))); - float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0))); +highp float get_instrument_sample(highp float instrument_index, highp float note, highp float t) { + highp float header_offset = instrument_index * HEADER_LENGTH_TEXELS; + highp int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0)))); // The true start, after the prepended frames of silence + highp float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // padded past the true loop point for filter + highp float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0))); + highp float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0))); // Calculate the point we want to sample in linear space - float mixrate = sample_mixrate * get_pitch_scale(note); - float smp_t = t * mixrate; + highp float mixrate = sample_mixrate * get_pitch_scale(note); + highp float smp_t = t * mixrate; // If we're past the end of the sample, we need to wrap it back to within the loop range - float overshoot = max(smp_t - smp_loop_begin, 0.0); + highp float overshoot = max(smp_t - smp_loop_begin, 0.0); smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length; // if (smp_t > smp_loop_begin) { // // return 0.0; // smp_t = mod(smp_t - smp_loop_begin, smp_loop_length) + smp_loop_begin; // } - int smp_window_start = smp_start + int(smp_t) - 6; - float smp_rel_filter_target = fract(smp_t) + 6.0; - float output = 0.0; + highp int smp_window_start = smp_start + int(smp_t) - 6; + highp float smp_rel_filter_target = fract(smp_t) + 6.0; + highp float output = 0.0; for (int i = 0; i < 12; i++) { - int smp_filter = smp_window_start + i; - float s = get_inst_texel_int16(smp_filter); + highp int smp_filter = smp_window_start + i; + highp float s = get_inst_texel_int16(smp_filter); // TODO: determine proper value for this. Might be based on instrument base mixrate. output += s * sinc((smp_rel_filter_target - float(i)) * 3.1); } @@ -153,60 +153,60 @@ float get_instrument_sample(float instrument_index, float note, float t) { } const int NUM_CHANNELS = 8; -const int MAX_CHANNEL_NOTE_EVENTS = 2048; +const highp int MAX_CHANNEL_NOTE_EVENTS = 2048; const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS) -vec4 get_midi_texel(sampler2D tex, float x, float y) { +highp vec4 get_midi_texel(highp sampler2D tex, highp float x, highp float y) { return texture(tex, vec2(x, y)/midi_events_size).xyzw; } -int retime_smp(int smp) { +highp int retime_smp(highp int smp) { // Overflow safety is important as our input values can go up to 2^24, and we multiply by around 2^10 - int factor = smp / tempo_scale_thousandths; - int residue = smp % tempo_scale_thousandths; - int a = (residue * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths; - int b = factor * TEMPO_SCALE_MULTIPLIER; + highp int factor = smp / tempo_scale_thousandths; + highp int residue = smp % tempo_scale_thousandths; + highp int a = (residue * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths; + highp int b = factor * TEMPO_SCALE_MULTIPLIER; return a + b; } -vec4 render_song(sampler2D tex, int smp) { +highp vec4 render_song(highp sampler2D tex, highp int smp) { // Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second // 2048 is an established safe texture dimension so may as well go 2048 wide - float t = float(smp)/output_mixrate; - vec2 downmixed_stereo = vec2(0.0); + highp float t = float(smp)/output_mixrate; + highp vec2 downmixed_stereo = vec2(0.0); // Binary search the channels for (int channel = 0; channel < NUM_CHANNELS; channel++) { - float row = float(channel * 4); - float event_idx = 0.0; - int smp_start; + highp float row = float(channel * 4); + highp float event_idx = 0.0; + highp int smp_start; for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) { - float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1)); + highp float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1)); smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx + step_size, row)))); event_idx += (smp >= smp_start) ? step_size : 0.0; } smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row)))); - int smp_end = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row+1.0)))); + highp int smp_end = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row+1.0)))); - vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0); // left as [0.0, 1.0] - float instrument_idx = trunc(note_event_supplement.x * 255.0); - float pitch_idx = note_event_supplement.y * 255.0; - float velocity = note_event_supplement.z; - float pan = note_event_supplement.w; - vec4 adsr = get_midi_texel(tex, event_idx, row+3.0); // left as [0.0, 1.0] + highp vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0); // left as [0.0, 1.0] + highp float instrument_idx = trunc(note_event_supplement.x * 255.0); + highp float pitch_idx = note_event_supplement.y * 255.0; + highp float velocity = note_event_supplement.z; + highp float pan = note_event_supplement.w; + highp vec4 adsr = get_midi_texel(tex, event_idx, row+3.0); // left as [0.0, 1.0] // ====================At some point I'll look back into packing floats==================== // TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain // ====================At some point I'll look back into packing floats==================== - float attack = 1.0 + adsr.x*255.0; //65535.0 + 1.0; // TODO: work out effective resolution for this - int smp_attack = int(attack) * 2; // Max value is 131072 samples = 4.096 seconds + highp float attack = 1.0 + adsr.x*255.0; //65535.0 + 1.0; // TODO: work out effective resolution for this + highp int smp_attack = int(attack) * 2; // Max value is 131072 samples = 4.096 seconds // For now, just branch this if (smp_start < smp) { // First sample may not start at zero! - int smp_overrun = smp - smp_end; // 256 samples of linear decay to 0 after note_off + highp int smp_overrun = smp - smp_end; // 256 samples of linear decay to 0 after note_off smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun; if (smp_overrun < 256) { - float t_start = float(smp_start)/output_mixrate; - float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0); - float release_factor = float(255-smp_overrun)/255.0; // 256 samples of linear decay to 0 after note_off - float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start); + highp float t_start = float(smp_start)/output_mixrate; + highp float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0); + highp float release_factor = float(255-smp_overrun)/255.0; // 256 samples of linear decay to 0 after note_off + highp float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start); samp *= velocity * attack_factor * release_factor; // TODO: proper decay and sustain, revisit release downmixed_stereo += samp * vec2(pan, 1.0-pan) * 0.5; // TODO: double it to maintain the mono level on each channel at center=0.5? @@ -219,9 +219,9 @@ vec4 render_song(sampler2D tex, int smp) { void fragment() { // GLES2 - vec2 uv = vec2(UV.x, 1.0-UV.y); + highp vec2 uv = vec2(UV.x, 1.0-UV.y); // uv = (trunc(uv*OUTPUT_FRAMEBUFFER_SIZE)+0.5)/OUTPUT_FRAMEBUFFER_SIZE; // COLOR.xyzw = test_writeback(TEXTURE, uv); - ivec2 xy = ivec2(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE)); + highp ivec2 xy = ivec2(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE)); COLOR.xyzw = render_song(TEXTURE, xy.x + (xy.y*INT_OUTPUT_WIDTH)); }