From 72dcc1194543466ca3119538d11a9422d40ba953 Mon Sep 17 00:00:00 2001
From: Luke Hubmayer-Werner <mongoose41@gmail.com>
Date: Tue, 16 Jul 2024 20:26:18 +0930
Subject: [PATCH] [BGM] slap highp everywhere to ward off destructive gpu
 driver "optimisations"

---
 shaders/audio_renderer.gdshader | 170 ++++++++++++++++----------------
 1 file changed, 85 insertions(+), 85 deletions(-)

diff --git a/shaders/audio_renderer.gdshader b/shaders/audio_renderer.gdshader
index 62e079f..ea5602a 100644
--- a/shaders/audio_renderer.gdshader
+++ b/shaders/audio_renderer.gdshader
@@ -3,66 +3,66 @@
 // Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
 shader_type canvas_item;
 render_mode blend_premul_alpha;
-uniform sampler2D instrument_samples;
-uniform vec2 instrument_samples_size = vec2(2048.0, 128.0);
-uniform int INT_OUTPUT_WIDTH = 4096;
-uniform vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0);
-uniform float reference_note = 71.0;  // [0, 255], possibly [0, 127]
-uniform float output_mixrate = 32000.0;  // SNES SPC output is 32kHz
-uniform vec2 midi_events_size = vec2(2048.0, 32.0);
-uniform int tempo_scale_thousandths = 1000;
-const int TEMPO_SCALE_MULTIPLIER = 1000;
+uniform highp sampler2D instrument_samples;  // instrument headers and sample data; read through the .xw channels
+uniform highp vec2 instrument_samples_size = vec2(2048.0, 128.0);  // divisor that turns texel coordinates into UVs for instrument_samples
+uniform highp int INT_OUTPUT_WIDTH = 4096;  // row stride fragment() uses to flatten (x, y) into a linear sample index
+uniform highp vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0);  // scales UV to pixel coordinates in fragment()
+uniform highp float reference_note = 71.0;  // [0, 255], possibly [0, 127]
+uniform highp float output_mixrate = 32000.0;  // SNES SPC output is 32kHz
+uniform highp vec2 midi_events_size = vec2(2048.0, 32.0);  // divisor that turns event coordinates into UVs in get_midi_texel()
+uniform highp int tempo_scale_thousandths = 1000;  // divisor in retime_smp(); at the default 1000 retime_smp() returns its input unchanged
+const highp int TEMPO_SCALE_MULTIPLIER = 1000;  // multiplier in retime_smp()
 // I feel like these magic numbers are a bit more intuitive in hex
-const float x00FF  = float(0x00FF);   //   255.0
-const float x0100  = float(0x0100);   //   256.0
-const float x7FFF  = float(0x7FFF);   // 32767.0
-const float x8000  = float(0x8000);   // 32768.0
-const float xFF00  = float(0xFF00);   // 65280.0
-const float xFFFF  = float(0xFFFF);   // 65535.0
-const float x10000 = float(0x10000);  // 65536.0
-const float x00FF0000  = float(0x00FF0000);
-const float xFF000000  = float(0xFF000000);
+const highp float x00FF  = float(0x00FF);   //   255.0
+const highp float x0100  = float(0x0100);   //   256.0
+const highp float x7FFF  = float(0x7FFF);   // 32767.0
+const highp float x8000  = float(0x8000);   // 32768.0
+const highp float xFF00  = float(0xFF00);   // 65280.0
+const highp float xFFFF  = float(0xFFFF);   // 65535.0
+const highp float x10000 = float(0x10000);  // 65536.0
+const highp float x00FF0000  = float(0x00FF0000);  // 16711680.0
+const highp float xFF000000  = float(0xFF000000);  // intended 4278190080.0; NOTE(review): 0xFF000000 does not fit a signed 32-bit int literal - confirm the shader compiler does not produce a negative value here
 
-const vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
-const vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
-const vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);
+const highp vec2 INT16_DOT_BE = vec2(xFF00, x00FF);  // byte weights, first component weighted as the high byte
+const highp vec2 INT16_DOT_LE = vec2(x00FF, xFF00);  // byte weights, first component weighted as the low byte
+const highp vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);  // byte weights, lowest byte first
 
-float unpack_uint16(vec2 uint16) {
+highp float unpack_uint16(highp vec2 uint16) {  // uint16 = (low byte, high byte), weighted by INT16_DOT_LE
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32
 	return dot(uint16, INT16_DOT_LE);
 }
 
-float unpack_uint32_to_float(vec4 uint32) {
+highp float unpack_uint32_to_float(highp vec4 uint32) {  // uint32 = four bytes, lowest first, weighted by INT32_DOT_LE
 	// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value [0, 0xFFFFFFFF] in float32
 	// NOTE: THIS WILL LOSE PRECISION ON NUMBERS ABOVE 24BIT SIGNIFICANCE
 	// I CAN'T EVEN GUARANTEE THE 0xFF000000 CONSTANT WILL SURVIVE ROUNDING
 	return dot(uint32, INT32_DOT_LE);
 }
 
-int unpack_int32(vec4 int32) {
+highp int unpack_int32(highp vec4 int32) {  // combines two 16-bit halves (.xy low, .zw high) in integer arithmetic
 	// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value
 	// return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) << 16);
 	return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) * 0x10000);
 }
 
-float unpack_int16(vec2 int16) {
+highp float unpack_int16(highp vec2 int16) {  // int16 = (low byte, high byte), weighted by INT16_DOT_LE
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
-	float unsigned = dot(int16, INT16_DOT_LE);
+	highp float unsigned = dot(int16, INT16_DOT_LE);
-	return unsigned - (unsigned < x7FFF ? 0.0 : x10000);
+	return unsigned - (unsigned < x8000 ? 0.0 : x10000);  // 0x0000..0x7FFF stay non-negative (0x7FFF included); 0x8000..0xFFFF map to -32768..-1
 }
 
-float rescale_int16(float int16) {
+highp float rescale_int16(highp float int16) {  // divides by 32768.0 (x8000)
 	// Rescale from [-32768, 32767] to [-1.0, 1.0)
 	return int16 / x8000;
 }
 
-vec2 pack_float_to_int16(float value) {
+highp vec2 pack_float_to_int16(highp float value) {  // returns (low byte, high byte)
 	// Convert a float in range [-1.0, 1.0) to a signed 2byte integer [-32768, 32767] packed into two [0.0, 1.0] floats
-	float scaled = value * x8000;
-	float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0);
-	float unsigned_div_256 = unsigned / x0100;
-	float MSB = trunc(unsigned_div_256)         / x00FF;
-	float LSB = fract(unsigned_div_256) * x0100 / x00FF;
+	highp float scaled = value * x8000;  // [-32768.0, 32768.0) for in-range input
+	highp float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0);  // negatives are shifted up by 65536 (two's-complement style wrap)
+	highp float unsigned_div_256 = unsigned / x0100;  // integer part = high byte, fractional part = low byte / 256
+	highp float MSB = trunc(unsigned_div_256)         / x00FF;  // high byte, divided by 255 for a [0.0, 1.0] channel
+	highp float LSB = fract(unsigned_div_256) * x0100 / x00FF;  // low byte (keeps any fractional part of `unsigned`), divided by 255
 	return vec2(LSB, MSB);
 }
 
@@ -100,50 +100,50 @@ vec2 pack_float_to_int16(float value) {
 // With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
 //   leaving 261121 texels for the sample data.
 
-const float HEADER_LENGTH_TEXELS = 5.0;
-const int INSTRUMENT_SAMPLES_WIDTH = 2048;
-float sinc(float x) {
+const highp float HEADER_LENGTH_TEXELS = 5.0;  // texels per instrument header: 2 for the start offset, then loop begin, loop length, mixrate
+const highp int INSTRUMENT_SAMPLES_WIDTH = 2048;  // row width used to split a linear sample index into (x, y)
+highp float sinc(highp float x) {  // unnormalized sin(x)/x on |x|, capped at 1.0
 	x = abs(x) + 0.00000000000001;  // Avoid division by zero
 	return min(sin(x)/x, 1.0);
 }
 
-float get_pitch_scale(float note) {
+highp float get_pitch_scale(highp float note) {  // 2^((note - reference_note)/12): doubles for every 12 notes above reference_note
 	return exp2((note - reference_note)/12.0);
 }
 
-vec2 get_inst_texel(vec2 xy) {
+highp vec2 get_inst_texel(highp vec2 xy) {  // xy in texel units; the +0.5 targets the texel centre for integer-valued xy; returns the .xw channels
 	return texture(instrument_samples, (xy+0.5)/instrument_samples_size).xw;
 }
 
-float get_inst_texel_int16(int smp) {
-	int x = smp % INSTRUMENT_SAMPLES_WIDTH;
-	int y = smp / INSTRUMENT_SAMPLES_WIDTH;
+highp float get_inst_texel_int16(highp int smp) {  // smp: linear texel index into instrument_samples; returns the signed 16-bit value as float
+	highp int x = smp % INSTRUMENT_SAMPLES_WIDTH;  // column within the row
+	highp int y = smp / INSTRUMENT_SAMPLES_WIDTH;  // row
 	return unpack_int16(texture(instrument_samples, (vec2(float(x), float(y)) + 0.5)/instrument_samples_size).xw);
 }
 
-float get_instrument_sample(float instrument_index, float note, float t) {
-	float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
-	int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0))));  // The true start, after the prepended frames of silence
-	float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0)));  // padded past the true loop point for filter
-	float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));
-	float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0)));
+highp float get_instrument_sample(highp float instrument_index, highp float note, highp float t) {  // t is in seconds; render_song() passes the time elapsed since the note start
+	highp float header_offset = instrument_index * HEADER_LENGTH_TEXELS;  // x coordinate of this instrument's header on texture row 0
+	highp int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0))));  // The true start, after the prepended frames of silence
+	highp float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0)));  // padded past the true loop point for filter
+	highp float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));  // header texel 3; used as a divisor below
+	highp float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0)));  // header texel 4
 	// Calculate the point we want to sample in linear space
-	float mixrate = sample_mixrate * get_pitch_scale(note);
-	float smp_t = t * mixrate;
+	highp float mixrate = sample_mixrate * get_pitch_scale(note);  // header mixrate scaled by the note's pitch ratio
+	highp float smp_t = t * mixrate;  // fractional position in the source sample
 	// If we're past the end of the sample, we need to wrap it back to within the loop range
-	float overshoot = max(smp_t - smp_loop_begin, 0.0);
+	highp float overshoot = max(smp_t - smp_loop_begin, 0.0);  // distance past smp_loop_begin, 0.0 before it
 	smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length;
 	// if (smp_t > smp_loop_begin) {
 	// 	// return 0.0;
 	// 	smp_t = mod(smp_t - smp_loop_begin, smp_loop_length) + smp_loop_begin;
 	// }
 
-	int smp_window_start = smp_start + int(smp_t) - 6;
-	float smp_rel_filter_target = fract(smp_t) + 6.0;
-	float output = 0.0;
+	highp int smp_window_start = smp_start + int(smp_t) - 6;  // first tap of the 12-tap window, 6 texels before the target
+	highp float smp_rel_filter_target = fract(smp_t) + 6.0;  // target position relative to the window start
+	highp float output = 0.0;  // running sum of the weighted taps
 	for (int i = 0; i < 12; i++) {
-		int smp_filter = smp_window_start + i;
-		float s = get_inst_texel_int16(smp_filter);
+		highp int smp_filter = smp_window_start + i;  // linear texel index of tap i
+		highp float s = get_inst_texel_int16(smp_filter);  // tap value as signed 16-bit in float
 		// TODO: determine proper value for this. Might be based on instrument base mixrate.
 		output += s * sinc((smp_rel_filter_target - float(i)) * 3.1);
 	}
@@ -153,60 +153,60 @@ float get_instrument_sample(float instrument_index, float note, float t) {
 }
 
 const int NUM_CHANNELS = 8;
-const int MAX_CHANNEL_NOTE_EVENTS = 2048;
+const highp int MAX_CHANNEL_NOTE_EVENTS = 2048;  // 2^11, i.e. 2^NUM_CHANNEL_NOTE_PROBES
 const int NUM_CHANNEL_NOTE_PROBES = 11;  // log2(MAX_CHANNEL_NOTE_EVENTS)
-vec4 get_midi_texel(sampler2D tex, float x, float y) {
+highp vec4 get_midi_texel(highp sampler2D tex, highp float x, highp float y) {  // NOTE(review): unlike get_inst_texel() there is no +0.5 texel-centre offset here - confirm that is intended
 	return texture(tex, vec2(x, y)/midi_events_size).xyzw;
 }
-int retime_smp(int smp) {
+highp int retime_smp(highp int smp) {  // smp * TEMPO_SCALE_MULTIPLIER / tempo_scale_thousandths, split so the full product is never formed
 	// Overflow safety is important as our input values can go up to 2^24, and we multiply by around 2^10
-	int factor = smp / tempo_scale_thousandths;
-	int residue = smp % tempo_scale_thousandths;
-	int a = (residue * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths;
-	int b = factor * TEMPO_SCALE_MULTIPLIER;
+	highp int factor = smp / tempo_scale_thousandths;  // whole multiples of the divisor
+	highp int residue = smp % tempo_scale_thousandths;  // remainder, smaller in magnitude than the divisor
+	highp int a = (residue * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths;  // scaled remainder (integer division)
+	highp int b = factor * TEMPO_SCALE_MULTIPLIER;  // scaled whole part
 	return a + b;
 }
-vec4 render_song(sampler2D tex, int smp) {
+highp vec4 render_song(highp sampler2D tex, highp int smp) {  // smp: linear output sample index (fragment() passes x + y*INT_OUTPUT_WIDTH)
 	// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
 	// 2048 is an established safe texture dimension so may as well go 2048 wide
 
-	float t = float(smp)/output_mixrate;
-	vec2 downmixed_stereo = vec2(0.0);
+	highp float t = float(smp)/output_mixrate;  // output time in seconds
+	highp vec2 downmixed_stereo = vec2(0.0);  // accumulates the pan-weighted contribution of every channel
 
 	// Binary search the channels
 	for (int channel = 0; channel < NUM_CHANNELS; channel++) {
-		float row = float(channel * 4);
-		float event_idx = 0.0;
-		int smp_start;
+		highp float row = float(channel * 4);  // each channel owns 4 rows: start (row), end (row+1), supplement (row+2), adsr (row+3)
+		highp float event_idx = 0.0;  // index of the event selected by the search below
+		highp int smp_start;  // retimed start sample of the probed / selected event
 		for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
-			float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
+			highp float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));  // 1024, 512, ..., 1
 			smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx + step_size, row))));
 			event_idx += (smp >= smp_start) ? step_size : 0.0;
 		}
 		smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row))));
-		int smp_end = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row+1.0))));
+		highp int smp_end = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row+1.0))));  // retimed end sample from row+1
 
-		vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0);  // left as [0.0, 1.0]
-		float instrument_idx = trunc(note_event_supplement.x * 255.0);
-		float pitch_idx = note_event_supplement.y * 255.0;
-		float velocity = note_event_supplement.z;
-		float pan = note_event_supplement.w;
-		vec4 adsr = get_midi_texel(tex, event_idx, row+3.0);  // left as [0.0, 1.0]
+		highp vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0);  // left as [0.0, 1.0]
+		highp float instrument_idx = trunc(note_event_supplement.x * 255.0);  // passed as instrument_index to get_instrument_sample()
+		highp float pitch_idx = note_event_supplement.y * 255.0;  // passed as note to get_instrument_sample()
+		highp float velocity = note_event_supplement.z;  // used directly as a gain factor
+		highp float pan = note_event_supplement.w;  // weights the two output components as (pan, 1.0-pan)
+		highp vec4 adsr = get_midi_texel(tex, event_idx, row+3.0);  // left as [0.0, 1.0]
 		// ====================At some point I'll look back into packing floats====================
 		// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
 		// ====================At some point I'll look back into packing floats====================
-		float attack = 1.0 + adsr.x*255.0;  //65535.0 + 1.0;  // TODO: work out effective resolution for this
-		int smp_attack = int(attack) * 2;  // Max value is 131072 samples = 4.096 seconds
+		highp float attack = 1.0 + adsr.x*255.0;  //65535.0 + 1.0;  // TODO: work out effective resolution for this
+		highp int smp_attack = int(attack) * 2;  // attack is in [1, 256], so at most 512 samples = 16 ms at the default 32 kHz mixrate
 
 		// For now, just branch this
 		if (smp_start < smp) {  // First sample may not start at zero!
-			int smp_overrun = smp - smp_end;  // 256 samples of linear decay to 0 after note_off
+			highp int smp_overrun = smp - smp_end;  // 256 samples of linear decay to 0 after note_off
 			smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun;
 			if (smp_overrun < 256) {
-				float t_start = float(smp_start)/output_mixrate;
-				float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0);
-				float release_factor = float(255-smp_overrun)/255.0;  // 256 samples of linear decay to 0 after note_off
-				float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start);
+				highp float t_start = float(smp_start)/output_mixrate;  // note start in seconds
+				highp float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0);  // linear ramp up to 1.0 over smp_attack samples
+				highp float release_factor = float(255-smp_overrun)/255.0;  // 256 samples of linear decay to 0 after note_off
+				highp float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start);  // instrument output at the time elapsed since note start
 				samp *= velocity * attack_factor * release_factor;
 				// TODO: proper decay and sustain, revisit release
 				downmixed_stereo += samp * vec2(pan, 1.0-pan) * 0.5;  // TODO: double it to maintain the mono level on each channel at center=0.5?
@@ -219,9 +219,9 @@ vec4 render_song(sampler2D tex, int smp) {
 
 void fragment() {
 	// GLES2
-	vec2 uv = vec2(UV.x, 1.0-UV.y);
+	highp vec2 uv = vec2(UV.x, 1.0-UV.y);  // flip the vertical axis
 	// uv = (trunc(uv*OUTPUT_FRAMEBUFFER_SIZE)+0.5)/OUTPUT_FRAMEBUFFER_SIZE;
 	// COLOR.xyzw = test_writeback(TEXTURE, uv);
-	ivec2 xy = ivec2(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE));
+	highp ivec2 xy = ivec2(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE));  // integer pixel coordinates; flattened row-major into the sample index below
 	COLOR.xyzw = render_song(TEXTURE, xy.x + (xy.y*INT_OUTPUT_WIDTH));
 }