From e077c4e0365aac33d7d0a0cbae87144f3ebb9a97 Mon Sep 17 00:00:00 2001
From: Luke Hubmayer-Werner <mongoose41@gmail.com>
Date: Fri, 12 Jul 2024 01:20:15 +0930
Subject: [PATCH] [WIP] Audio shader... converting stuff to packed ints because
 floats are hitting gles2 precision limits

---
 shaders/audio_renderer.gdshader | 76 +++++++++++++++++++--------------
 test/audio_renderer.gd          |  4 +-
 test/audio_system.gd            | 38 +++++------------
 3 files changed, 57 insertions(+), 61 deletions(-)

diff --git a/shaders/audio_renderer.gdshader b/shaders/audio_renderer.gdshader
index ba349d0..02eeef2 100644
--- a/shaders/audio_renderer.gdshader
+++ b/shaders/audio_renderer.gdshader
@@ -3,6 +3,7 @@
 // Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
 shader_type canvas_item;
 render_mode blend_premul_alpha;
+const int INT_TEX_SIZE = 4096;  // Integer counterpart of TEX_SIZE, for integer sample-index arithmetic
 const float TEX_SIZE = 4096.0;
 const float UV_QUANTIZE = TEX_SIZE;
 // I feel like these magic numbers are a bit more intuitive in hex
@@ -13,15 +14,31 @@ const float x8000  = float(0x8000);   // 32768.0
 const float xFF00  = float(0xFF00);   // 65280.0
 const float xFFFF  = float(0xFFFF);   // 65535.0
 const float x10000 = float(0x10000);  // 65536.0
+const float x00FF0000  = float(0x00FF0000);  // 16711680.0
+const float xFF000000  = 255.0 * float(0x1000000);  // 4278190080.0 (built in float: 0xFF000000 does not fit a signed 32bit int literal)
 
 const vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
 const vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
+const vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);  // per-byte weights, least-significant byte first
 
 float unpack_uint16(vec2 uint16) {
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32
 	return dot(uint16, INT16_DOT_LE);
 }
 
+float unpack_uint32_to_float(vec4 uint32) {
+	// Convert packed 4byte integer (least-significant byte in .x), sampled as four [0.0, 1.0] range floats, to the original int value [0, 0xFFFFFFFF] in float32
+	// NOTE: THIS WILL LOSE PRECISION ON NUMBERS ABOVE 24BIT SIGNIFICANCE, so the result is only approximate for large values.
+	// NOTE(review): the top weight should be 255 * 2^24 = 4278190080.0, which is past the signed 32bit int range - confirm xFF000000 really evaluates to that.
+	return dot(uint32, INT32_DOT_LE);
+}
+
+int unpack_int32(vec4 int32) {
+	// Convert packed 4byte integer (least-significant byte in .x), sampled as four [0.0, 1.0] range floats, to the original int value.
+	// Each 16bit half is rounded (+0.5, then truncating int()) so a float result a hair below a whole number can't decode one too low. Uses * 0x10000 in place of << 16.
+	return int(unpack_uint16(int32.xy) + 0.5) + (int(unpack_uint16(int32.zw) + 0.5) * 0x10000);
+}
+
 float unpack_int16(vec2 int16) {
 	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
 	float unsigned = dot(int16, INT16_DOT_LE);
@@ -126,12 +143,8 @@ float get_instrument_sample(float instrument_index, float pitch_scale, float t,
 const int NUM_CHANNELS = 8;
 const int MAX_CHANNEL_NOTE_EVENTS = 2048;
 const int NUM_CHANNEL_NOTE_PROBES = 11;  // log2(MAX_CHANNEL_NOTE_EVENTS)
-uniform sampler2D midi_events;
+uniform sampler2D midi_events : hint_normal;  // Packed note-event data, read via get_midi_texel(). NOTE(review): hint_normal on a data texture - confirm it leaves sampled bytes untouched.
 uniform vec2 midi_events_size = vec2(2048.0, 16.0);
-// SDR rendering only gives us [0.0, 1.0] from the sampler2D so we need to rescale it.
-uniform float sdr_scale = 128.0; //1024.0;
-// uniform float t_scale = 524.0;  // Change this if we need longer than 8min44sec.
-// ^ Other things will also need changing, since 4096x4096 = 8MSamples is barely over 524 seconds at 32kHz.
 vec4 get_midi_texel(float x, float y) {
 	return texture(midi_events, vec2(x, y)/midi_events_size).xyzw;
 }
@@ -142,46 +155,40 @@ vec2 unpack_float(float f) {
 	float y = fract(a) * 1024.0 / 1023.0;
 	return vec2(x, y);
 }
-vec4 render_song(float sample_progress) {
-	// Each texel rendered is a stereo S16LE frame representing 1/32000 of a second
-	// BGM sequences should be relatively small so it should be fine to use RGBAF (4x f32s per texel) as our data texture
+vec4 render_song(int smp) {
+	// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
 	// 2048 is an established safe texture dimension so may as well go 2048 wide
-	float t = sample_progress/output_mixrate;
+
+	float t = float(smp)/output_mixrate;
 	vec2 downmixed_stereo = vec2(0.0);
 
 	// Binary search the channels
 	for (int channel = 0; channel < 1; channel++) {
 	// for (int channel = 0; channel < NUM_CHANNELS; channel++) {
-		float row = float(channel * 2);
+		float row = float(channel * 4);
 		float event_idx = 0.0;
+		int smp_start;
 		for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
 			float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
-			vec4 note_event = get_midi_texel(event_idx + step_size, row) * sdr_scale;
-			float t_start = note_event.x;
-			event_idx += (t >= t_start) ? step_size : 0.0;
+			smp_start = int(unpack_int32(get_midi_texel(event_idx + step_size, row)));
+			event_idx += (smp >= smp_start) ? step_size : 0.0;
 		}
-		vec4 note_event = get_midi_texel(event_idx, row) * sdr_scale;  // scaled to [0.0, 1024.0]
-		vec4 note_event_supplement = get_midi_texel(event_idx, row+1.0);  // left as [0.0, 1.0]
-		float t_start = note_event.x;
-		float t_end = note_event.y;
+		smp_start = int(unpack_int32(get_midi_texel(event_idx, row)));
+		int smp_end = int(unpack_int32(get_midi_texel(event_idx, row+1.0)));
+		vec4 note_event_supplement = get_midi_texel(event_idx, row+2.0);  // left as [0.0, 1.0]
+		float instrument_idx = note_event_supplement.x * 255.0;
+		float pitch_idx = note_event_supplement.y * 255.0;
+		float velocity = note_event_supplement.z;
+		float pan = note_event_supplement.w;
+		vec4 adsr = get_midi_texel(event_idx, row+3.0);  // left as [0.0, 1.0]
 		// ====================At some point I'll look back into packing floats====================
-		// vec2 instrument_and_pitch = unpack_float(note_event.z);
-		// float instrument_idx = instrument_and_pitch.x * 1023.0;
-		// float pitch_idx = instrument_and_pitch.y * 1023.0;  // TODO: Maybe rescale this for fine tuning? Don't use it raw because 2^(127-71) is MASSIVE, keep the power-of-2 calcs in shader.
-		// vec2 velocity_and_pan = unpack_float(note_event.w);  // Can leave these as [0.0, 1.0] and then mix appropriately
-		// float velocity = velocity_and_pan.x;
-		// float pan = velocity_and_pan.y;
-		// vec2 attack_and_decay = unpack_float(note_event_supplement.x);
-		// vec2 sustain_and_release = unpack_float(note_event_supplement.y);
 		// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
 		// ====================At some point I'll look back into packing floats====================
-		float instrument_idx = note_event.z;
-		float pitch_idx = note_event.w;
-		float velocity = note_event_supplement.x;
-		float pan = note_event_supplement.y;
 
 		// For now, just branch this
-		if (t < t_end) {
+		if (smp < smp_end) {
+			float t_start = float(smp_start)/output_mixrate;
+			float t_end = float(smp_end)/output_mixrate;
 			float samp = get_instrument_sample(instrument_idx, get_pitch_scale(pitch_idx), t-t_start, t_end-t_start);
 			samp *= velocity;
 			// TODO: do some ADSR here?
@@ -190,7 +197,11 @@ vec4 render_song(float sample_progress) {
 	}
 	// Convert the stereo float audio to S16LE
 	// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
-	return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	vec2 isuv = vec2(mod(float(smp), instrument_samples_size.x), trunc(float(smp)/instrument_samples_size.x))/instrument_samples_size;
+	// float ins = rescale_int16(unpack_int16(texture(instrument_samples, isuv).xw));
+	// return vec4(pack_float_to_int16(ins), pack_float_to_int16(mod(t, 2.0) - 1.0));
+	return vec4(texture(instrument_samples, isuv).xw, pack_float_to_int16(mod(t, 2.0) - 1.0));
 	// return vec4(pack_float_to_int16((t/10.0) - 1.0), pack_float_to_int16(mod(t, 2.0) - 1.0));
 }
 
@@ -199,7 +210,8 @@ void fragment() {
 	vec2 uv = vec2(UV.x, 1.0-UV.y);
 	// uv = (trunc(uv*UV_QUANTIZE)+0.5)/UV_QUANTIZE;
 	// COLOR.xyzw = test_writeback(TEXTURE, uv);
-	COLOR.xyzw = render_song(dot(trunc(uv*TEX_SIZE), vec2(1.0, TEX_SIZE)));
+	ivec2 xy = ivec2(trunc(uv*TEX_SIZE));
+	COLOR.xyzw = render_song(xy.x + (xy.y*INT_TEX_SIZE));
 }
 
 // const int MAX_TEMPO_EVENTS = 256;
diff --git a/test/audio_renderer.gd b/test/audio_renderer.gd
index 603929a..713a49c 100644
--- a/test/audio_renderer.gd
+++ b/test/audio_renderer.gd
@@ -1,8 +1,8 @@
 extends Control
 
 const INPUT_TEX_WIDTH := 2048
-const INPUT_FORMAT := Image.FORMAT_RGBAF  # Image.FORMAT_LA8
-const INPUT_BYTES_PER_TEXEL := 16         # 2
+const INPUT_FORMAT := Image.FORMAT_RGBA8  # Alternative considered: Image.FORMAT_LA8
+const INPUT_BYTES_PER_TEXEL := 4         # Must match INPUT_FORMAT: 4 for RGBA8 (would be 2 for LA8)
 const OUTPUT_WIDTH := 4096
 var viewport: Viewport
 var render_queue: Array  # of Images
diff --git a/test/audio_system.gd b/test/audio_system.gd
index 9c0d289..3d6b063 100644
--- a/test/audio_system.gd
+++ b/test/audio_system.gd
@@ -135,35 +135,19 @@ func test_rendering() -> void:
 
 	var midi_events_bytes := StreamPeerBuffer.new()
 	var midi_events_bytes2 := StreamPeerBuffer.new()
-	var divisor = 128.0 #1024.0  # See sdr_scale in audio_renderer.gdshader
+	var midi_events_bytes3 := StreamPeerBuffer.new()
+	var midi_events_bytes4 := StreamPeerBuffer.new()
 	for i in 2048:
 		var t = i * 10.0
-		midi_events_bytes.put_float(t/divisor)  # t_start
-		midi_events_bytes.put_float((t+3.0)/divisor)  # t_end
-		# Try repacking these later
-		midi_events_bytes.put_float((i%35)/divisor)  # instrument
-		midi_events_bytes.put_float(71/divisor)  # pitch_idx
-		# midi_events_bytes.put_float((35 + (i%40))/divisor)  # pitch_idx
-		midi_events_bytes2.put_float(1.0)  # velocity
-		midi_events_bytes2.put_float((i%101)/100.0)  # pan
-		midi_events_bytes2.put_float(0.0)  # TBD
-		midi_events_bytes2.put_float(0.0)  # TBD
-		# midi_events_bytes.put_float(((i%35) + 71/1024.0)/1023.0)  # instrument_and_pitch
-		# midi_events_bytes.put_float((1.0 + (i*4)/1024.0)/1023.0)  # velocity_and_pan
-		# midi_events_bytes2.put_float(0.0)  # attack_and_decay
-		# midi_events_bytes2.put_float(0.0)  # sustain_and_relase
-		# midi_events_bytes2.put_float(0.0)  # TBD
-		# midi_events_bytes2.put_float(0.0)  # TBD
-	# for i in 2048-256:
-	# 	midi_events_bytes.put_float(0.0)
-	# 	midi_events_bytes.put_float(0.0)
-	# 	midi_events_bytes.put_float(0.0)
-	# 	midi_events_bytes.put_float(0.0)
-	# 	midi_events_bytes2.put_float(0.0)
-	# 	midi_events_bytes2.put_float(0.0)
-	# 	midi_events_bytes2.put_float(0.0)
-	# 	midi_events_bytes2.put_float(0.0)
-	var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array
+		midi_events_bytes.put_32(t*32000)  # t_start
+		midi_events_bytes2.put_32((t+3.0)*32000)  # t_end
+		midi_events_bytes3.put_u8((i%35))  # instrument
+		midi_events_bytes3.put_u8(71)  # pitch_idx
+		# midi_events_bytes.put_float((35 + (i%40)))  # pitch_idx
+		midi_events_bytes3.put_u8(255)  # velocity
+		midi_events_bytes3.put_u8(i%256)  # pan
+		midi_events_bytes4.put_32(0)  # ADSR
+	var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array + midi_events_bytes3.data_array + midi_events_bytes4.data_array
 	audio_renderer.push_bytes(channel_data)  # + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data)
 
 # 	var test_payload := PoolByteArray()