ChocolateBird/shaders/audio_renderer.gdshader

// ============================================================= BOILERPLATE =============================================================
// While most of the data we are working with is integral, GPU conversion overheads mean almost all of this will be floats.
// Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
shader_type canvas_item;
render_mode blend_premul_alpha;
// Instrument sample texture. Only its .x and .w channels are read (see get_inst_texel), one 16-bit value per texel.
uniform highp sampler2D instrument_samples;
// Dimensions of instrument_samples in texels; used to convert texel coordinates to UVs.
uniform highp vec2 instrument_samples_size = vec2(2048.0, 128.0);
// Output framebuffer width as an int; fragment() uses it to turn an (x, y) output texel into a linear sample index.
uniform highp int INT_OUTPUT_WIDTH = 4096;
// Output framebuffer dimensions in texels; fragment() uses it to turn UV into an integer output texel.
uniform highp vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0);
// Note number at which an instrument plays back at its stored mixrate (pitch scale of 1.0, see get_pitch_scale).
uniform highp float reference_note = 71.0;  // [0, 255], possibly [0, 127]
uniform highp float output_mixrate = 32000.0;  // SNES SPC output is 32kHz
// Dimensions in texels of the note-event texture read by get_midi_texel.
uniform highp vec2 midi_events_size = vec2(2048.0, 32.0);
// Tempo divisor used by retime_smp: event times are scaled by TEMPO_SCALE_MULTIPLIER / tempo_scale_thousandths.
// NOTE(review): retime_smp divides by this value, so it must never be set to 0.
uniform highp int tempo_scale_thousandths = 1000;
const highp int TEMPO_SCALE_MULTIPLIER = 1000;
// I feel like these magic numbers are a bit more intuitive in hex
const highp float x00FF  = float(0x00FF);   //   255.0
const highp float x0100  = float(0x0100);   //   256.0
const highp float x7FFF  = float(0x7FFF);   // 32767.0
const highp float x8000  = float(0x8000);   // 32768.0
const highp float xFF00  = float(0xFF00);   // 65280.0
const highp float xFFFF  = float(0xFFFF);   // 65535.0
const highp float x10000 = float(0x10000);  // 65536.0
const highp float x00FF0000  = float(0x00FF0000);
// NOTE(review): 0xFF000000 does not fit in a signed 32-bit int; confirm how the shader compiler parses this literal
// before relying on unpack_uint32_to_float.
const highp float xFF000000  = float(0xFF000000);

// Per-byte weights for dot()-based unpacking. Each weight is 255 * 256^n, so a channel sampled as byte/255 contributes byte * 256^n.
// INT16_DOT_BE is not referenced by any function in the visible source.
const highp vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
const highp vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
const highp vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);

highp float unpack_uint16(highp vec2 uint16) {
	// Rebuild a little-endian uint16 from two byte channels sampled as [0.0, 1.0] floats.
	// .x is the low byte and .y the high byte; the result is the integer value [0, 65535] held in a float32.
	highp float value = dot(uint16, INT16_DOT_LE);
	return value;
}

highp float unpack_uint32_to_float(highp vec4 uint32) {
	// Rebuild a little-endian uint32 from four byte channels sampled as [0.0, 1.0] floats,
	// returning the value [0, 0xFFFFFFFF] as a float32.
	// WARNING: float32 cannot represent integers needing more than 24 significant bits, so large values lose precision,
	// and the 0xFF000000 weight itself is not guaranteed to survive rounding.
	highp float value = dot(uint32, INT32_DOT_LE);
	return value;
}

highp int unpack_int32(highp vec4 int32) {
	// Convert a packed 4-byte little-endian integer, sampled as four [0.0, 1.0] range floats, to its int value.
	// .xy carry the low 16-bit word and .zw the high 16-bit word.
	//
	// Each word is rebuilt in float32 and may land a hair below the intended integer (e.g. 65534.998).
	// int() truncates toward zero, which would then drop a whole count - or a whole 65536 for the high word -
	// so 0.5 is added first to snap to the nearest integer. Exact inputs are unaffected.
	//
	// NOTE: a high word of 0x8000 or more overflows the signed multiply below; inputs are expected to stay under 2^31.
	highp int low_word = int(unpack_uint16(int32.xy) + 0.5);
	highp int high_word = int(unpack_uint16(int32.zw) + 0.5);
	// Equivalent shift form, kept for reference:
	// return low_word + (high_word << 16);
	return low_word + (high_word * 0x10000);
}

highp float unpack_int16(highp vec2 int16) {
	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
	// .x is the low byte, .y the high byte (little-endian).
	highp float unsigned = dot(int16, INT16_DOT_LE);
	// 0x0000..0x7FFF are non-negative; 0x8000..0xFFFF are two's-complement negatives and need 0x10000 subtracted.
	// The split is tested at the midpoint (32767.5) so that 0x7FFF itself stays positive (it previously became -32769)
	// and so small float error in the dot product cannot push either boundary value onto the wrong side.
	return unsigned - (unsigned < (x7FFF + 0.5) ? 0.0 : x10000);
}

highp float rescale_int16(highp float int16) {
	// Map a signed 16-bit sample value in [-32768, 32767] onto the unit range [-1.0, 1.0).
	highp float normalised = int16 / x8000;
	return normalised;
}

highp vec2 pack_float_to_int16(highp float value) {
	// Convert a float in range [-1.0, 1.0) to a signed 2byte integer [-32768, 32767] packed into two [0.0, 1.0] floats.
	// Returns vec2(low byte, high byte), each expressed as byte/255 ready to be written to an 8-bit channel.
	//
	// The value is rounded to the nearest integer and clamped before the bytes are split:
	//  - out-of-range input (e.g. several loud channels summed) clips at full scale instead of wrapping around;
	//  - both bytes are exact integers, so the low byte can never round up on store without carrying into the high byte.
	highp float scaled = clamp(floor(value * x8000 + 0.5), -x8000, x7FFF);
	// Two's complement: negative values map to [0x8000, 0xFFFF].
	highp float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0);
	highp float MSB = floor(unsigned / x0100);
	highp float LSB = unsigned - MSB * x0100;
	return vec2(LSB, MSB) / x00FF;
}

// vec4 test_writeback(sampler2D tex, vec2 uv) {
// 	// Test importing and exporting the samples,
// 	// and exporting a value derived from the UV
// 	vec4 output;
// 	float sample_1 = rescale_int16(unpack_int16(texture(tex, uv).xw));
// 	float sample_2 = rescale_int16(dot(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE), vec2(1.0, OUTPUT_FRAMEBUFFER_SIZE)));
// 	output.xy = pack_float_to_int16(sample_1);
// 	output.zw = pack_float_to_int16(sample_2);
// 	return output;
// }


// =============================================================    LOGIC    =============================================================
// We have around 200k frames across 35 instrument samples
// 35 instrument samples and 8 sfx samples = 43 samples
// 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM
// We start the texture with a bunch of same-size headers
//     int32  smp_start  // The true start, after the prepended frames of silence
//     uint16 loop_begin    // padded past the true loop point for filtering
//     uint16 loop_length
//     uint16 mixrate
//
// To accommodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
// Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.
// If a sample does not loop, it must have 6 frames of silence at the end, not including the subsequent next sample's 3 frames of silence prefix.
// As such, every sample will have an additional 9 frames, 3 before, 6 after.
// Additionally, every row of the texture must have 3 redundant frames on either side - i.e., we only sample from [3, 2045) on any given row.
// So the payload of a 2048-wide texture will be 2042 per row, excluding the initial header.
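// So for 43 samples, a header of 43*6 = 258 texels starts the first row,
//   after which the first sample's 3 frames of silence (3 texels of (0.0, 0.0), 6 bytes of 0x00) may begin.
// NOTE(review): HEADER_LENGTH_TEXELS below is 5 (2 texels for the int32 plus 1 for each of the three uint16 fields),
//   which would make the header 43*5 = 215 texels. The 6-per-sample figures here and in the totals below may be stale - confirm.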
// A 2048x128 texture would have a payload of 2042x128 = 261376 frames (texels) excluding header
// With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
//   leaving 261121 texels for the sample data.

// Texels occupied by one instrument's header in row 0 of the instrument texture:
// 2 for the int32 smp_start, then 1 each for loop_begin, loop_length and mixrate (see the reads in get_instrument_sample).
const highp float HEADER_LENGTH_TEXELS = 5.0;
// Row width, in texels, of the instrument texture when it is addressed by a linear frame index.
// NOTE(review): this equals the default instrument_samples_size.x; the two need to stay in agreement.
const highp int INSTRUMENT_SAMPLES_WIDTH = 2048;
highp float sinc(highp float x) {
	// Unnormalised sinc, sin(x)/x, evaluated on |x| and capped at 1.0.
	// A tiny bias is added to the magnitude so the division is never by exactly zero.
	highp float magnitude = abs(x) + 0.00000000000001;
	highp float ratio = sin(magnitude) / magnitude;
	return min(ratio, 1.0);
}

highp float get_pitch_scale(highp float note) {
	// Playback-rate multiplier for `note` relative to reference_note:
	// 1.0 at the reference note, doubling for every 12 notes above it and halving for every 12 below.
	highp float note_offset = note - reference_note;
	return exp2(note_offset / 12.0);
}

highp vec2 get_inst_texel(highp vec2 xy) {
	// Fetch the two data channels (.x and .w) of the instrument texture at integer texel coordinate `xy`.
	// The +0.5 moves the lookup to the centre of the texel before converting to UV.
	highp vec2 texel_uv = (xy + 0.5) / instrument_samples_size;
	return texture(instrument_samples, texel_uv).xw;
}

highp float get_inst_texel_int16(highp int smp) {
	// Read the signed 16-bit frame stored at linear texel index `smp` of the instrument texture.
	// Returns the value [-32768, 32767] as a float32.
	//
	// The row width comes from the instrument_samples_size uniform rather than a separate hard-coded constant,
	// so the row/column split always matches the UV scale used by get_inst_texel, whatever texture size is supplied.
	// With the default size (2048 wide) the result is the same as before.
	highp int width = int(instrument_samples_size.x);
	highp int x = smp % width;
	highp int y = smp / width;
	return unpack_int16(get_inst_texel(vec2(float(x), float(y))));
}

highp float get_instrument_sample(highp float instrument_index, highp float note, highp float t) {
	// Evaluate instrument `instrument_index`, pitched to `note`, at `t` seconds after note start.
	// Returns the filtered sample rescaled by rescale_int16 (nominally [-1.0, 1.0); the filter taps are not normalised).

	// --- Header: HEADER_LENGTH_TEXELS texels per instrument, all on row 0 ---
	highp float header_x = instrument_index * HEADER_LENGTH_TEXELS;
	highp vec2 start_low_word = get_inst_texel(vec2(header_x, 0.0));
	highp vec2 start_high_word = get_inst_texel(vec2(header_x + 1.0, 0.0));
	highp int first_frame = unpack_int32(vec4(start_low_word, start_high_word));  // The true start, after the prepended frames of silence
	highp float loop_begin = unpack_uint16(get_inst_texel(vec2(header_x + 2.0, 0.0)));  // padded past the true loop point for filter
	highp float loop_length = unpack_uint16(get_inst_texel(vec2(header_x + 3.0, 0.0)));
	highp float base_mixrate = unpack_uint16(get_inst_texel(vec2(header_x + 4.0, 0.0)));

	// --- Position within the sample, in (fractional) frames ---
	highp float pitched_mixrate = base_mixrate * get_pitch_scale(note);
	highp float frame_pos = t * pitched_mixrate;

	// Once playback has run past loop_begin, remove every whole loop that has elapsed since then
	// so the position falls back into [loop_begin, loop_begin + loop_length).
	// NOTE(review): a loop_length of 0 makes the division below produce inf/NaN - confirm the header never stores 0.
	highp float frames_past_loop_begin = max(frame_pos - loop_begin, 0.0);
	highp float completed_loops = floor(frames_past_loop_begin / loop_length);
	frame_pos -= completed_loops * loop_length;

	// --- 12-tap sinc-weighted sum over the frames from 6 before to 5 after int(frame_pos) ---
	highp int window_first = first_frame + int(frame_pos) - 6;
	highp float target_in_window = fract(frame_pos) + 6.0;  // frame_pos expressed relative to the first tap
	highp float accumulated = 0.0;
	for (int tap = 0; tap < 12; tap++) {
		highp float frame_value = get_inst_texel_int16(window_first + tap);
		// TODO: determine proper value for this. Might be based on instrument base mixrate.
		highp float weight = sinc((target_in_window - float(tap)) * 3.1);
		accumulated += frame_value * weight;
	}
	return rescale_int16(accumulated);
	// Unfiltered nearest-frame alternative, for debugging:
	// return rescale_int16(get_inst_texel_int16(int(frame_pos) + first_frame));
}

// Number of channels mixed by render_song; each channel occupies 4 consecutive rows of the note-event texture.
const int NUM_CHANNELS = 8;
// Capacity of one channel's event row. Not referenced directly by the visible code; it documents the value
// from which NUM_CHANNEL_NOTE_PROBES is derived.
const highp int MAX_CHANNEL_NOTE_EVENTS = 2048;
// Number of halving steps in render_song's binary search (steps of 1024, 512, ..., 1 cover indices 0..2047).
const int NUM_CHANNEL_NOTE_PROBES = 11;  // log2(MAX_CHANNEL_NOTE_EVENTS)
highp vec4 get_midi_texel(highp sampler2D tex, highp float x, highp float y) {
	// Fetch all four channels of the note-event texture `tex` at integer texel coordinate (x, y).
	// Sampling happens at the texel centre (+0.5), matching get_inst_texel. Sampling exactly on the texel corner
	// relies on how the GPU resolves a coordinate that sits on the boundary between neighbouring texels.
	return texture(tex, (vec2(x, y) + 0.5) / midi_events_size);
}
highp int retime_smp(highp int smp) {
	// Scale a sample index by TEMPO_SCALE_MULTIPLIER / tempo_scale_thousandths using integer arithmetic only.
	// Inputs can reach 2^24 and the multiplier is around 2^10, so smp * TEMPO_SCALE_MULTIPLIER is never formed directly:
	// the quotient and remainder of smp / tempo_scale_thousandths are scaled separately and then recombined.
	highp int quotient = smp / tempo_scale_thousandths;
	highp int remainder = smp % tempo_scale_thousandths;
	highp int scaled_quotient = quotient * TEMPO_SCALE_MULTIPLIER;
	highp int scaled_remainder = (remainder * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths;
	return scaled_remainder + scaled_quotient;
}
highp vec4 render_song(highp sampler2D tex, highp int smp) {
	// Render output frame number `smp` of the song.
	// Each output texel rendered is a stereo S16LE frame representing 1/output_mixrate of a second (1/32000 by default).
	//
	// tex: note-event texture, read through get_midi_texel. Each channel owns 4 consecutive rows, one column per event:
	//   row+0  int32 note start (in samples, before tempo retiming)
	//   row+1  int32 note end   (in samples, before tempo retiming)
	//   row+2  instrument index, pitch, velocity, pan (one byte each)
	//   row+3  ADSR bytes (only .x, the attack, is used so far)
	// Returns vec4(packed downmixed_stereo.x, packed downmixed_stereo.y), two bytes each.

	highp vec2 downmixed_stereo = vec2(0.0);

	for (int channel = 0; channel < NUM_CHANNELS; channel++) {
		highp float row = float(channel * 4);

		// Binary search this channel's events for the last one whose retimed start is <= smp.
		// Assumes start times ascend along the row and that unused columns hold a start time that is never reached - TODO confirm.
		highp float event_idx = 0.0;
		highp int smp_start;
		for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
			highp float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
			smp_start = retime_smp(unpack_int32(get_midi_texel(tex, event_idx + step_size, row)));
			event_idx += (smp >= smp_start) ? step_size : 0.0;
		}
		smp_start = retime_smp(unpack_int32(get_midi_texel(tex, event_idx, row)));
		highp int smp_end = retime_smp(unpack_int32(get_midi_texel(tex, event_idx, row+1.0)));

		highp vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0);  // left as [0.0, 1.0]
		// The index selects a header slot, so it must be an exact integer. Round to nearest rather than truncating:
		// byte/255 * 255 can come back a hair under the intended value, and truncation would then select the previous instrument.
		highp float instrument_idx = floor(note_event_supplement.x * 255.0 + 0.5);
		highp float pitch_idx = note_event_supplement.y * 255.0;
		highp float velocity = note_event_supplement.z;
		highp float pan = note_event_supplement.w;
		highp vec4 adsr = get_midi_texel(tex, event_idx, row+3.0);  // left as [0.0, 1.0]
		// ====================At some point I'll look back into packing floats====================
		// TBD - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
		// ====================At some point I'll look back into packing floats====================
		highp float attack = 1.0 + adsr.x*255.0;  // [1, 256]  TODO: work out effective resolution for this
		highp int smp_attack = int(attack) * 2;  // At most 512 samples = 16ms at 32kHz

		// For now, just branch this
		if (smp_start < smp) {  // First sample may not start at zero!
			highp int smp_overrun = smp - smp_end;  // 256 samples of linear decay to 0 after note_off
			smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun;
			if (smp_overrun < 256) {
				// Take the difference in integer samples BEFORE converting to float.
				// Subtracting two absolute float32 times (hundreds of seconds late in a song) only resolves to about
				// one output sample, which jitters the playback position; the integer delta is exact.
				highp float smp_elapsed = float(smp - smp_start);
				highp float t_note = smp_elapsed/output_mixrate;
				highp float attack_factor = min(smp_elapsed/float(smp_attack), 1.0);
				highp float release_factor = float(255-smp_overrun)/255.0;  // 256 samples of linear decay to 0 after note_off
				highp float samp = get_instrument_sample(instrument_idx, pitch_idx, t_note);
				samp *= velocity * attack_factor * release_factor;
				// TODO: proper decay and sustain, revisit release
				downmixed_stereo += samp * vec2(pan, 1.0-pan) * 0.5;  // TODO: double it to maintain the mono level on each channel at center=0.5?
			}
		}
	}
	// Convert the stereo float audio to S16LE
	return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
}

void fragment() {
	// GLES2
	// Each output texel is one rendered audio frame. Flip V, convert the UV to an integer texel coordinate,
	// then linearise it row by row (INT_OUTPUT_WIDTH texels per row) to obtain the sample index to render.
	highp vec2 flipped_uv = vec2(UV.x, 1.0 - UV.y);
	highp ivec2 out_texel = ivec2(trunc(flipped_uv * OUTPUT_FRAMEBUFFER_SIZE));
	highp int smp = out_texel.x + (out_texel.y * INT_OUTPUT_WIDTH);
	// Debug alternative: COLOR.xyzw = test_writeback(TEXTURE, flipped_uv);
	COLOR.xyzw = render_song(TEXTURE, smp);
}
BGM render shader 2024-07-10 22:13:58 +09:30			`// ============================================================= BOILERPLATE =============================================================`
			`// While most of the data we are working with is integral, GPU conversion overheads mean almost all of this will be floats.`
			`// Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`shader_type canvas_item;`
			`render_mode blend_premul_alpha;`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`uniform highp sampler2D instrument_samples;`
			`uniform highp vec2 instrument_samples_size = vec2(2048.0, 128.0);`
			`uniform highp int INT_OUTPUT_WIDTH = 4096;`
			`uniform highp vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0);`
			`uniform highp float reference_note = 71.0; // [0, 255], possibly [0, 127]`
			`uniform highp float output_mixrate = 32000.0; // SNES SPC output is 32kHz`
			`uniform highp vec2 midi_events_size = vec2(2048.0, 32.0);`
			`uniform highp int tempo_scale_thousandths = 1000;`
			`const highp int TEMPO_SCALE_MULTIPLIER = 1000;`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`// I feel like these magic numbers are a bit more intuitive in hex`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`const highp float x00FF = float(0x00FF); // 255.0`
			`const highp float x0100 = float(0x0100); // 256.0`
			`const highp float x7FFF = float(0x7FFF); // 32767.0`
			`const highp float x8000 = float(0x8000); // 32768.0`
			`const highp float xFF00 = float(0xFF00); // 65280.0`
			`const highp float xFFFF = float(0xFFFF); // 65535.0`
			`const highp float x10000 = float(0x10000); // 65536.0`
			`const highp float x00FF0000 = float(0x00FF0000);`
			`const highp float xFF000000 = float(0xFF000000);`

			`const highp vec2 INT16_DOT_BE = vec2(xFF00, x00FF);`
			`const highp vec2 INT16_DOT_LE = vec2(x00FF, xFF00);`
			`const highp vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);`

			`highp float unpack_uint16(highp vec2 uint16) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32`
			`return dot(uint16, INT16_DOT_LE);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float unpack_uint32_to_float(highp vec4 uint32) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value [0, 0xFFFFFFFF] in float32`
			`// NOTE: THIS WILL LOSE PRECISION ON NUMBERS ABOVE 24BIT SIGNIFICANCE`
			`// I CAN'T EVEN GUARANTEE THE 0xFF000000 CONSTANT WILL SURVIVE ROUNDING`
			`return dot(uint32, INT32_DOT_LE);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int unpack_int32(highp vec4 int32) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value`
			`// return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) << 16);`
			`return int(unpack_uint16(int32.xy)) + (int(unpack_uint16(int32.zw)) * 0x10000);`
			`}`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float unpack_int16(highp vec2 int16) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float unsigned = dot(int16, INT16_DOT_LE);`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`return unsigned - (unsigned < x7FFF ? 0.0 : x10000);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float rescale_int16(highp float int16) {`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`// Rescale from [-32768, 32767] to [-1.0, 1.0)`
			`return int16 / x8000;`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp vec2 pack_float_to_int16(highp float value) {`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`// Convert a float in range [-1.0, 1.0) to a signed 2byte integer [-32768, 32767] packed into two [0.0, 1.0] floats`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float scaled = value * x8000;`
			`highp float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0);`
			`highp float unsigned_div_256 = unsigned / x0100;`
			`highp float MSB = trunc(unsigned_div_256) / x00FF;`
			`highp float LSB = fract(unsigned_div_256) * x0100 / x00FF;`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`return vec2(LSB, MSB);`
			`}`

[BGM Shader] Batching improvements Make it easier to change framebuffer size 2024-07-15 00:45:10 +09:30			`// vec4 test_writeback(sampler2D tex, vec2 uv) {`
			`// // Test importing and exporting the samples,`
			`// // and exporting a value derived from the UV`
			`// vec4 output;`
			`// float sample_1 = rescale_int16(unpack_int16(texture(tex, uv).xw));`
[BGM Shader] Fix shader uniform names 2024-07-15 01:36:14 +09:30			`// float sample_2 = rescale_int16(dot(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE), vec2(1.0, OUTPUT_FRAMEBUFFER_SIZE)));`
[BGM Shader] Batching improvements Make it easier to change framebuffer size 2024-07-15 00:45:10 +09:30			`// output.xy = pack_float_to_int16(sample_1);`
			`// output.zw = pack_float_to_int16(sample_2);`
			`// return output;`
			`// }`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30
BGM render shader 2024-07-10 22:13:58 +09:30
			`// ============================================================= LOGIC =============================================================`
			`// We have around 200k frames across 35 instrument samples`
			`// 35 instrument samples and 8 sfx samples = 43 samples`
			`// 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM`
			`// We start the texture with a bunch of same-size headers`
			`// int32 smp_start // The true start, after the prepended frames of silence`
			`// uint16 loop_begin // padded past the true loop point for filtering`
			`// uint16 loop_length`
			`// uint16 mixrate`
			`//`
			`// To accomodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.`
			`// Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.`
			`// If a sample does not loop, it must have 6 frames of silence at the end, not including the subsequent next sample's 3 frames of silence prefix.`
			`// As such, every sample will have an additional 9 frames, 3 before, 6 after.`
			`// Additionally, every row of the texture must have 3 redundant frames on either side - i.e., we only sample from [3, 2045) on any given row.`
			`// So the payload of a 2048-wide texture will be 2042 per row, excluding the initial header.`
			`// So for 43 samples, a header of 43*6 = 258 texels starts the first row,`
			`// after which the first sample's 3 frames of silence (3 texels of (0.0, 0.0), 6 bytes of 0x00) may begin.`
			`// A 2048x128 texture would have a payload of 2042x128 = 261376 frames (texels) excluding header`
			`// With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,`
			`// leaving 261121 texels for the sample data.`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`const highp float HEADER_LENGTH_TEXELS = 5.0;`
			`const highp int INSTRUMENT_SAMPLES_WIDTH = 2048;`
			`highp float sinc(highp float x) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`x = abs(x) + 0.00000000000001; // Avoid division by zero`
			`return min(sin(x)/x, 1.0);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float get_pitch_scale(highp float note) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`return exp2((note - reference_note)/12.0);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp vec2 get_inst_texel(highp vec2 xy) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`return texture(instrument_samples, (xy+0.5)/instrument_samples_size).xw;`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float get_inst_texel_int16(highp int smp) {`
			`highp int x = smp % INSTRUMENT_SAMPLES_WIDTH;`
			`highp int y = smp / INSTRUMENT_SAMPLES_WIDTH;`
BGM render shader 2024-07-10 22:13:58 +09:30			`return unpack_int16(texture(instrument_samples, (vec2(float(x), float(y)) + 0.5)/instrument_samples_size).xw);`
			`}`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float get_instrument_sample(highp float instrument_index, highp float note, highp float t) {`
			`highp float header_offset = instrument_index * HEADER_LENGTH_TEXELS;`
			`highp int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0)))); // The true start, after the prepended frames of silence`
			`highp float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // padded past the true loop point for filter`
			`highp float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));`
			`highp float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0)));`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Calculate the point we want to sample in linear space`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float mixrate = sample_mixrate * get_pitch_scale(note);`
			`highp float smp_t = t * mixrate;`
BGM render shader 2024-07-10 22:13:58 +09:30			`// If we're past the end of the sample, we need to wrap it back to within the loop range`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float overshoot = max(smp_t - smp_loop_begin, 0.0);`
BGM render shader 2024-07-10 22:13:58 +09:30			`smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length;`
			`// if (smp_t > smp_loop_begin) {`
			`// // return 0.0;`
			`// smp_t = mod(smp_t - smp_loop_begin, smp_loop_length) + smp_loop_begin;`
			`// }`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int smp_window_start = smp_start + int(smp_t) - 6;`
			`highp float smp_rel_filter_target = fract(smp_t) + 6.0;`
			`highp float output = 0.0;`
BGM render shader 2024-07-10 22:13:58 +09:30			`for (int i = 0; i < 12; i++) {`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int smp_filter = smp_window_start + i;`
			`highp float s = get_inst_texel_int16(smp_filter);`
BGM render shader 2024-07-10 22:13:58 +09:30			`// TODO: determine proper value for this. Might be based on instrument base mixrate.`
			`output += s * sinc((smp_rel_filter_target - float(i)) * 3.1);`
			`}`
			`return rescale_int16(output);`
			`// int target_texel = int(smp_t) + smp_start;`
			`// return rescale_int16(get_inst_texel_int16(target_texel));`
			`}`

			`const int NUM_CHANNELS = 8;`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`const highp int MAX_CHANNEL_NOTE_EVENTS = 2048;`
BGM render shader 2024-07-10 22:13:58 +09:30			`const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS)`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp vec4 get_midi_texel(highp sampler2D tex, highp float x, highp float y) {`
[BGM Shader] Enable batching 2024-07-14 23:34:23 +09:30			`return texture(tex, vec2(x, y)/midi_events_size).xyzw;`
BGM render shader 2024-07-10 22:13:58 +09:30			`}`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int retime_smp(highp int smp) {`
JAOT Audio rendering 2024-07-16 03:43:14 +09:30			`// Overflow safety is important as our input values can go up to 2^24, and we multiply by around 2^10`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int factor = smp / tempo_scale_thousandths;`
			`highp int residue = smp % tempo_scale_thousandths;`
			`highp int a = (residue * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths;`
			`highp int b = factor * TEMPO_SCALE_MULTIPLIER;`
JAOT Audio rendering 2024-07-16 03:43:14 +09:30			`return a + b;`
			`}`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp vec4 render_song(highp sampler2D tex, highp int smp) {`
BGM render shader 2024-07-10 22:13:58 +09:30			`// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second`
			`// 2048 is an established safe texture dimension so may as well go 2048 wide`

[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float t = float(smp)/output_mixrate;`
			`highp vec2 downmixed_stereo = vec2(0.0);`
BGM render shader 2024-07-10 22:13:58 +09:30
			`// Binary search the channels`
			`for (int channel = 0; channel < NUM_CHANNELS; channel++) {`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float row = float(channel * 4);`
			`highp float event_idx = 0.0;`
			`highp int smp_start;`
BGM render shader 2024-07-10 22:13:58 +09:30			`for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));`
JAOT Audio rendering 2024-07-16 03:43:14 +09:30			`smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx + step_size, row))));`
BGM render shader 2024-07-10 22:13:58 +09:30			`event_idx += (smp >= smp_start) ? step_size : 0.0;`
			`}`
JAOT Audio rendering 2024-07-16 03:43:14 +09:30			`smp_start = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row))));`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int smp_end = retime_smp(int(unpack_int32(get_midi_texel(tex, event_idx, row+1.0))));`

			`highp vec4 note_event_supplement = get_midi_texel(tex, event_idx, row+2.0); // left as [0.0, 1.0]`
			`highp float instrument_idx = trunc(note_event_supplement.x * 255.0);`
			`highp float pitch_idx = note_event_supplement.y * 255.0;`
			`highp float velocity = note_event_supplement.z;`
			`highp float pan = note_event_supplement.w;`
			`highp vec4 adsr = get_midi_texel(tex, event_idx, row+3.0); // left as [0.0, 1.0]`
BGM render shader 2024-07-10 22:13:58 +09:30			`// ====================At some point I'll look back into packing floats====================`
			`// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain`
			`// ====================At some point I'll look back into packing floats====================`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float attack = 1.0 + adsr.x*255.0; //65535.0 + 1.0; // TODO: work out effective resolution for this`
			`highp int smp_attack = int(attack) * 2; // Max value is 131072 samples = 4.096 seconds`
BGM render shader 2024-07-10 22:13:58 +09:30
			`// For now, just branch this`
			`if (smp_start < smp) { // First sample may not start at zero!`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp int smp_overrun = smp - smp_end; // 256 samples of linear decay to 0 after note_off`
BGM render shader 2024-07-10 22:13:58 +09:30			`smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun;`
			`if (smp_overrun < 256) {`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp float t_start = float(smp_start)/output_mixrate;`
			`highp float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0);`
			`highp float release_factor = float(255-smp_overrun)/255.0; // 256 samples of linear decay to 0 after note_off`
			`highp float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start);`
BGM render shader 2024-07-10 22:13:58 +09:30			`samp *= velocity * attack_factor * release_factor;`
			`// TODO: proper decay and sustain, revisit release`
Flip panning 2024-07-15 15:25:31 +09:30			`downmixed_stereo += samp * vec2(pan, 1.0-pan) * 0.5; // TODO: double it to maintain the mono level on each channel at center=0.5?`
BGM render shader 2024-07-10 22:13:58 +09:30			`}`
			`}`
			`}`
			`// Convert the stereo float audio to S16LE`
			`return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));`
			`}`

Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`void fragment() {`
			`// GLES2`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp vec2 uv = vec2(UV.x, 1.0-UV.y);`
[BGM Shader] Fix shader uniform names 2024-07-15 01:36:14 +09:30			`// uv = (trunc(uv*OUTPUT_FRAMEBUFFER_SIZE)+0.5)/OUTPUT_FRAMEBUFFER_SIZE;`
BGM render shader 2024-07-10 22:13:58 +09:30			`// COLOR.xyzw = test_writeback(TEXTURE, uv);`
[BGM] slap highp everywhere to ward off destructive gpu driver "optimisations" 2024-07-16 20:26:18 +09:30			`highp ivec2 xy = ivec2(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE));`
[BGM Shader] Fix shader uniform names 2024-07-15 01:36:14 +09:30			`COLOR.xyzw = render_song(TEXTURE, xy.x + (xy.y*INT_OUTPUT_WIDTH));`
Some plumbing for upcoming fake compute shaders 2024-07-10 00:35:29 +09:30			`}`