304 lines
15 KiB
Plaintext
304 lines
15 KiB
Plaintext
// ============================================================= BOILERPLATE =============================================================
|
|
// While most of the data we are working with is integral, GPU conversion overheads mean almost all of this will be floats.
|
|
// Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
|
|
shader_type canvas_item;
render_mode blend_premul_alpha;

// ---- Instrument sample bank ----
// Texture holding the lookup tables, per-instrument headers and waveform data.
uniform highp sampler2D instrument_samples;
// Texel dimensions of instrument_samples; used to turn texel coordinates into UVs.
uniform highp vec2 instrument_samples_size = vec2(2048.0, 128.0);

// ---- Output framebuffer ----
// Width in pixels, as an int, used to linearise a pixel coordinate into a frame index.
uniform highp int INT_OUTPUT_WIDTH = 4096;
uniform highp vec2 OUTPUT_FRAMEBUFFER_SIZE = vec2(4096.0, 4096.0);

// ---- Playback parameters ----
uniform highp float reference_note = 71.0; // [0, 255], possibly [0, 127]
uniform highp float output_mixrate = 32000.0; // SNES SPC output is 32kHz
// Texel dimensions of the note-event texture; used to turn texel coordinates into UVs.
uniform highp vec2 midi_events_size = vec2(4096.0, 32.0);
// Event timestamps are scaled by TEMPO_SCALE_MULTIPLIER / tempo_scale_thousandths.
uniform highp int tempo_scale_thousandths = 1000;
const highp int TEMPO_SCALE_MULTIPLIER = 1000;
|
|
// I feel like these magic numbers are a bit more intuitive in hex
const highp float x00FF = float(0x00FF); // 255.0
const highp float x0100 = float(0x0100); // 256.0
const highp float x7FFF = float(0x7FFF); // 32767.0
const highp float x8000 = float(0x8000); // 32768.0
const highp float xFF00 = float(0xFF00); // 65280.0
const highp float xFFFF = float(0xFFFF); // 65535.0
const highp float x10000 = float(0x10000); // 65536.0
const highp float x00FF0000 = float(0x00FF0000); // 16711680.0
// 0xFF000000 does not fit in a signed 32-bit int: under GLSL literal rules an
// unsuffixed hex literal with the top bit set is a negative int, so
// float(0xFF000000) would be -16777216.0 rather than the intended weight.
// Spell it as a float instead; 255 * 2^24 is exactly representable in float32.
const highp float xFF000000 = 4278190080.0; // 255.0 * 16777216.0

// Per-byte weights for rebuilding an integer from bytes sampled as [0.0, 1.0] floats.
// Each weight is 255 * 256^n, so a sampled byte b/255 contributes b * 256^n.
const highp vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
const highp vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
const highp vec4 INT32_DOT_LE = vec4(x00FF, xFF00, x00FF0000, xFF000000);
|
|
|
|
highp float unpack_uint16(highp vec2 uint16) {
    // Rebuild a little-endian 2-byte unsigned integer from its two bytes, each sampled
    // as a [0.0, 1.0] float. Result is the original value [0, 65535], held in a float32.
    // uint16.x is weighted as the low byte, uint16.y as the high byte.
    highp vec2 byte_weights = INT16_DOT_LE;
    highp float value = dot(uint16, byte_weights);
    return value;
}
|
|
|
|
highp float unpack_uint32_to_float(highp vec4 uint32) {
    // Rebuild a little-endian 4-byte unsigned integer from its four bytes, each sampled
    // as a [0.0, 1.0] float. Result is the original value [0, 0xFFFFFFFF], held in a float32.
    // NOTE: float32 only carries 24 bits of significance, so larger values LOSE PRECISION,
    // and the 0xFF000000 weight itself is not guaranteed to survive rounding.
    highp vec4 byte_weights = INT32_DOT_LE;
    highp float value = dot(uint32, byte_weights);
    return value;
}
|
|
|
|
highp int unpack_int32(highp vec4 int32) {
    // Convert packed 4byte integer, sampled as four [0.0, 1.0] range floats, to the original int value.
    // int32.xy is decoded as the low 16-bit word, int32.zw as the high 16-bit word.
    //
    // Each word is rounded to the nearest integer before the int() conversion:
    // the float reconstruction (byte/255 * weight) can land a hair below the true
    // value, and int() truncates toward zero, which would turn N into N-1.
    highp int low_word = int(floor(unpack_uint16(int32.xy) + 0.5));
    highp int high_word = int(floor(unpack_uint16(int32.zw) + 0.5));
    // Multiplication stands in for (high_word << 16).
    return low_word + (high_word * 0x10000);
}
|
|
|
|
highp float unpack_int16(highp vec2 int16) {
    // Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
    highp float raw = dot(int16, INT16_DOT_LE); // unsigned interpretation, [0, 65535]
    // Two's complement: 0x0000..0x7FFF are non-negative, 0x8000..0xFFFF are negative.
    // The threshold must be 0x8000; comparing against 0x7FFF would send 32767
    // (the largest positive value) to -32769.
    return raw - (raw < x8000 ? 0.0 : x10000);
}
|
|
|
|
highp float rescale_int16(highp float int16) {
    // Map a signed 16-bit value, [-32768, 32767], onto [-1.0, 1.0) by dividing by 32768.
    highp float normalised = int16 / x8000;
    return normalised;
}
|
|
|
|
highp vec2 pack_float_to_int16(highp float value) {
    // Convert a float in nominal range [-1.0, 1.0) to a signed 2byte integer [-32768, 32767]
    // packed into two [0.0, 1.0] floats, returned as (LSB, MSB).
    //
    // The value is rounded to the nearest integer and saturated first:
    //  - without saturation, an over-range input (e.g. several loud channels summed)
    //    wraps around to the opposite sign instead of clipping;
    //  - without integer rounding, a fractional low byte such as 255.7 cannot carry
    //    into the high byte.
    highp float scaled = clamp(floor(value * x8000 + 0.5), -x8000, x7FFF);
    // Two's complement: shift negatives up by 65536 so the result is in [0, 65535].
    highp float wrapped = scaled + (scaled < 0.0 ? x10000 : 0.0);
    // wrapped is an exact integer here, so the byte split below is exact.
    highp float msb_byte = floor(wrapped / x0100);
    highp float lsb_byte = wrapped - (msb_byte * x0100);
    // Each byte [0, 255] is emitted as byte/255.
    return vec2(lsb_byte, msb_byte) / x00FF;
}
|
|
|
|
// vec4 test_writeback(sampler2D tex, vec2 uv) {
|
|
// // Test importing and exporting the samples,
|
|
// // and exporting a value derived from the UV
|
|
// vec4 output;
|
|
// float sample_1 = rescale_int16(unpack_int16(texture(tex, uv).xw));
|
|
// float sample_2 = rescale_int16(dot(trunc(uv*OUTPUT_FRAMEBUFFER_SIZE), vec2(1.0, OUTPUT_FRAMEBUFFER_SIZE)));
|
|
// output.xy = pack_float_to_int16(sample_1);
|
|
// output.zw = pack_float_to_int16(sample_2);
|
|
// return output;
|
|
// }
|
|
|
|
|
|
// ============================================================= LOGIC =============================================================
|
|
// We have around 200k frames across 35 instrument samples
|
|
// 35 instrument samples and 8 sfx samples = 43 samples
|
|
// 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM
|
|
// We start the texture with a bunch of same-size headers
|
|
// int32 smp_start // The true start, after the prepended frames of silence
|
|
// uint16 loop_begin // padded past the true loop point for filtering
|
|
// uint16 loop_length
|
|
// uint16 mixrate
|
|
//
|
|
// To accommodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
|
|
// Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.
|
|
// If a sample does not loop, it must have 6 frames of silence at the end, not including the subsequent next sample's 3 frames of silence prefix.
|
|
// As such, every sample will have an additional 9 frames, 3 before, 6 after.
|
|
// Additionally, every row of the texture must have 3 redundant frames on either side - i.e., we only sample from [3, 2045) on any given row.
|
|
// So the payload of a 2048-wide texture will be 2042 per row, excluding the initial header.
|
|
// So for 43 samples, a header of 43*6 = 258 texels starts the first row,
|
|
// after which the first sample's 3 frames of silence (3 texels of (0.0, 0.0), 6 bytes of 0x00) may begin.
|
|
// A 2048x128 texture would have a payload of 2042x128 = 261376 frames (texels) excluding header
|
|
// With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
|
|
// leaving 261121 texels for the sample data.
|
|
|
|
// Texels per instrument header record, as read by get_instrument_sample:
// smp_start (2 texels), loop_begin, loop_length, mixrate (1 texel each).
const highp float HEADER_LENGTH_TEXELS = 5.0;
// Row width, in texels, used to turn a linear sample index into an (x, y) texel coordinate.
const highp int INSTRUMENT_SAMPLES_WIDTH = 2048;
|
|
highp float sinc(highp float x) {
    // sin(pi*x) / (pi*x), evaluated on |x| and capped at 1.0.
    // The tiny epsilon keeps the denominator non-zero at x == 0.
    highp float phase = abs(x * 3.14159265359) + 0.00000000001;
    highp float ratio = sin(phase) / phase;
    return min(ratio, 1.0);
}
|
|
|
|
highp float get_pitch_scale(highp float note) {
    // Playback-rate multiplier for `note` relative to reference_note:
    // the rate doubles for every 12 note steps above the reference.
    highp float steps_from_reference = note - reference_note;
    return exp2(steps_from_reference / 12.0);
}
|
|
|
|
highp vec2 get_inst_texel(highp vec2 xy) {
    // Fetch the instrument texture at integer texel coordinate xy and return its
    // .x and .w channels (the two packed bytes the unpack_* helpers consume).
    // The +0.5 moves the lookup to the texel centre.
    highp vec2 texel_centre_uv = (xy + 0.5) / instrument_samples_size;
    highp vec4 texel = texture(instrument_samples, texel_centre_uv);
    return texel.xw;
}
|
|
|
|
highp float get_inst_texel_int16(highp int smp) {
    // Read the signed 16-bit value stored at linear texel index `smp`.
    // The index is laid out row by row, INSTRUMENT_SAMPLES_WIDTH texels per row.
    highp int column = smp % INSTRUMENT_SAMPLES_WIDTH;
    highp int line = smp / INSTRUMENT_SAMPLES_WIDTH;
    highp vec2 texel_xy = vec2(float(column), float(line));
    return unpack_int16(get_inst_texel(texel_xy));
}
|
|
|
|
highp float get_instrument_sample(highp float instrument_index, highp float note, highp float t) {
    // Fetch one interpolated frame of an instrument's waveform.
    //   instrument_index: selects the header record; record n starts at texel
    //                     x = n * HEADER_LENGTH_TEXELS on row 1 of the instrument texture
    //   note:             pitch to play at, on the same scale as reference_note
    //   t:                time since the note started (the caller derives it as
    //                     output frames / output_mixrate)
    // Returns the 4-point cubic interpolation of the stored int16 data, divided by 32768.

    // ---- Header record ----
    highp float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
    highp int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 1.0)), get_inst_texel(vec2(header_offset + 1.0, 1.0)))); // The true start, after the prepended frames of silence
    highp float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 1.0))); // padded past the true loop point for filter
    highp float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 1.0)));
    highp float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 1.0)));

    // ---- Position within the sample, in (fractional) frames ----
    highp float mixrate = sample_mixrate * get_pitch_scale(note);
    highp float smp_t = t * mixrate;

    // Once playback passes the loop start, fold the position back by whole loop
    // lengths so it stays inside the loop range.
    // A loop length of 0 would make the division below produce inf/NaN and poison
    // smp_t, so the fold is skipped in that case.
    // NOTE(review): how a non-looping sample is encoded in the header is not visible
    // here - confirm whether loop_length == 0 can actually occur.
    if (smp_loop_length > 0.0) {
        highp float overshoot = max(smp_t - smp_loop_begin, 0.0);
        smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length;
    }

    // // Linear interpolation
    // highp int smp_window_start = smp_start + int(smp_t);
    // highp float x0 = get_inst_texel_int16(smp_window_start);
    // highp float x1 = get_inst_texel_int16(smp_window_start+1);
    // return rescale_int16(mix(x0, x1, fract(smp_t)));

    // ---- Cubic interpolation ----
    // Four neighbouring frames: one before the target position, the frame at it, two after.
    highp int smp_window_start = smp_start + int(smp_t);
    highp float x0 = get_inst_texel_int16(smp_window_start-1);
    highp float x1 = get_inst_texel_int16(smp_window_start);
    highp float x2 = get_inst_texel_int16(smp_window_start+1);
    highp float x3 = get_inst_texel_int16(smp_window_start+2);
    // Polynomial coefficients, all scaled by 2; the final division by 2.0 undoes that.
    highp float a0 = 3.0*x1 - 3.0*x2 + x3 - x0;
    highp float a1 = 2.0*x0 - 5.0*x1 + 4.0*x2 - x3;
    highp float a2 = x2 - x0;
    highp float a3 = 2.0*x1;
    highp float T = fract(smp_t); // fractional position between x1 and x2
    highp float T2 = T*T;
    return rescale_int16((a0*T2*T + a1*T2 + a2*T + a3) / 2.0);

    // // Windowed Sinc interpolation
    // highp int smp_window_start = smp_start + int(smp_t) - 6;
    // highp float smp_rel_filter_target = fract(smp_t) + 6.0;
    // highp float output = 0.0;
    // for (int i = 0; i < 12; i++) {
    //     highp int smp_filter = smp_window_start + i;
    //     highp float s = get_inst_texel_int16(smp_filter);
    //     // TODO: determine proper value for this. Might be based on instrument base mixrate.
    //     output += s * sinc(smp_rel_filter_target - float(i));
    // }
    // return rescale_int16(output);

    // // Nearest sample
    // int target_texel = int(smp_t) + smp_start;
    // return rescale_int16(get_inst_texel_int16(target_texel));
}
|
|
|
|
// highp float get_exponential_decay_no_lookup(highp int periods) {
|
|
// highp ivec4 low_periods = ivec4(2047, 2015, 1946, 1835) - (periods * ivec4(8, 7, 6, 5));
|
|
// highp ivec4 high_periods = ivec4(1672, 1446, 1134, 695) - (periods * ivec4(4, 3, 2, 1));
|
|
// highp ivec4 max1 = max(low_periods, high_periods);
|
|
// highp ivec2 max2 = max(max1.xy, max1.zw);
|
|
// highp int env = max(max(max2.x, max2.y), 0);
|
|
// return highp float(env)/2047.0;
|
|
// }
|
|
|
|
// Texel x-offsets of the lookup tables stored on row 0 of the instrument texture.
// Attack times: read as two texels (one int32) per entry; the 2*16 below reserves 32 texels.
const highp int HEADER_START_ATTACK_TIME = 0;
// Rate periods: read as one texel (int16) per entry; the 1*32 below reserves 32 texels.
const highp int HEADER_START_PERIOD_TABLE = HEADER_START_ATTACK_TIME + 2*16;
// Exponential decay curve: one texel (int16) per period, following the period table.
const highp int HEADER_START_EXPONENTIAL_DECAY_CURVE = HEADER_START_PERIOD_TABLE + 1*32;
// Upper bound applied to the period count before it is used to index the decay curve.
const highp float HEADER_LENGTH_EXPONENTIAL_DECAY_CURVE = 697.0;
|
|
highp int get_attack_time_smps(highp int idx) {
    // Look up entry `idx` of the attack-time table on row 0.
    // Each entry is an int32 spread over two adjacent texels (low word first).
    highp float first_texel_x = float(HEADER_START_ATTACK_TIME + (idx*2));
    highp vec2 low_word = get_inst_texel(vec2(first_texel_x, 0.0));
    highp vec2 high_word = get_inst_texel(vec2(first_texel_x + 1.0, 0.0));
    return unpack_int32(vec4(low_word, high_word));
}
|
|
highp int get_rate_period(highp int idx) {
    // Look up entry `idx` of the rate-period table on row 0.
    // Each entry is a single int16 texel; the value is truncated to an int.
    highp float texel_x = float(HEADER_START_PERIOD_TABLE + idx);
    highp float period = unpack_int16(get_inst_texel(vec2(texel_x, 0.0)));
    return int(period);
}
|
|
highp float get_exponential_decay(highp float periods) { // input in periods, output in [0.0, 1.0]
    // Linearly interpolate the decay curve stored on row 0 at a fractional period count.
    // Table entries are int16 texels divided by 2047.0 (presumably the curve's maximum - confirm).
    // NOTE(review): this reads the entry at floor(periods) AND the one after it, so a caller
    // passing HEADER_LENGTH_EXPONENTIAL_DECAY_CURVE touches texels past that offset - verify
    // the texture holds sensible data there.
    highp float table_pos = float(HEADER_START_EXPONENTIAL_DECAY_CURVE) + periods;
    highp float lower_x = floor(table_pos);
    highp float lower_value = unpack_int16(get_inst_texel(vec2(lower_x, 0.0))) / 2047.0;
    highp float upper_value = unpack_int16(get_inst_texel(vec2(lower_x + 1.0, 0.0))) / 2047.0;
    highp float blend = fract(table_pos);
    return mix(lower_value, upper_value, blend);
}
|
|
|
|
|
|
// Number of channels mixed per output frame.
const int NUM_CHANNELS = 8;
// Event slots per channel.
const highp int MAX_CHANNEL_NOTE_EVENTS = 4096;
// Binary-search iterations needed to pick one slot out of MAX_CHANNEL_NOTE_EVENTS.
const int NUM_CHANNEL_NOTE_PROBES = 12; // log2(MAX_CHANNEL_NOTE_EVENTS)
|
|
highp vec4 get_midi_texel(highp sampler2D tex, highp float x, highp float y) {
    // Fetch all four channels of the event texture at integer texel coordinate (x, y).
    // The +0.5 moves the lookup to the texel centre, matching get_inst_texel.
    // Sampling exactly on the texel corner only returns the intended texel when the
    // division is exact and filtering is nearest; the centre is unambiguous either way.
    highp vec2 texel_centre_uv = (vec2(x, y) + 0.5) / midi_events_size;
    return texture(tex, texel_centre_uv);
}
|
|
highp int retime_smp(highp int smp) {
    // Compute smp * TEMPO_SCALE_MULTIPLIER / tempo_scale_thousandths in integer arithmetic.
    // Inputs can reach 2^24 and the multiplier is around 2^10, so the product is never
    // formed directly: the quotient and remainder of smp are scaled separately and summed.
    highp int whole_units = smp / tempo_scale_thousandths;
    highp int leftover = smp % tempo_scale_thousandths;
    highp int scaled_leftover = (leftover * TEMPO_SCALE_MULTIPLIER) / tempo_scale_thousandths;
    highp int scaled_whole = whole_units * TEMPO_SCALE_MULTIPLIER;
    return scaled_leftover + scaled_whole;
}
|
|
highp vec4 render_song(highp sampler2D tex, highp int smp) {
    // Render output frame number `smp` of the song described by the event texture `tex`.
    // Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
    // 2048 is an established safe texture dimension so may as well go 2048 wide
    //
    // Returns the two stereo channels packed as (ch0 LSB, ch0 MSB, ch1 LSB, ch1 MSB),
    // where ch0 is weighted by `pan` and ch1 by `1.0 - pan`.
    //
    // Event texture layout, as read below: each channel owns 5 consecutive rows and
    // column = event index.
    //   row+0: event start (int32)          - used to locate the active event
    //   row+1: note start (int32)
    //   row+2: instrument, pitch, velocity, pan (one byte each)
    //   row+3: ADSR parameters (one byte each)
    //   row+4: not read here

    highp vec2 downmixed_stereo = vec2(0.0);

    for (int channel = 0; channel < NUM_CHANNELS; channel++) {
        highp float row = float(channel * 5);

        // Binary search this channel's events: build the index bit by bit from the top,
        // keeping each bit whose probed event has already started at `smp`.
        highp float event_idx = 0.0;
        for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
            highp float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
            highp int smp_event_start = retime_smp(unpack_int32(get_midi_texel(tex, event_idx + step_size, row)));
            event_idx += (smp >= smp_event_start) ? step_size : 0.0;
        }

        highp vec4 tex1 = get_midi_texel(tex, event_idx, row+1.0);
        highp vec4 tex2 = get_midi_texel(tex, event_idx, row+2.0);
        highp vec4 tex3 = get_midi_texel(tex, event_idx, row+3.0);
        highp vec4 next_tex1 = get_midi_texel(tex, event_idx+1.0, row+1.0);

        highp int smp_note_start = retime_smp(unpack_int32(tex1));
        highp int next_smp_note_start = retime_smp(unpack_int32(next_tex1));

        // For now, just branch this
        if (smp_note_start < smp) { // First sample may not start at zero!
            // 256 samples of linear decay to 0 before next non-tie event
            // (an equal note start on the next event is treated as a tie: no release).
            highp int smp_release_overrun = (smp_note_start == next_smp_note_start) ? 0 : max(smp - next_smp_note_start + 256, 0);
            if (smp_release_overrun < 256) {
                highp float instrument_idx = trunc(tex2.x * 255.0);
                highp float pitch_idx = tex2.y * 255.0;
                highp float velocity = tex2.z;
                highp float pan = tex2.w;
                highp ivec4 adsr = ivec4(tex3 * 255.0);
                // ====================At some point I'll look back into packing floats====================
                // TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
                // ====================At some point I'll look back into packing floats====================
                highp int smp_attack = get_attack_time_smps(adsr.x);
                highp int adsr_decay_rate = get_rate_period(adsr.y*2 + 16);
                highp int adsr_decay_total_periods = adsr.z;
                highp int adsr_sustain_decay_rate = get_rate_period(adsr.w);
                // Length of the first decay stage, in output frames, counted from the end of the attack.
                highp int smp_decay_length = adsr_decay_rate * adsr_decay_total_periods;

                highp int smp_progress = smp - smp_note_start;

                // Branchless attack - just clamp to [0.0, 1.0]
                highp float attack_factor = clamp(float(smp_progress)/float(smp_attack), 0.0, 1.0);

                // Two-stage decay - determine if we're in the first or second stage.
                // smp_decay_progress is measured from the END of the attack, so it must be
                // compared against the decay stage's own length. Comparing it against
                // attack + length stretched stage one by smp_attack frames and made the
                // envelope jump at the hand-over to stage two.
                highp int smp_decay_progress = max(smp_progress - smp_attack, 0);
                highp float decay_periods = 0.0;
                // For now, just branch it
                if (smp_decay_progress < smp_decay_length) {
                    // smp_decay_length > 0 here, so adsr_decay_rate is non-zero.
                    decay_periods = float(smp_decay_progress) / float(adsr_decay_rate);
                } else {
                    decay_periods = float(adsr_decay_total_periods);
                    if (adsr_sustain_decay_rate > 0) {
                        decay_periods += (float(smp_decay_progress - smp_decay_length) / float(adsr_sustain_decay_rate));
                    }
                }
                decay_periods = min(decay_periods, HEADER_LENGTH_EXPONENTIAL_DECAY_CURVE); // Don't overshoot!
                highp float decay_factor = clamp(get_exponential_decay(decay_periods), 0.0, 1.0);

                // 256 samples of linear decay to 0 before next note
                highp float release_factor = float(255-smp_release_overrun)/255.0;

                // Time since note start, taken from the exact integer frame difference.
                // Forming smp/mixrate and note_start/mixrate separately and subtracting
                // them loses about one frame of precision in float32 late in a long song.
                highp float t_since_note_start = float(smp_progress)/output_mixrate;
                highp float samp = get_instrument_sample(instrument_idx, pitch_idx, t_since_note_start);
                samp *= velocity * attack_factor * decay_factor * release_factor;
                // TODO: proper decay and sustain, revisit release
                downmixed_stereo += samp * vec2(pan, 1.0-pan) * 0.5; // TODO: double it to maintain the mono level on each channel at center=0.5?
            }
        }
    }

    // Convert the stereo float audio to S16LE
    return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
}
|
|
|
|
void fragment() {
    // Each output pixel is one audio frame: its pixel coordinate is flattened into a
    // linear frame index (row-major, INT_OUTPUT_WIDTH per row) and rendered.

    // GLES2
    // Flip V before converting the UV to a pixel coordinate.
    highp vec2 flipped_uv = vec2(UV.x, 1.0-UV.y);
    // flipped_uv = (trunc(flipped_uv*OUTPUT_FRAMEBUFFER_SIZE)+0.5)/OUTPUT_FRAMEBUFFER_SIZE;
    // COLOR.xyzw = test_writeback(TEXTURE, flipped_uv);
    highp ivec2 pixel = ivec2(trunc(flipped_uv*OUTPUT_FRAMEBUFFER_SIZE));
    highp int frame_index = pixel.x + (pixel.y*INT_OUTPUT_WIDTH);
    COLOR.xyzw = render_song(TEXTURE, frame_index);
}
|