// ============================================================= BOILERPLATE =============================================================
// While most of the data we are working with is integral, GPU conversion overheads mean almost all of this will be floats.
// Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
shader_type canvas_item;
render_mode blend_premul_alpha;
const float TEX_SIZE = 4096.0;
const float UV_QUANTIZE = TEX_SIZE;
// I feel like these magic numbers are a bit more intuitive in hex
const float x00FF = float(0x00FF); // 255.0
const float x0100 = float(0x0100); // 256.0
const float x7FFF = float(0x7FFF); // 32767.0
const float x8000 = float(0x8000); // 32768.0
const float xFF00 = float(0xFF00); // 65280.0
const float xFFFF = float(0xFFFF); // 65535.0
const float x10000 = float(0x10000); // 65536.0

const vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
const vec2 INT16_DOT_LE = vec2(x00FF, xFF00);

uniform sampler2D tex : hint_normal;
float unpack_uint16(vec2 uint16) {
	// Convert a packed 2-byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32
	return dot(uint16, INT16_DOT_LE);
}
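// Worked example, assuming the usual 8-bit normalized channels (byte B samples as B/255.0):
// bytes (LSB = 0x34, MSB = 0x12) arrive as (52.0/255.0, 18.0/255.0);
// dot with INT16_DOT_LE = (255.0, 65280.0) gives 52.0 + 18.0*256.0 = 4660.0 = 0x1234.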

float unpack_int16(vec2 int16) {
	// Convert a packed 2-byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
	float unsigned = dot(int16, INT16_DOT_LE);
	return unsigned - (unsigned < x8000 ? 0.0 : x10000);
}
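// Worked example: bytes (LSB = 0xFE, MSB = 0xFF) give 254.0 + 255.0*256.0 = 65534.0,
// which is >= 32768.0, so we return 65534.0 - 65536.0 = -2.0 (matching int16 0xFFFE).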

float rescale_int16(float int16) {
	// Rescale from [-32768, 32767] to [-1.0, 1.0)
	return int16 / x8000;
}

vec2 pack_float_to_int16(float value) {
	// Convert a float in range [-1.0, 1.0) to a signed 2-byte integer [-32768, 32767] packed into two [0.0, 1.0] floats
	float scaled = value * x8000;
	float unsigned = scaled + (scaled < 0.0 ? x10000 : 0.0);
	float unsigned_div_256 = unsigned / x0100;
	float MSB = trunc(unsigned_div_256) / x00FF;
	float LSB = fract(unsigned_div_256) * x0100 / x00FF;
	return vec2(LSB, MSB);
}
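// Round-trip example: value = -0.5 -> scaled = -16384.0 -> unsigned = 49152.0;
// 49152.0/256.0 = 192.0, so MSB = 192.0/255.0 and LSB = 0.0, i.e. bytes 0x00, 0xC0 (int16 -16384).
// unpack_int16 then recovers 49152.0 - 65536.0 = -16384.0, and rescale_int16 gives back -0.5.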

vec4 test_writeback(vec2 uv) {
	// Test importing and exporting the samples,
	// and exporting a value derived from the UV
	vec4 output;
	float sample_1 = rescale_int16(unpack_int16(texture(tex, uv).xw));
	float sample_2 = rescale_int16(dot(trunc(uv*TEX_SIZE), vec2(1.0, TEX_SIZE)));
	output.xy = pack_float_to_int16(sample_1);
	output.zw = pack_float_to_int16(sample_2);
	return output;
}

// ============================================================= LOGIC =============================================================
// We have around 200k frames across 35 instrument samples
// 35 instrument samples and 8 sfx samples = 43 samples
// 2048x128 texture maybe? at 2 bytes per texel, that's 512KiB of VRAM
// We start the texture with a bunch of same-size headers
// uint16 sample_start // The true start, after the prepended 3 frames of silence
// uint16 sample_length // 3 frames after the true end, because of how we loop
// uint16 sample_loop_begin // 3 frames after the true loop point
// uint16 mixrate
// 2*uint8 AD of ADSR ([0.0, 1.0] is fine)
// 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
// So six texture() calls are spent on header information, and one on the final lookup.
// Alternatively, sample_length could be omitted and derived from the next entry's sample_start, to save redundant texels.
//
// To accommodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
// Looped playback will jump from the first 3 of the 6 frames at the end to the third frame after the loop start point, to avoid filter bleeding.
// If a sample does not loop, it must instead end with 6 frames of silence, not counting the next sample's 3-frame silence prefix.
// As such, every sample carries an additional 9 frames: 3 before, 6 after.
// Additionally, every row of the texture must have 3 redundant frames on either side - i.e., we only sample from [3, 2045) on any given row.
// So the payload of a 2048-wide texture is 2042 frames per row, excluding the initial header.
// So for 43 samples, a header of 43*6 = 258 texels starts the first row,
// after which the first sample's 3 frames of silence (3 texels of (0.0, 0.0), 6 bytes of 0x00) may begin.
// A 2048x128 texture would have a payload of 2042x128 = 261376 frames (texels) excluding the header.
// The 258-texel header overlaps the row's 3-texel left margin, so it consumes 255 payload texels,
// leaving 261376 - 255 = 261121 texels for the sample data.
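// Illustrative header layout under this scheme (sample i's header occupies texels [6*i, 6*i + 5] of row 0):
//   texel 6*i + 0: sample_start      (uint16)
//   texel 6*i + 1: sample_length     (uint16)
//   texel 6*i + 2: sample_loop_begin (uint16)
//   texel 6*i + 3: mixrate           (uint16)
//   texel 6*i + 4: attack/decay      (2x uint8)
//   texel 6*i + 5: sustain/release   (2x uint8)
// e.g. sample 7's header starts at texel 42; the last header texel for 43 samples is 43*6 - 1 = 257.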

const float HEADER_LENGTH_TEXELS = 6.0;
uniform sampler2D instrument_samples;
uniform vec2 instrument_samples_size = vec2(2048.0, 128.0);
uniform float instrument_row_padding = 3.0; // In case we want to go to cubic filtering
uniform float instrument_row_payload = 2042.0; // 2048 - 3 - 3; keep in sync with instrument_samples_size and instrument_row_padding!
uniform float reference_note = 71.0; // [0, 255], possibly [0, 127]
uniform float output_mixrate = 32000.0; // SNES SPC output is 32kHz

float get_pitch_scale(float note) {
	// return pow(2.0, (note - reference_note)/12.0);
	return exp2((note - reference_note)/12.0);
}
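// e.g. with reference_note = 71.0: note 83.0 -> exp2(12.0/12.0) = 2.0 (one octave up, twice the mixrate);
// note 59.0 -> exp2(-1.0) = 0.5 (one octave down).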

vec2 get_inst_texel(vec2 xy) {
	return texture(instrument_samples, xy/instrument_samples_size).xw;
}

float get_instrument_sample(float instrument_index, float pitch_scale, float t, float t_end) {
	// t_end is for ADSR purposes
	float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
	float sample_start = unpack_uint16(get_inst_texel(vec2(header_offset, 0.0))); // The true start, after the prepended 3 frames of silence
	float sample_length = unpack_uint16(get_inst_texel(vec2(header_offset + 1.0, 0.0))); // 3 frames after the true end, because of how we loop
	float sample_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // 3 frames after the true loop point
	float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));
	vec2 attack_decay = get_inst_texel(vec2(header_offset + 4.0, 0.0));
	vec2 sustain_release = get_inst_texel(vec2(header_offset + 5.0, 0.0));
	// Calculate the point we want to sample in linear space
	float mixrate = sample_mixrate * pitch_scale;
	float target_frame = t * mixrate;
	// If we're past the end of the sample, we need to wrap it back to within the loop range
	float loop_length = sample_length - sample_loop_begin;
	float overshoot = max(target_frame - sample_length, 0.0);
	float overshoot_loops = ceil(overshoot/loop_length);
	target_frame -= overshoot_loops*loop_length;
	// Now we need to identify the sampling point since our frames are spread across multiple rows for GPU reasons
	// We only sample from texel 3 onwards on a given row - texels 0, 1, 2 are lead-in margin for filtering (the header only occupies the start of row 0)
	// Note that y should be integral, but x should be continuous, as that's what applies the filtering!
	target_frame += sample_start;
	vec2 sample_xy = vec2(instrument_row_padding + mod(target_frame, instrument_row_payload), trunc(target_frame/instrument_row_payload));
	return rescale_int16(unpack_int16(get_inst_texel(sample_xy)));
}
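// Worked example of the wrap-around (hypothetical header values): sample_length = 1000.0, sample_loop_begin = 600.0,
// so loop_length = 400.0. A target_frame of 1900.0 overshoots by 900.0, overshoot_loops = ceil(900.0/400.0) = 3.0,
// and target_frame becomes 1900.0 - 1200.0 = 700.0, which lies inside the loop region [600, 1000].
// Row mapping with the defaults (padding 3, payload 2042): target_frame + sample_start = 5000.0 lands at
// column 3.0 + mod(5000.0, 2042.0) = 919.0 on row trunc(5000.0/2042.0) = 2.0.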

const int NUM_CHANNELS = 8;
const int MAX_CHANNEL_NOTE_EVENTS = 2048;
const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS)
uniform sampler2D midi_events;
uniform vec2 midi_events_size = vec2(2048.0, 16.0);
// SDR rendering only gives us [0.0, 1.0] from the sampler2D, so we need to rescale it.
uniform float t_scale = 524.0; // Change this if we need longer than 8min44sec.
// ^ Other things will also need changing, since 4096x4096 = ~16.8M texels (frames) is barely over 524 seconds at 32kHz.
vec4 get_midi_texel(float x, float y) {
	return texture(midi_events, vec2(x, y)/midi_events_size).xyzw;
}
vec2 unpack_float(float f) {
	// Unpack two 10-bit values from a single channel (they fit comfortably in a float32's 23-bit mantissa)
	float a = f * 1024.0;
	float x = trunc(a) / 1023.0;
	float y = fract(a) * 1024.0 / 1023.0;
	return vec2(x, y);
}
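// This assumes the CPU side packs two integers hi, lo in [0, 1023] as f = (hi + lo/1024.0)/1024.0.
// e.g. hi = 300, lo = 512: f = 300.5/1024.0; then a = 300.5, x = 300.0/1023.0, y = 512.0/1023.0,
// so multiplying by 1023.0 downstream recovers 300.0 and 512.0 exactly.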

vec4 render_song(float sample_progress) {
	// Each texel rendered is a stereo S16LE frame representing 1/32000 of a second
	// BGM sequences should be relatively small so it should be fine to use RGBAF (4x f32s per texel) as our data texture
	// 2048 is an established safe texture dimension so may as well go 2048 wide
	float t = sample_progress/output_mixrate;
	vec2 downmixed_stereo = vec2(0.0);

	// Binary search the channels
	for (int channel = 0; channel < NUM_CHANNELS; channel++) {
		float row = float(channel * 2);
		float event_idx = 0.0;
		for (int i = 0; i < NUM_CHANNEL_NOTE_PROBES; i++) {
			float step_size = exp2(float(NUM_CHANNEL_NOTE_PROBES - i - 1));
			vec4 note_event = get_midi_texel(event_idx + step_size, row);
			float t_start = note_event.x * t_scale;
			event_idx += (t >= t_start) ? step_size : 0.0;
		}
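		// After the probes, event_idx is the largest index whose start time is <= t
		// (assuming each channel's events are sorted by start time and slot 0 acts as the fallback).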
		vec4 note_event = get_midi_texel(event_idx, row);
		vec4 note_event_supplement = get_midi_texel(event_idx, row+1.0);
		float t_start = note_event.x * t_scale;
		float t_end = note_event.y * t_scale;
		vec2 instrument_and_pitch = unpack_float(note_event.z);
		float instrument_idx = instrument_and_pitch.x * 1023.0;
		float pitch_idx = instrument_and_pitch.y * 1023.0; // TODO: Maybe rescale this for fine tuning? Don't use it raw because 2^(127-71) is MASSIVE, keep the power-of-2 calcs in shader.
		vec2 velocity_and_pan = unpack_float(note_event_supplement.w); // Can leave these as [0.0, 1.0] and then mix appropriately
		float velocity = velocity_and_pan.x;
		float pan = velocity_and_pan.y;
		vec2 attack_and_decay = unpack_float(note_event_supplement.x);
		vec2 sustain_and_release = unpack_float(note_event_supplement.y);
		// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain

		// For now, just branch this
		if (t_end > t) {
			float samp = get_instrument_sample(instrument_idx, get_pitch_scale(pitch_idx), t-t_start, t_end-t_start);
			samp *= velocity;
			// TODO: do some ADSR here?
			downmixed_stereo += samp * vec2(1.0-pan, pan); // TODO: double it to maintain the mono level on each channel at center=0.5?
		}
	}
	// Convert the stereo float audio to S16LE
	return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
}

void fragment() {
	// GLES2
	vec2 uv = vec2(UV.x, 1.0-UV.y);
	uv = (trunc(uv*UV_QUANTIZE)+0.5)/UV_QUANTIZE;
	// COLOR.xyzw = test_writeback(uv);
	COLOR.xyzw = render_song(dot(uv, vec2(1.0, midi_events_size.x)));
}

// const int MAX_TEMPO_EVENTS = 256;
// const int NUM_TEMPO_PROBES = 8; // log2(MAX_TEMPO_EVENTS)
// Because tempo is dynamic, it will need to be encoded into a header in song_texture
// // Binary search the first row for tempo information
// float tempo_idx = 0.0;
// vec4 tempo_event;
// float t_start;
// for (int i = 0; i < NUM_TEMPO_PROBES; i++) {
// 	float step_size = exp2(float(NUM_TEMPO_PROBES - i - 1));
// 	tempo_event = get_midi_texel(tempo_idx + step_size, 0.0);
// 	t_start = tempo_event.x;
// 	tempo_idx += (t >= t_start) ? step_size : 0.0;
// }
// float beat_start = tempo_event.y;
// float tempo_start = tempo_event.z;
// float tempo_end = tempo_event.w; // For tempo slides
// vec4 next_tempo_event = get_midi_texel(tempo_idx + 1.0, 0.0);
// float t_end = next_tempo_event.x;
// float beat_end = next_tempo_event.y;
// // Use the tempo information to convert wall time to beat time
// float t0 = t - t_start;
// float t_length = t_end - t_start;
// float tempo_section_progression = t0 / t_length;
// float tempo_at_t = mix(tempo_start, tempo_end, tempo_section_progression);
// float current_beat = beat_start + (t0 * (tempo_start+tempo_at_t) * 0.5); // Use the average tempo across the period to turn integration into area of a rectangle
// Now that we have our position on the beatmap,