diff --git a/scripts/loaders/SoundLoader.gd b/scripts/loaders/SoundLoader.gd
index 7e96c36..be48ce0 100644
--- a/scripts/loaders/SoundLoader.gd
+++ b/scripts/loaders/SoundLoader.gd
@@ -174,7 +174,7 @@ func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer):
 		instrument_samples_HACK_EXTENDED_LOOPS.append(HACK_EXTEND_LOOP_SAMPLE(samp))
 		# print('Instrument %02X has mix_rate %d Hz and %d samples'%[i, samp.mix_rate, len(samp.data)/2])
 		emit_signal('audio_inst_sample_loaded', i)
-		samp.save_to_wav('output/instrument%02d(%dHz)(loop from %d).wav' % [i, samp.mix_rate, samp.loop_begin])
+		samp.save_to_wav('output/instrument%02d(%dHz)(loop from %d to %d of %d).wav' % [i, samp.mix_rate, samp.loop_begin, samp.loop_end, len(samp.data)/2])
 
 # We start the texture with a bunch of same-size headers
@@ -186,56 +186,65 @@ func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer):
 # 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
 var samples_tex: ImageTexture
 const TEX_WIDTH := 2048
-const FILTER_PAD := 3
+const FILTER_PAD := 32
 func samples_to_texture():
 	var num_samples := INST_NUM + SFX_NUM
-	var header_length := num_samples * 8
+	var header_length := num_samples * 5
 	# Create header and unwrapped payload separately first
 	var header_buffer := StreamPeerBuffer.new()
 	var payload_buffer := StreamPeerBuffer.new()
 	for sample in instrument_samples + sfx_samples:
-		var loop_end: int = sample.loop_end
+		var sample_data_start: int = header_length + (payload_buffer.get_position()/2) + FILTER_PAD # After the prepended silence, in texels (2bytes)
 		var loop_begin: int = sample.loop_begin
-		var nonlooping: bool = loop_begin >= loop_end
-		if nonlooping:
-			loop_begin = loop_end
-			loop_end += 3
-		header_buffer.put_32(header_length + (payload_buffer.get_position()/2) + FILTER_PAD) # sample_start
-		header_buffer.put_u16(loop_end + FILTER_PAD) # sample_length
-		header_buffer.put_u16(loop_begin + FILTER_PAD) # sample_loop_begin
-		header_buffer.put_u16(sample.mix_rate) # sample_mixrate
-		header_buffer.put_u8(0) # TODO: attack
-		header_buffer.put_u8(0) # TODO: decay
-		header_buffer.put_u8(0) # TODO: sustain
-		header_buffer.put_u8(0) # TODO: release
-		header_buffer.put_u16(0) # TODO: unk
-		for i in FILTER_PAD: # Prepend 3 frames of silence
+		var loop_length: int = sample.loop_end - loop_begin
+		var nonlooping: bool = loop_length <= 0
+		print('Processing sample, nonlooping=%s'%nonlooping)
+
+		for i in FILTER_PAD: # Prepend frames of silence
 			payload_buffer.put_16(0)
 		payload_buffer.put_data(sample.data) # Copy entire S16LE audio data
+
 		if nonlooping:
-			for i in FILTER_PAD*2:
-				payload_buffer.put_16(0) # 6 frames of trailing silence to loop
+			# Append trailing silence for filter safety
+			for i in FILTER_PAD*5:
+				payload_buffer.put_16(0)
+			# Make it loop the trailing silence
+			loop_begin += FILTER_PAD
+			loop_length = 1
 		else:
-			# Copy frame by frame in case the loop is shorter than 6 frames
-			var loop_length = sample.loop_end - sample.loop_begin
-			for i in FILTER_PAD*2:
+			# Append copies of the loop for filter safety
+			# var loop_data = sample.data.subarray(sample.loop_begin*2, -1)
+			# for i in ceil((FILTER_PAD*4)/loop_length):
+			# 	payload_buffer.put_data(loop_data)
+
+			# Copy frame by frame in case the loop is shorter than padding frames
+			for i in FILTER_PAD*4:
 				var pos := payload_buffer.get_position()
-				payload_buffer.seek(pos - loop_length)
+				payload_buffer.seek(pos - loop_length*2)
 				var frame := payload_buffer.get_16()
 				payload_buffer.seek(pos)
 				payload_buffer.put_16(frame)
+		header_buffer.put_32(sample_data_start)
+		header_buffer.put_u16(loop_begin + FILTER_PAD)
+		header_buffer.put_u16(loop_length)
+		header_buffer.put_u16(sample.mix_rate)
 	# Combine the unwrapped arrays
 	var data := header_buffer.data_array + payload_buffer.data_array
-	# Now calculate wrapping and rowwise padding for the combined array
-	for row in TEX_WIDTH:
-		var row_end: int = (row + 1) * TEX_WIDTH * 2 # Remember: 8bit array, 16bit values
-		if len(data)/2 > row_end:
-			# [... a b c] + [a b c] + [a b c ...]
-			data = data.subarray(0, row_end-1) + data.subarray(row_end-FILTER_PAD*2, row_end-1) + data.subarray(row_end-FILTER_PAD*2, -1)
-		else:
-			break
+	var datasamp := AudioStreamSample.new()
+	datasamp.data = data
+	datasamp.mix_rate = 32000
+	datasamp.format = AudioStreamSample.FORMAT_16_BITS
+	datasamp.save_to_wav('output/texture_inst_data.wav')
+	# # Now calculate wrapping and rowwise padding for the combined array
+	# for row in TEX_WIDTH:
+	# 	var row_end: int = (row + 1) * TEX_WIDTH * 2 # Remember: 8bit array, 16bit values
+	# 	if len(data)/2 > row_end:
+	# 		# [... a b c] + [a b c] + [a b c ...]
+	# 		data = data.subarray(0, row_end-1) + data.subarray(row_end-FILTER_PAD*2, row_end-1) + data.subarray(row_end-FILTER_PAD*2, -1)
+	# 	else:
+	# 		break
 	var needed_rows := (len(data)/2)/float(TEX_WIDTH)
 	var rows := int(pow(2, ceil(log(needed_rows) / log(2))))
 	if rows > TEX_WIDTH:
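Reference sketch, not part of the patch: each header entry written in the loop above is one int32 plus three uint16s, i.e. 10 bytes or 5 texels at 2 bytes per texel, which is where header_length := num_samples * 5 comes from. A hypothetical GDScript helper for decoding an entry back out of the combined byte array while debugging, assuming the same little-endian StreamPeerBuffer defaults used above:

func debug_read_header(data: PoolByteArray, index: int) -> Dictionary:
	var buf := StreamPeerBuffer.new()
	buf.data_array = data
	buf.seek(index * 10) # 10 bytes per header entry: int32 + 3x uint16
	return {
		'sample_data_start': buf.get_32(), # texel index of the first audible frame
		'loop_begin': buf.get_u16(),       # already offset by FILTER_PAD
		'loop_length': buf.get_u16(),      # forced to 1 for non-looping samples
		'mix_rate': buf.get_u16()
	}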
diff --git a/shaders/audio_renderer.gdshader b/shaders/audio_renderer.gdshader
index 04e19fc..59c1530 100644
--- a/shaders/audio_renderer.gdshader
+++ b/shaders/audio_renderer.gdshader
@@ -77,14 +77,10 @@ vec4 test_writeback(sampler2D tex, vec2 uv) {
 // 35 instrument samples and 8 sfx samples = 43 samples
 // 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM
 // We start the texture with a bunch of same-size headers
-// uint16 sample_start // The true start, after the prepended 3 frames of silence
-// uint16 sample_length // 3 frames after the true end, because of how we loop
-// uint16 sample_loop_begin // 3 frames after the true loop point
+// int32 smp_start // The true start, after the prepended frames of silence
+// uint16 loop_begin // padded past the true loop point for filtering
+// uint16 loop_length
 // uint16 mixrate
-// 2*uint8 AD of ADSR ([0.0, 1.0] is fine)
-// 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
-// So six texture() calls spent on header information, and one on the final lookup.
-// Alternatively, sample length could be omitted and fetched as the start of the next entry to save redundant entries.
 //
 // To accomodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
 // Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.
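Sketch for reference, not part of the diff: the field list above maps onto a fixed 5-texel stride per sample, matching HEADER_LENGTH_TEXELS = 5.0 and the header_offset + N reads in get_instrument_sample() further down. How the two halves of the int32 split across its two texels depends on unpack_int32(), which is outside this diff.

# Field -> texel offset within one header entry (GDScript-style notation)
const HEADER_TEXEL_OFFSET = {
	'smp_start': 0,   # int32, spans texels +0 and +1
	'loop_begin': 2,
	'loop_length': 3,
	'mixrate': 4
}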
@@ -98,56 +94,63 @@ vec4 test_writeback(sampler2D tex, vec2 uv) {
 // With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
 // leaving 261121 texels for the sample data.
-const float HEADER_LENGTH_TEXELS = 8.0;
+const float HEADER_LENGTH_TEXELS = 5.0;
 uniform sampler2D instrument_samples;
 uniform vec2 instrument_samples_size = vec2(2048.0, 128.0);
-uniform float instrument_row_padding = 3.0; // In case we want to go to cubic filtering
-uniform float instrument_row_payload = 2042.0; // 2048-3-3 Make sure to set with instrument_samples_size and instrument_row_padding!
+const int INSTRUMENT_SAMPLES_WIDTH = 2048;
 uniform float reference_note = 71.0; // [0, 255], possibly [0, 127]
 uniform float output_mixrate = 32000.0; // SNES SPC output is 32kHz
+float sinc(float x) {
+	x = abs(x) + 0.00000000000001; // Avoid division by zero
+	return min(sin(x)/x, 1.0);
+}
 float get_pitch_scale(float note) {
-	// return pow(2.0, (note - reference_note)/12.0);
 	return exp2((note - reference_note)/12.0);
 }
 vec2 get_inst_texel(vec2 xy) {
-	return texture(instrument_samples, xy/instrument_samples_size).xw;
+	return texture(instrument_samples, (xy+0.5)/instrument_samples_size).xw;
 }
-float get_inst_texel_int16(float s) {
-	float x = mod(s, instrument_row_payload) + instrument_row_padding;
-	float y = trunc(s / instrument_row_payload);
-	return unpack_int16(texture(instrument_samples, (vec2(x, y) + 0.5)/instrument_samples_size).xw);
+float get_inst_texel_int16(int smp) {
+	int x = smp % INSTRUMENT_SAMPLES_WIDTH;
+	int y = smp / INSTRUMENT_SAMPLES_WIDTH;
+	return unpack_int16(texture(instrument_samples, (vec2(float(x), float(y)) + 0.5)/instrument_samples_size).xw);
 }
-float get_instrument_sample(float instrument_index, float pitch_scale, float t, float t_end) {
-	// t_end is for ADSR purposes
+float get_instrument_sample(float instrument_index, float note, float t) {
 	float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
-	float sample_start = float(unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0))))); // The true start, after the prepended 3 frames of silence
-	float sample_length = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // 3 frames after the true end, because of how we loop
-	float sample_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0))); // 3 frames after the true loop point
+	int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0)))); // The true start, after the prepended frames of silence
+	float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // padded past the true loop point for filter
+	float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));
 	float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0)));
-	vec2 attack_decay = get_inst_texel(vec2(header_offset + 5.0, 0.0));
-	vec2 sustain_release = get_inst_texel(vec2(header_offset + 6.0, 0.0));
 	// Calculate the point we want to sample in linear space
-	float mixrate = sample_mixrate * pitch_scale;
-	float target_frame = t * mixrate;
+	float mixrate = sample_mixrate * get_pitch_scale(note);
+	float smp_t = t * mixrate;
 	// If we're past the end of the sample, we need to wrap it back to within the loop range
-	float loop_length = sample_length - sample_loop_begin;
-	float overshoot = max(target_frame - sample_length, 0.0);
-	float overshoot_loops = ceil(overshoot/loop_length);
-	target_frame -= overshoot_loops*loop_length;
-	// Now we need to identify the sampling point since our frames are spread across multiple rows for GPU reasons
-	// We only sample from texel 4 onwards on a given row - texel 0 is the header, texels 1,2,3 are lead-in for filtering
-	target_frame += sample_start;
-	float a = get_inst_texel_int16(floor(target_frame));
-	float b = get_inst_texel_int16(ceil(target_frame));
-	float mix_amount = fract(target_frame);
-	return rescale_int16(mix(a, b, mix_amount));
+	float overshoot = max(smp_t - smp_loop_begin, 0.0);
+	smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length;
+	// if (smp_t > smp_loop_begin) {
+	// 	// return 0.0;
+	// 	smp_t = mod(smp_t - smp_loop_begin, smp_loop_length) + smp_loop_begin;
+	// }
+
+	int smp_window_start = smp_start + int(smp_t) - 6;
+	float smp_rel_filter_target = fract(smp_t) + 6.0;
+	float output = 0.0;
+	for (int i = 0; i < 12; i++) {
+		int smp_filter = smp_window_start + i;
+		float s = get_inst_texel_int16(smp_filter);
+		// TODO: determine proper value for this. Might be based on instrument base mixrate.
+		output += s * sinc((smp_rel_filter_target - float(i)) * 3.1);
+	}
+	return rescale_int16(output);
+	// int target_texel = int(smp_t) + smp_start;
+	// return rescale_int16(get_inst_texel_int16(target_texel));
 }
-const int NUM_CHANNELS = 8;
+const int NUM_CHANNELS = 1; //8;
 const int MAX_CHANNEL_NOTE_EVENTS = 2048;
 const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS)
 uniform sampler2D midi_events : hint_normal;
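A CPU-side sketch, not part of the patch: the addressing math from get_instrument_sample() above restated in GDScript, so a few frames can be cross-checked against the WAV dumps written by SoundLoader.gd. The function name is hypothetical; reference_note = 71 is taken from the uniform above, so note 83 plays a sample at double its mixrate and note 59 at half.

func reference_sample_position(t: float, note: float, mix_rate: float, loop_begin: float, loop_length: float) -> float:
	var pitch_scale := pow(2.0, (note - 71.0) / 12.0) # same as exp2((note - reference_note)/12.0)
	var smp_t := t * mix_rate * pitch_scale
	var overshoot: float = max(smp_t - loop_begin, 0.0)
	smp_t -= floor(overshoot / loop_length) * loop_length # wrap back into the loop once past loop_begin
	return smp_t # frame offset into this sample's padded data, before adding smp_start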
@@ -155,13 +158,6 @@ uniform vec2 midi_events_size = vec2(2048.0, 16.0);
 vec4 get_midi_texel(float x, float y) {
 	return texture(midi_events, vec2(x, y)/midi_events_size).xyzw;
 }
-vec2 unpack_float(float f) {
-	// Unpack two 10bit values from a single channel (23bit mantissa)
-	float a = f * 1024.0;
-	float x = trunc(a) / 1023.0;
-	float y = fract(a) * 1024.0 / 1023.0;
-	return vec2(x, y);
-}
 vec4 render_song(int smp) {
 	// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
 	// 2048 is an established safe texture dimension so may as well go 2048 wide
@@ -170,8 +166,7 @@ vec4 render_song(int smp) {
 	vec2 downmixed_stereo = vec2(0.0);
 	// Binary search the channels
-	for (int channel = 0; channel < 1; channel++) {
-		// for (int channel = 0; channel < NUM_CHANNELS; channel++) {
+	for (int channel = 0; channel < NUM_CHANNELS; channel++) {
 		float row = float(channel * 4);
 		float event_idx = 0.0;
 		int smp_start;
@@ -183,7 +178,7 @@ vec4 render_song(int smp) {
 		smp_start = int(unpack_int32(get_midi_texel(event_idx, row)));
 		int smp_end = int(unpack_int32(get_midi_texel(event_idx, row+1.0)));
 		vec4 note_event_supplement = get_midi_texel(event_idx, row+2.0); // left as [0.0, 1.0]
-		float instrument_idx = note_event_supplement.x * 255.0;
+		float instrument_idx = trunc(note_event_supplement.x * 255.0);
 		float pitch_idx = note_event_supplement.y * 255.0;
 		float velocity = note_event_supplement.z;
 		float pan = note_event_supplement.w;
@@ -191,19 +186,25 @@ vec4 render_song(int smp) {
 		// ====================At some point I'll look back into packing floats====================
 		// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
 		// ====================At some point I'll look back into packing floats====================
+		float attack = adsr.x*65535.0 + 1.0; // TODO: work out effective resolution for this
+		int smp_attack = int(attack) * 2; // Max value is 131072 samples = 4.096 seconds
 		// For now, just branch this
-		if (smp < smp_end) {
+		int smp_overrun = smp - smp_end; // 256 samples of linear decay to 0 after note_off
+		smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun;
+		if (smp_overrun < 256) {
 			float t_start = float(smp_start)/output_mixrate;
-			float t_end = float(smp_end)/output_mixrate;
-			float samp = get_instrument_sample(instrument_idx, get_pitch_scale(pitch_idx), t-t_start, t_end-t_start);
-			samp *= velocity;
-			// TODO: do some ADSR here?
-			downmixed_stereo += samp * vec2(1.0-pan, pan); // TODO: double it to maintain the mono level on each channel at center=0.5?
+			float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0);
+			float release_factor = float(255-smp_overrun)/255.0; // 256 samples of linear decay to 0 after note_off
+			float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start);
+			samp *= velocity * attack_factor * release_factor;
+			// TODO: proper decay and sustain, revisit release
+			downmixed_stereo += samp * vec2(1.0-pan, pan) * 0.8; // TODO: double it to maintain the mono level on each channel at center=0.5?
 		}
 	}
 	// Convert the stereo float audio to S16LE
 	return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
+	// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0)-1.0));
 }
 
 void fragment() {
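Another CPU-side sketch, not in the patch: the attack/release gain applied inside the branch above, written out in GDScript for clarity. The shader's smp_attack caps at 131072 samples, which is 4.096 s at the 32 kHz output rate, and the 256-sample release ramp is about 8 ms. The function name is hypothetical.

func envelope_gain(smp: int, smp_start: int, smp_end: int, smp_attack: int) -> float:
	var attack_factor: float = min(float(smp - smp_start) / float(smp_attack), 1.0)
	var overrun: int = int(max(smp - smp_end, 0))
	if overrun >= 256:
		return 0.0 # fully released; the shader skips these events entirely
	var release_factor := float(255 - overrun) / 255.0
	return attack_factor * release_factor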
diff --git a/test/audio_system.gd b/test/audio_system.gd
index a671885..03afa64 100644
--- a/test/audio_system.gd
+++ b/test/audio_system.gd
@@ -138,14 +138,15 @@ func test_rendering() -> void:
 	var midi_events_bytes3 := StreamPeerBuffer.new()
 	var midi_events_bytes4 := StreamPeerBuffer.new()
 	for i in 2048:
-		var t = i * 3.0
+		var t = i * 2.0
 		midi_events_bytes.put_32(t*32000) # t_start
-		midi_events_bytes2.put_32((t+2.75)*32000) # t_end
-		midi_events_bytes3.put_u8((i%35)) # instrument
+		midi_events_bytes2.put_32((t+1.75)*32000) # t_end
+		midi_events_bytes3.put_u8(i%35) # instrument
 		midi_events_bytes3.put_u8(71) # pitch_idx
 		# midi_events_bytes.put_float((35 + (i%40))) # pitch_idx
 		midi_events_bytes3.put_u8(255) # velocity
-		midi_events_bytes3.put_u8(i%256) # pan
+		midi_events_bytes3.put_u8(0) # pan
+		# midi_events_bytes3.put_u8(i%256) # pan
 		midi_events_bytes4.put_32(0) # ADSR
 	var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array + midi_events_bytes3.data_array + midi_events_bytes4.data_array
 	audio_renderer.push_bytes(channel_data) # + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data)
@@ -170,7 +171,7 @@ func _draw() -> void:
 	audio_renderer.get_result()
 	var result = audio_renderer.result_queue[0]
 	var rendered_audio := AudioStreamSample.new()
-	rendered_audio.data = result
+	rendered_audio.data = result.subarray(0, (4*120*32000) - 1)
 	rendered_audio.stereo = true
 	rendered_audio.mix_rate = 32000
 	rendered_audio.format = AudioStreamSample.FORMAT_16_BITS
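One last sketch for reference, not part of the patch: the four StreamPeerBuffers above become four consecutive rows per channel in the midi_events texture (row = channel * 4 in render_song()): start frame, end frame, one texel of (instrument, pitch, velocity, pan) bytes, then the ADSR word. The trimmed result above is 4 * 120 * 32000 bytes because each rendered frame is stereo S16LE, i.e. 4 bytes, so that keeps exactly 120 seconds at 32 kHz. A hypothetical helper that packs a single test event the same way as the loop above:

func pack_test_event(b_start: StreamPeerBuffer, b_end: StreamPeerBuffer, b_note: StreamPeerBuffer, b_adsr: StreamPeerBuffer, t_start: float, t_end: float, instrument: int, pitch: int, velocity: int, pan: int) -> void:
	b_start.put_32(int(t_start * 32000)) # row 0: start frame
	b_end.put_32(int(t_end * 32000))     # row 1: end frame
	b_note.put_u8(instrument)            # row 2, x channel
	b_note.put_u8(pitch)                 # row 2, y channel
	b_note.put_u8(velocity)              # row 2, z channel
	b_note.put_u8(pan)                   # row 2, w channel
	b_adsr.put_32(0)                     # row 3: ADSR, still a placeholder in the shader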