[WIP] Finally some nice sounding shader output

This commit is contained in:
Luke Hubmayer-Werner 2024-07-13 02:17:19 +09:30
parent fbd3dd52c7
commit e9b0db8277
3 changed files with 101 additions and 90 deletions

View File

@ -174,7 +174,7 @@ func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer):
instrument_samples_HACK_EXTENDED_LOOPS.append(HACK_EXTEND_LOOP_SAMPLE(samp))
# print('Instrument %02X has mix_rate %d Hz and %d samples'%[i, samp.mix_rate, len(samp.data)/2])
emit_signal('audio_inst_sample_loaded', i)
samp.save_to_wav('output/instrument%02d(%dHz)(loop from %d).wav' % [i, samp.mix_rate, samp.loop_begin])
samp.save_to_wav('output/instrument%02d(%dHz)(loop from %d to %d of %d).wav' % [i, samp.mix_rate, samp.loop_begin, samp.loop_end, len(samp.data)/2])
# We start the texture with a bunch of same-size headers
@ -186,56 +186,65 @@ func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer):
# 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
var samples_tex: ImageTexture
const TEX_WIDTH := 2048
const FILTER_PAD := 3
const FILTER_PAD := 32
func samples_to_texture():
var num_samples := INST_NUM + SFX_NUM
var header_length := num_samples * 8
var header_length := num_samples * 5
# Create header and unwrapped payload separately first
var header_buffer := StreamPeerBuffer.new()
var payload_buffer := StreamPeerBuffer.new()
for sample in instrument_samples + sfx_samples:
var loop_end: int = sample.loop_end
var sample_data_start: int = header_length + (payload_buffer.get_position()/2) + FILTER_PAD # After the prepended silence, in texels (2bytes)
var loop_begin: int = sample.loop_begin
var nonlooping: bool = loop_begin >= loop_end
if nonlooping:
loop_begin = loop_end
loop_end += 3
header_buffer.put_32(header_length + (payload_buffer.get_position()/2) + FILTER_PAD) # sample_start
header_buffer.put_u16(loop_end + FILTER_PAD) # sample_length
header_buffer.put_u16(loop_begin + FILTER_PAD) # sample_loop_begin
header_buffer.put_u16(sample.mix_rate) # sample_mixrate
header_buffer.put_u8(0) # TODO: attack
header_buffer.put_u8(0) # TODO: decay
header_buffer.put_u8(0) # TODO: sustain
header_buffer.put_u8(0) # TODO: release
header_buffer.put_u16(0) # TODO: unk
for i in FILTER_PAD: # Prepend 3 frames of silence
var loop_length: int = sample.loop_end - loop_begin
var nonlooping: bool = loop_length <= 0
print('Processing sample, nonlooping=%s'%nonlooping)
for i in FILTER_PAD: # Prepend frames of silence
payload_buffer.put_16(0)
payload_buffer.put_data(sample.data) # Copy entire S16LE audio data
if nonlooping:
for i in FILTER_PAD*2:
payload_buffer.put_16(0) # 6 frames of trailing silence to loop
# Append trailing silence for filter safety
for i in FILTER_PAD*5:
payload_buffer.put_16(0)
# Make it loop the trailing silence
loop_begin += FILTER_PAD
loop_length = 1
else:
# Copy frame by frame in case the loop is shorter than 6 frames
var loop_length = sample.loop_end - sample.loop_begin
for i in FILTER_PAD*2:
# Append copies of the loop for filter safety
# var loop_data = sample.data.subarray(sample.loop_begin*2, -1)
# for i in ceil((FILTER_PAD*4)/loop_length):
# payload_buffer.put_data(loop_data)
# Copy frame by frame in case the loop is shorter than padding frames
for i in FILTER_PAD*4:
var pos := payload_buffer.get_position()
payload_buffer.seek(pos - loop_length)
payload_buffer.seek(pos - loop_length*2)
var frame := payload_buffer.get_16()
payload_buffer.seek(pos)
payload_buffer.put_16(frame)
header_buffer.put_32(sample_data_start)
header_buffer.put_u16(loop_begin + FILTER_PAD)
header_buffer.put_u16(loop_length)
header_buffer.put_u16(sample.mix_rate)
# Combine the unwrapped arrays
var data := header_buffer.data_array + payload_buffer.data_array
# Now calculate wrapping and rowwise padding for the combined array
for row in TEX_WIDTH:
var row_end: int = (row + 1) * TEX_WIDTH * 2 # Remember: 8bit array, 16bit values
if len(data)/2 > row_end:
# [... a b c] + [a b c] + [a b c ...]
data = data.subarray(0, row_end-1) + data.subarray(row_end-FILTER_PAD*2, row_end-1) + data.subarray(row_end-FILTER_PAD*2, -1)
else:
break
var datasamp := AudioStreamSample.new()
datasamp.data = data
datasamp.mix_rate = 32000
datasamp.format = AudioStreamSample.FORMAT_16_BITS
datasamp.save_to_wav('output/texture_inst_data.wav')
# # Now calculate wrapping and rowwise padding for the combined array
# for row in TEX_WIDTH:
# var row_end: int = (row + 1) * TEX_WIDTH * 2 # Remember: 8bit array, 16bit values
# if len(data)/2 > row_end:
# # [... a b c] + [a b c] + [a b c ...]
# data = data.subarray(0, row_end-1) + data.subarray(row_end-FILTER_PAD*2, row_end-1) + data.subarray(row_end-FILTER_PAD*2, -1)
# else:
# break
var needed_rows := (len(data)/2)/float(TEX_WIDTH)
var rows := int(pow(2, ceil(log(needed_rows) / log(2))))
if rows > TEX_WIDTH:

View File

@ -77,14 +77,10 @@ vec4 test_writeback(sampler2D tex, vec2 uv) {
// 35 instrument samples and 8 sfx samples = 43 samples
// 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM
// We start the texture with a bunch of same-size headers
// uint16 sample_start // The true start, after the prepended 3 frames of silence
// uint16 sample_length // 3 frames after the true end, because of how we loop
// uint16 sample_loop_begin // 3 frames after the true loop point
// int32 smp_start // The true start, after the prepended frames of silence
// uint16 loop_begin // padded past the true loop point for filtering
// uint16 loop_length
// uint16 mixrate
// 2*uint8 AD of ADSR ([0.0, 1.0] is fine)
// 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
// So six texture() calls spent on header information, and one on the final lookup.
// Alternatively, sample length could be omitted and fetched as the start of the next entry to save redundant entries.
//
// To accommodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
// Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.
@ -98,56 +94,63 @@ vec4 test_writeback(sampler2D tex, vec2 uv) {
// With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
// leaving 261121 texels for the sample data.
const float HEADER_LENGTH_TEXELS = 8.0;
const float HEADER_LENGTH_TEXELS = 5.0;
uniform sampler2D instrument_samples;
uniform vec2 instrument_samples_size = vec2(2048.0, 128.0);
uniform float instrument_row_padding = 3.0; // In case we want to go to cubic filtering
uniform float instrument_row_payload = 2042.0; // 2048-3-3 Make sure to set with instrument_samples_size and instrument_row_padding!
const int INSTRUMENT_SAMPLES_WIDTH = 2048;
uniform float reference_note = 71.0; // [0, 255], possibly [0, 127]
uniform float output_mixrate = 32000.0; // SNES SPC output is 32kHz
float sinc(float x) {
x = abs(x) + 0.00000000000001; // Avoid division by zero
return min(sin(x)/x, 1.0);
}
float get_pitch_scale(float note) {
// return pow(2.0, (note - reference_note)/12.0);
return exp2((note - reference_note)/12.0);
}
vec2 get_inst_texel(vec2 xy) {
return texture(instrument_samples, xy/instrument_samples_size).xw;
return texture(instrument_samples, (xy+0.5)/instrument_samples_size).xw;
}
float get_inst_texel_int16(float s) {
float x = mod(s, instrument_row_payload) + instrument_row_padding;
float y = trunc(s / instrument_row_payload);
return unpack_int16(texture(instrument_samples, (vec2(x, y) + 0.5)/instrument_samples_size).xw);
float get_inst_texel_int16(int smp) {
int x = smp % INSTRUMENT_SAMPLES_WIDTH;
int y = smp / INSTRUMENT_SAMPLES_WIDTH;
return unpack_int16(texture(instrument_samples, (vec2(float(x), float(y)) + 0.5)/instrument_samples_size).xw);
}
float get_instrument_sample(float instrument_index, float pitch_scale, float t, float t_end) {
// t_end is for ADSR purposes
float get_instrument_sample(float instrument_index, float note, float t) {
float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
float sample_start = float(unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0))))); // The true start, after the prepended 3 frames of silence
float sample_length = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // 3 frames after the true end, because of how we loop
float sample_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0))); // 3 frames after the true loop point
int smp_start = unpack_int32(vec4(get_inst_texel(vec2(header_offset, 0.0)), get_inst_texel(vec2(header_offset + 1.0, 0.0)))); // The true start, after the prepended frames of silence
float smp_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // padded past the true loop point for filter
float smp_loop_length = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));
float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 4.0, 0.0)));
vec2 attack_decay = get_inst_texel(vec2(header_offset + 5.0, 0.0));
vec2 sustain_release = get_inst_texel(vec2(header_offset + 6.0, 0.0));
// Calculate the point we want to sample in linear space
float mixrate = sample_mixrate * pitch_scale;
float target_frame = t * mixrate;
float mixrate = sample_mixrate * get_pitch_scale(note);
float smp_t = t * mixrate;
// If we're past the end of the sample, we need to wrap it back to within the loop range
float loop_length = sample_length - sample_loop_begin;
float overshoot = max(target_frame - sample_length, 0.0);
float overshoot_loops = ceil(overshoot/loop_length);
target_frame -= overshoot_loops*loop_length;
// Now we need to identify the sampling point since our frames are spread across multiple rows for GPU reasons
// We only sample from texel 4 onwards on a given row - texel 0 is the header, texels 1,2,3 are lead-in for filtering
target_frame += sample_start;
float a = get_inst_texel_int16(floor(target_frame));
float b = get_inst_texel_int16(ceil(target_frame));
float mix_amount = fract(target_frame);
return rescale_int16(mix(a, b, mix_amount));
float overshoot = max(smp_t - smp_loop_begin, 0.0);
smp_t -= floor(overshoot/smp_loop_length) * smp_loop_length;
// if (smp_t > smp_loop_begin) {
// // return 0.0;
// smp_t = mod(smp_t - smp_loop_begin, smp_loop_length) + smp_loop_begin;
// }
int smp_window_start = smp_start + int(smp_t) - 6;
float smp_rel_filter_target = fract(smp_t) + 6.0;
float output = 0.0;
for (int i = 0; i < 12; i++) {
int smp_filter = smp_window_start + i;
float s = get_inst_texel_int16(smp_filter);
// TODO: determine proper value for this. Might be based on instrument base mixrate.
output += s * sinc((smp_rel_filter_target - float(i)) * 3.1);
}
return rescale_int16(output);
// int target_texel = int(smp_t) + smp_start;
// return rescale_int16(get_inst_texel_int16(target_texel));
}
const int NUM_CHANNELS = 8;
const int NUM_CHANNELS = 1; //8;
const int MAX_CHANNEL_NOTE_EVENTS = 2048;
const int NUM_CHANNEL_NOTE_PROBES = 11; // log2(MAX_CHANNEL_NOTE_EVENTS)
uniform sampler2D midi_events : hint_normal;
@ -155,13 +158,6 @@ uniform vec2 midi_events_size = vec2(2048.0, 16.0);
vec4 get_midi_texel(float x, float y) {
return texture(midi_events, vec2(x, y)/midi_events_size).xyzw;
}
vec2 unpack_float(float f) {
// Unpack two 10bit values from a single channel (23bit mantissa)
float a = f * 1024.0;
float x = trunc(a) / 1023.0;
float y = fract(a) * 1024.0 / 1023.0;
return vec2(x, y);
}
vec4 render_song(int smp) {
// Each output texel rendered is a stereo S16LE frame representing 1/32000 of a second
// 2048 is an established safe texture dimension so may as well go 2048 wide
@ -170,8 +166,7 @@ vec4 render_song(int smp) {
vec2 downmixed_stereo = vec2(0.0);
// Binary search the channels
for (int channel = 0; channel < 1; channel++) {
// for (int channel = 0; channel < NUM_CHANNELS; channel++) {
for (int channel = 0; channel < NUM_CHANNELS; channel++) {
float row = float(channel * 4);
float event_idx = 0.0;
int smp_start;
@ -183,7 +178,7 @@ vec4 render_song(int smp) {
smp_start = int(unpack_int32(get_midi_texel(event_idx, row)));
int smp_end = int(unpack_int32(get_midi_texel(event_idx, row+1.0)));
vec4 note_event_supplement = get_midi_texel(event_idx, row+2.0); // left as [0.0, 1.0]
float instrument_idx = note_event_supplement.x * 255.0;
float instrument_idx = trunc(note_event_supplement.x * 255.0);
float pitch_idx = note_event_supplement.y * 255.0;
float velocity = note_event_supplement.z;
float pan = note_event_supplement.w;
@ -191,19 +186,25 @@ vec4 render_song(int smp) {
// ====================At some point I'll look back into packing floats====================
// TBD = note_event_supplement.zw; - tremolo/vibrato/noise/pan_lfo/pitchbend/echo remain
// ====================At some point I'll look back into packing floats====================
float attack = adsr.x*65535.0 + 1.0; // TODO: work out effective resolution for this
int smp_attack = int(attack) * 2; // Max value is 131072 samples = 4.096 seconds
// For now, just branch this
if (smp < smp_end) {
int smp_overrun = smp - smp_end; // 256 samples of linear decay to 0 after note_off
smp_overrun = (smp_overrun < 0) ? 0 : smp_overrun;
if (smp_overrun < 256) {
float t_start = float(smp_start)/output_mixrate;
float t_end = float(smp_end)/output_mixrate;
float samp = get_instrument_sample(instrument_idx, get_pitch_scale(pitch_idx), t-t_start, t_end-t_start);
samp *= velocity;
// TODO: do some ADSR here?
downmixed_stereo += samp * vec2(1.0-pan, pan); // TODO: double it to maintain the mono level on each channel at center=0.5?
float attack_factor = min(float(smp - smp_start)/float(smp_attack), 1.0);
float release_factor = float(255-smp_overrun)/255.0; // 256 samples of linear decay to 0 after note_off
float samp = get_instrument_sample(instrument_idx, pitch_idx, t-t_start);
samp *= velocity * attack_factor * release_factor;
// TODO: proper decay and sustain, revisit release
downmixed_stereo += samp * vec2(1.0-pan, pan) * 0.8; // TODO: double it to maintain the mono level on each channel at center=0.5?
}
}
// Convert the stereo float audio to S16LE
return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(downmixed_stereo.y));
// return vec4(pack_float_to_int16(downmixed_stereo.x), pack_float_to_int16(mod(t, 2.0)-1.0));
}
void fragment() {

View File

@ -138,14 +138,15 @@ func test_rendering() -> void:
var midi_events_bytes3 := StreamPeerBuffer.new()
var midi_events_bytes4 := StreamPeerBuffer.new()
for i in 2048:
var t = i * 3.0
var t = i * 2.0
midi_events_bytes.put_32(t*32000) # t_start
midi_events_bytes2.put_32((t+2.75)*32000) # t_end
midi_events_bytes3.put_u8((i%35)) # instrument
midi_events_bytes2.put_32((t+1.75)*32000) # t_end
midi_events_bytes3.put_u8(i%35) # instrument
midi_events_bytes3.put_u8(71) # pitch_idx
# midi_events_bytes.put_float((35 + (i%40))) # pitch_idx
midi_events_bytes3.put_u8(255) # velocity
midi_events_bytes3.put_u8(i%256) # pan
midi_events_bytes3.put_u8(0) # pan
# midi_events_bytes3.put_u8(i%256) # pan
midi_events_bytes4.put_32(0) # ADSR
var channel_data = midi_events_bytes.data_array + midi_events_bytes2.data_array + midi_events_bytes3.data_array + midi_events_bytes4.data_array
audio_renderer.push_bytes(channel_data) # + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data + channel_data)
@ -170,7 +171,7 @@ func _draw() -> void:
audio_renderer.get_result()
var result = audio_renderer.result_queue[0]
var rendered_audio := AudioStreamSample.new()
rendered_audio.data = result
rendered_audio.data = result.subarray(0, (4*120*32000) - 1)
rendered_audio.stereo = true
rendered_audio.mix_rate = 32000
rendered_audio.format = AudioStreamSample.FORMAT_16_BITS