[WIP] Sound render shader

This commit is contained in:
Luke Hubmayer-Werner 2024-07-10 22:13:58 +09:30
parent 80cbfa7ab8
commit 89d244eb88
3 changed files with 216 additions and 25 deletions

View File

@ -18,16 +18,25 @@ const BYTES_PER_SAMPLE := 2 # 16bit samples
# !!! Adding a few ms to the loops removes harshness. !!! # !!! Adding a few ms to the loops removes harshness. !!!
const HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS := 2 # !!! const HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS := 2 # !!!
func HACK_EXTEND_LOOP_SAMPLE(audio: AudioStreamSample) -> AudioStreamSample: # !!! func HACK_EXTEND_LOOP_SAMPLE(audio: AudioStreamSample) -> AudioStreamSample: # !!!
if audio.loop_begin >= audio.loop_end: # !!! var output: AudioStreamSample = audio.duplicate(true) # !!!
return audio # !!! # Prepend silence # !!!
var silent_samples := (audio.mix_rate * PREPEND_MS) / 1000 # !!!
var silence := PoolByteArray() # !!!
silence.resize(silent_samples * 2) # 16bit samples in 8bit array # !!!
silence.fill(0) # !!!
output.data = silence + output.data # !!!
output.loop_begin += silent_samples # !!!
output.loop_end += silent_samples # !!!
# Append looped samples # !!!
if output.loop_begin >= output.loop_end: # !!!
return output # !!!
var looped_samples = audio.data.subarray(audio.loop_begin * BYTES_PER_SAMPLE, -1) # !!! var looped_samples = audio.data.subarray(audio.loop_begin * BYTES_PER_SAMPLE, -1) # !!!
var loop_len = len(looped_samples) # !!! var loop_len = len(looped_samples) # !!!
var target_len = (audio.mix_rate * HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS / 1000) * BYTES_PER_SAMPLE # !!! var target_len = (audio.mix_rate * HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS / 1000) * BYTES_PER_SAMPLE # !!!
while loop_len < target_len: # Keep doubling in length until it's long enough !!! while loop_len < target_len: # Keep doubling in length until it's long enough !!!
looped_samples += looped_samples # !!! looped_samples += looped_samples # !!!
loop_len = len(looped_samples) # !!! loop_len = len(looped_samples) # !!!
var output = audio.duplicate(true) # !!! output.data += looped_samples # !!!
output.data = audio.data + looped_samples # !!!
return output # !!! return output # !!!
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@ -81,14 +90,14 @@ func make_sample(buffer: StreamPeerBuffer, size: int, sample_rate: int) -> Audio
return audio return audio
var num_packets := size/9 var num_packets := size/9
var samples = PoolIntArray([0, 0]) # Start with two zero samples for filter purposes, strip them from the actual output var samples = PoolIntArray([0, 0]) # Start with two zero samples for filter purposes, strip them from the actual output later
var i := 2 var i := 2
for pkt in num_packets: for pkt in num_packets:
# Decode a single 9byte BRR packet # Decode a single 9byte BRR packet
var header_byte := buffer.get_u8() var header_byte := buffer.get_u8()
var exponent := header_byte >> 4 var exponent := header_byte >> 4
var filter := (header_byte >> 2) & 0x03 var filter := (header_byte >> 2) & 0x03
var loop := bool(header_byte & 0x02) # var loop := bool(header_byte & 0x02)
var end := bool(header_byte & 0x01) var end := bool(header_byte & 0x01)
for sample in 8: for sample in 8:
var b := buffer.get_u8() var b := buffer.get_u8()
@ -109,30 +118,26 @@ func make_sample(buffer: StreamPeerBuffer, size: int, sample_rate: int) -> Audio
if end: if end:
# print('End flag on packet') # print('End flag on packet')
break break
# Convert int array to byte array # Remove first two zero samples
var audio_data = PoolByteArray() samples.remove(0)
# Prepend silence, accounting for the two null samples samples.remove(0)
var silent_samples := ((sample_rate * PREPEND_MS) / 1000) - 2
audio_data.resize(silent_samples * 2) # 16bit samples in 8bit array
audio_data.fill(0)
# Pack 16bit samples to 8bit array # Pack 16bit samples to 8bit array
for b in samples: var out_buff = StreamPeerBuffer.new()
audio_data.append(b & 0xFF) for sample in samples:
audio_data.append(b >> 8) out_buff.put_16(sample)
audio.data = audio_data audio.data = out_buff.data_array
return audio return audio
func get_inst_sample_data(snes_data: Dictionary, buffer: StreamPeerBuffer, id: int) -> AudioStreamSample: func get_inst_sample_data(snes_data: Dictionary, buffer: StreamPeerBuffer, id: int) -> AudioStreamSample:
var sample_rate := get_reference_pitch_samplerate(snes_data.bgm_instrument_samplerates[id] & 0xFF) var sample_rate := get_reference_pitch_samplerate(snes_data.bgm_instrument_samplerates[id] & 0xFF)
var silent_samples := ((sample_rate * PREPEND_MS) / 1000)
var loop_start_packet: int = snes_data.bgm_instrument_loop_starts[id]/9 # Note that Instrument $1F Steel Guitar has a length of $088B but a loop point of $088D which is 243.22... packets. Luckily it doesn't matter. var loop_start_packet: int = snes_data.bgm_instrument_loop_starts[id]/9 # Note that Instrument $1F Steel Guitar has a length of $088B but a loop point of $088D which is 243.22... packets. Luckily it doesn't matter.
buffer.seek(snes_data.bgm_instrument_brr_pointers[id] & 0x3FFFFF) buffer.seek(snes_data.bgm_instrument_brr_pointers[id] & 0x3FFFFF)
var size := buffer.get_u16() var size := buffer.get_u16()
var num_samples := (size/9)*16 var num_samples := (size/9)*16
var audio := make_sample(buffer, size, sample_rate) var audio := make_sample(buffer, size, sample_rate)
audio.loop_mode = AudioStreamSample.LOOP_FORWARD audio.loop_mode = AudioStreamSample.LOOP_FORWARD
audio.loop_begin = (loop_start_packet * 16) + silent_samples # Each 9byte packet is 16 samples audio.loop_begin = (loop_start_packet * 16) # Each 9byte packet is 16 samples
audio.loop_end = silent_samples + num_samples audio.loop_end = num_samples
# print_debug('Loaded instrument #%02X with lookup offset $%06X, BRR data offset $%06X, length $%04X (%f packets, %d samples) and loop point %d samples' % [id, lookup_offset, brr_offset, size, size/9.0, num_samples, audio.loop_begin]) # print_debug('Loaded instrument #%02X with lookup offset $%06X, BRR data offset $%06X, length $%04X (%f packets, %d samples) and loop point %d samples' % [id, lookup_offset, brr_offset, size, size/9.0, num_samples, audio.loop_begin])
return audio return audio
@ -148,11 +153,10 @@ func load_sfx_samples_data(snes_data: Dictionary, buffer: StreamPeerBuffer):
buffer.seek(brr_spc_addrs[i] + brr_spc_start) buffer.seek(brr_spc_addrs[i] + brr_spc_start)
# print('Loading sfx sample #%X with BRR data offset $%06X' % [i, buffer.get_position()]) # print('Loading sfx sample #%X with BRR data offset $%06X' % [i, buffer.get_position()])
var sample_rate := get_reference_pitch_samplerate(snes_data.sfx_samplerates[i] & 0xFF) var sample_rate := get_reference_pitch_samplerate(snes_data.sfx_samplerates[i] & 0xFF)
var silent_samples := ((sample_rate * PREPEND_MS) / 1000)
var audio := make_sample(buffer, 900, sample_rate) var audio := make_sample(buffer, 900, sample_rate)
var loop_start_packet: int = brr_spc_loop_addrs[i] - brr_spc_addrs[i] var loop_start_packet: int = brr_spc_loop_addrs[i] - brr_spc_addrs[i]
audio.loop_mode = AudioStreamSample.LOOP_FORWARD audio.loop_mode = AudioStreamSample.LOOP_FORWARD
audio.loop_begin = (loop_start_packet * 16) + silent_samples # Each 9byte packet is 16 samples audio.loop_begin = loop_start_packet * 16 # Each 9byte packet is 16 samples
audio.loop_end = (len(audio.data)/2) audio.loop_end = (len(audio.data)/2)
sfx_samples.append(audio) # Use 900 as a limit, it won't be hit, parser stops after End packet anyway sfx_samples.append(audio) # Use 900 as a limit, it won't be hit, parser stops after End packet anyway
emit_signal('audio_sfx_sample_loaded', i) emit_signal('audio_sfx_sample_loaded', i)
@ -162,13 +166,103 @@ func load_sfx_samples_data(snes_data: Dictionary, buffer: StreamPeerBuffer):
# Called when the node enters the scene tree for the first time. # Called when the node enters the scene tree for the first time.
func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer): func load_samples(snes_data: Dictionary, buffer: StreamPeerBuffer):
load_sfx_samples_data(snes_data, buffer) load_sfx_samples_data(snes_data, buffer)
var largest_sample_idx := -1
var largest_sample_sample_count := 0
var total_frames := 0
# For some reason, this is a bit slow currently under certain editor conditions. Might optimize later. # For some reason, this is a bit slow currently under certain editor conditions. Might optimize later.
for i in INST_NUM: for i in INST_NUM:
instrument_samples.append(get_inst_sample_data(snes_data, buffer, i)) instrument_samples.append(get_inst_sample_data(snes_data, buffer, i))
total_frames += instrument_samples[i].loop_end
if largest_sample_sample_count < instrument_samples[i].loop_end:
largest_sample_sample_count = instrument_samples[i].loop_end
largest_sample_idx = i
# Workaround for Godot 3.x quirk where looping samples are interpolated as if they go to nothing instead of looping # Workaround for Godot 3.x quirk where looping samples are interpolated as if they go to nothing instead of looping
instrument_samples_HACK_EXTENDED_LOOPS.append(HACK_EXTEND_LOOP_SAMPLE(instrument_samples[i])) instrument_samples_HACK_EXTENDED_LOOPS.append(HACK_EXTEND_LOOP_SAMPLE(instrument_samples[i]))
print('Instrument %02X has mix_rate %d Hz'%[i, instrument_samples[i].mix_rate]) # print('Instrument %02X has mix_rate %d Hz and %d samples'%[i, instrument_samples[i].mix_rate, len(instrument_samples[i].data)/2])
emit_signal('audio_inst_sample_loaded', i) emit_signal('audio_inst_sample_loaded', i)
# print('Largest sample is instrument %d with length %d and mix_rate %d'%[largest_sample_idx, largest_sample_sample_count, instrument_samples[largest_sample_idx].mix_rate])
# print('Total frames: %d'%total_frames)
# We start the texture with a bunch of same-size headers
# uint16 sample_start // The true start, after the prepended 3 frames of silence
# uint16 sample_length // 3 frames after the true end, because of how we loop
# uint16 sample_loop_begin // 3 frames after the true loop point
# uint16 mixrate
# 2*uint8 AD of ADSR ([0.0, 1.0] is fine)
# 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
# Texture built by samples_to_texture(): per-sample headers followed by the packed 16-bit audio frames
var samples_tex: ImageTexture
# Width of the sample texture in texels; samples_to_texture() stores one 16-bit value (2 bytes, LA8) per texel
const TEX_WIDTH := 2048
# Number of guard frames used for filtering: this many frames are prepended to each sample, and twice as many appended
const FILTER_PAD := 3
func samples_to_texture():
	# Packs every instrument and SFX sample into one LA8 texture (one 16-bit little-endian value per texel)
	# and stores it in self.samples_tex for the sound render shader.
	# Layout of the unwrapped data: [6-texel header per sample ...][payload of sample 0][payload of sample 1]...
	# Each payload is: FILTER_PAD frames of silence, the S16LE audio data, then FILTER_PAD*2 trailing frames
	# (silence for non-looping samples, a continuation of the loop for looping ones).
	var all_samples = instrument_samples + sfx_samples
	# 6 texels per header: 4 uint16 fields plus 4 uint8 fields (2 texels)
	var header_length: int = len(all_samples) * 6

	# PoolByteArray is passed by value in Godot 3.x, so arrays assigned to a StreamPeerBuffer never see
	# later writes. Always read sizes and contents back from the buffers themselves.
	var header_buffer := StreamPeerBuffer.new()
	var payload_buffer := StreamPeerBuffer.new()

	for sample in all_samples:
		var loop_end: int = sample.loop_end
		var loop_begin: int = sample.loop_begin
		var nonlooping: bool = loop_begin >= loop_end
		if nonlooping:
			# Non-looping samples "loop" over a single frame of the trailing silence
			loop_begin = loop_end
			loop_end += 1

		var sample_start: int = header_length + (payload_buffer.get_size()/2) + FILTER_PAD
		if sample_start > 0xFFFF:
			# The header field is only 16 bits wide; put_u16 would silently truncate this offset
			print_debug('Sound Sample Texture sample_start does not fit in uint16: %d'%sample_start)
		header_buffer.put_u16(sample_start)  # sample_start
		header_buffer.put_u16(loop_end + FILTER_PAD)  # sample_length
		header_buffer.put_u16(loop_begin + FILTER_PAD)  # sample_loop_begin
		header_buffer.put_u16(sample.mix_rate)  # sample_mixrate
		header_buffer.put_u8(0)  # TODO: attack
		header_buffer.put_u8(0)  # TODO: decay
		header_buffer.put_u8(0)  # TODO: sustain
		header_buffer.put_u8(0)  # TODO: release

		for i in FILTER_PAD:  # Prepend 3 frames of silence
			payload_buffer.put_16(0)
		payload_buffer.put_data(sample.data)  # Copy entire S16LE audio data
		if nonlooping:
			for i in FILTER_PAD*2:
				payload_buffer.put_16(0)  # 6 frames of trailing silence to loop
		else:
			# Copy frame by frame in case the loop is shorter than 6 frames
			var loop_length_bytes: int = (loop_end - loop_begin) * 2  # Buffer positions are in bytes, 2 bytes per frame
			for i in FILTER_PAD*2:
				var pos := payload_buffer.get_position()
				payload_buffer.seek(pos - loop_length_bytes)
				var frame := payload_buffer.get_16()
				payload_buffer.seek(pos)
				payload_buffer.put_16(frame)

	# Combine the unwrapped arrays
	var data: PoolByteArray = header_buffer.data_array + payload_buffer.data_array

	# Now calculate wrapping and rowwise padding for the combined array
	# NOTE(review): this duplicates the last FILTER_PAD texels of each row at the start of the next row, which
	# does not appear to line up with the shader's `instrument_row_padding + mod(frame, instrument_row_payload)`
	# addressing - confirm the intended row layout before relying on samples that span multiple rows.
	for row in TEX_WIDTH:
		var row_end: int = (row + 1) * TEX_WIDTH * 2  # Remember: 8bit array, 16bit values (so this is in bytes)
		if len(data) <= row_end:
			break
		# [... a b c] + [a b c] + [a b c ...]
		var margin := data.subarray(row_end-FILTER_PAD*2, row_end-1)
		data = data.subarray(0, row_end-1) + margin + data.subarray(row_end-FILTER_PAD*2, -1)

	# Round the row count up to a power of two using integer maths (log/pow can misround exact powers, and log(0) is -inf)
	var texel_count: int = len(data)/2
	var needed_rows: int = (texel_count + TEX_WIDTH - 1) / TEX_WIDTH
	var rows: int = nearest_po2(needed_rows)
	if rows < 1:
		rows = 1  # Never create a zero-height image, even with no samples loaded
	if rows > TEX_WIDTH:
		print_debug('Sound Sample Texture rows have exceeded width: %d > %d'%[rows, TEX_WIDTH])

	# Now that the full texture size is known, pad our existing data with zeroes until the end
	var final_data_size_bytes = rows * TEX_WIDTH * 2
	if final_data_size_bytes > len(data):
		var end_padding := PoolByteArray()
		end_padding.resize(final_data_size_bytes - len(data))
		end_padding.fill(0)
		data = data + end_padding

	# data is complete, turn it into an ImageTexture for the shader to use
	var samples_img = Image.new()
	samples_img.create_from_data(TEX_WIDTH, rows, false, Image.FORMAT_LA8, data)
	self.samples_tex = ImageTexture.new()
	self.samples_tex.create_from_image(samples_img, Texture.FLAG_FILTER)
var player := AudioStreamPlayer.new() # Make one for each channel, later var player := AudioStreamPlayer.new() # Make one for each channel, later

View File

@ -1,3 +1,6 @@
// ============================================================= BOILERPLATE =============================================================
// While most of the data we are working with is integral, GPU conversion overheads mean almost all of this will be floats.
// Unfortunately, this loses type-checking on [0.0, 1.0] vs [0,255] etc. so a lot of this will involve comments declaring ranges.
shader_type canvas_item; shader_type canvas_item;
render_mode blend_premul_alpha; render_mode blend_premul_alpha;
const float TEX_SIZE = 4096.0; const float TEX_SIZE = 4096.0;
@ -13,11 +16,16 @@ const float x10000 = float(0x10000); // 65536.0
const vec2 INT16_DOT_BE = vec2(xFF00, x00FF); const vec2 INT16_DOT_BE = vec2(xFF00, x00FF);
const vec2 INT16_DOT_LE = vec2(x00FF, xFF00); const vec2 INT16_DOT_LE = vec2(x00FF, xFF00);
uniform sampler2D tex : hint_normal; uniform sampler2D tex : hint_normal;
float unpack_uint16(vec2 uint16) {
// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [0, 65535] in float32
// uint16.x is the low byte and uint16.y the high byte (little-endian), weighted by INT16_DOT_LE = vec2(x00FF, xFF00).
// NOTE(review): presumably x00FF == 255.0 and xFF00 == 65280.0, so each normalized byte b/255 becomes b and b*256 - confirm against the constant definitions.
return dot(uint16, INT16_DOT_LE);
}
float unpack_int16(vec2 int16) {
	// Convert packed 2byte integer, sampled as two [0.0, 1.0] range floats, to the original int value [-32768, 32767] in float32
	float unsigned = dot(int16, INT16_DOT_LE);
	// Two's complement: only values above 0x7FFF wrap to negative. 0x7FFF (32767) itself is the largest
	// positive value and must not have 0x10000 subtracted (the previous `unsigned < x7FFF` test turned it into -32769).
	// NOTE(review): assumes x7FFF == float(0x7FFF), matching the naming of x10000 - confirm against its definition.
	return unsigned - (unsigned > x7FFF ? x10000 : 0.0);
}
@ -48,6 +56,76 @@ vec4 test_writeback(vec2 uv) {
return output; return output;
} }
// ============================================================= LOGIC =============================================================
// We have around 200k frames across 35 instrument samples
// 35 instrument samples and 8 sfx samples = 43 samples
// 2048x128 texture maybe? at 2bytes per texel, that's 512KiB of VRAM
// We start the texture with a bunch of same-size headers
// uint16 sample_start // The true start, after the prepended 3 frames of silence
// uint16 sample_length // 3 frames after the true end, because of how we loop
// uint16 sample_loop_begin // 3 frames after the true loop point
// uint16 mixrate
// 2*uint8 AD of ADSR ([0.0, 1.0] is fine)
// 2*uint8 SR of ADSR ([0.0, 1.0] is fine)
// So six texture() calls spent on header information, and one on the final lookup.
// Alternatively, sample length could be omitted and fetched as the start of the next entry to save redundant entries.
//
// To accommodate filtering, every sample must begin with 3 frames of silence, and end with 6 frames of the beginning of the loop.
// Looped playback will go from the first 3 of 6 frames at the end, to the third frame after the loop start point, to avoid filter bleeding.
// If a sample does not loop, it must have 6 frames of silence at the end, not including the subsequent next sample's 3 frames of silence prefix.
// As such, every sample will have an additional 9 frames, 3 before, 6 after.
// Additionally, every row of the texture must have 3 redundant frames on either side - i.e., we only sample from [3, 2045) on any given row.
// So the payload of a 2048-wide texture will be 2042 per row, excluding the initial header.
// So for 43 samples, a header of 43*6 = 258 texels starts the first row,
// after which the first sample's 3 frames of silence (3 texels of (0.0, 0.0), 6 bytes of 0x00) may begin.
// A 2048x128 texture would have a payload of 2042x128 = 261376 frames (texels) excluding header
// With the 258 texel header, which uses 3 texels of margin, 255 would be subtracted from the above payload,
// leaving 261121 texels for the sample data.
// Number of texels occupied by each sample's header (start, length, loop begin, mixrate, AD, SR)
const float HEADER_LENGTH_TEXELS = 6.0;
// Texture containing the sample headers followed by the audio frames, one packed 16-bit value per texel
uniform sampler2D instrument_samples;
// Dimensions of instrument_samples in texels, used to convert texel coordinates to UVs
uniform vec2 instrument_samples_size = vec2(2048.0, 128.0);
uniform float instrument_row_padding = 3.0; // In case we want to go to cubic filtering
uniform float instrument_row_payload = 2042.0; // 2048-3-3 Make sure to set with instrument_samples_size and instrument_row_padding!
uniform float reference_note = 71.0; // [0, 255], possibly [0, 127]
uniform float output_mixrate = 32000.0; // SNES SPC output is 32kHz
float get_pitch_scale(float note) {
	// Playback-rate multiplier for `note` relative to reference_note: each 12 notes doubles the rate.
	// exp2(x) is used in place of the equivalent pow(2.0, x).
	float note_offset = note - reference_note;
	return exp2(note_offset/12.0);
}
vec2 get_inst_texel(vec2 xy) {
	// Fetch the two bytes (luminance in .x, alpha in .w) of the texel at texel coordinate xy.
	// xy is in texels, where an integral value means "exactly this texel". UV space places texel centres
	// at (i + 0.5)/size, so without the half-texel offset an integral coordinate lands on the edge between
	// texels i-1 and i, and a filtered fetch would blend the two neighbours (and the two neighbouring rows).
	return texture(instrument_samples, (xy + vec2(0.5))/instrument_samples_size).xw;
}
float get_instrument_sample(float instrument_index, float pitch_scale, float t, float t_end) {
	// Returns the value of instrument `instrument_index` at playback time t, pitched by pitch_scale,
	// wrapping back into the loop region once playback runs past the end of the sample.
	// t is multiplied by the sample's mixrate to get a frame position, so it is expected to be in seconds.
	// t_end is for ADSR purposes (TODO: not used yet, nor are the ADSR header fields below)
	float header_offset = instrument_index * HEADER_LENGTH_TEXELS;
	float sample_start = unpack_uint16(get_inst_texel(vec2(header_offset, 0.0))); // The true start, after the prepended 3 frames of silence
	float sample_length = unpack_uint16(get_inst_texel(vec2(header_offset + 1.0, 0.0))); // 3 frames after the true end, because of how we loop
	float sample_loop_begin = unpack_uint16(get_inst_texel(vec2(header_offset + 2.0, 0.0))); // 3 frames after the true loop point
	float sample_mixrate = unpack_uint16(get_inst_texel(vec2(header_offset + 3.0, 0.0)));
	vec2 attack_decay = get_inst_texel(vec2(header_offset + 4.0, 0.0));
	vec2 sustain_release = get_inst_texel(vec2(header_offset + 5.0, 0.0));

	// Calculate the point we want to sample in linear space
	float mixrate = sample_mixrate * pitch_scale;
	float target_frame = t * mixrate;

	// If we're past the end of the sample, we need to wrap it back to within the loop range
	// A loop must be at least one frame long: a header with loop_begin >= length would otherwise
	// divide by zero (or wrap forwards instead of backwards) below.
	float loop_length = max(sample_length - sample_loop_begin, 1.0);
	float overshoot = max(target_frame - sample_length, 0.0);
	float overshoot_loops = ceil(overshoot/loop_length);
	target_frame -= overshoot_loops*loop_length;

	// Now we need to identify the sampling point since our frames are spread across multiple rows for GPU reasons
	// Each row only carries instrument_row_payload frames, starting at texel instrument_row_padding;
	// the texels before and after that range are lead-in/lead-out margin for filtering.
	// Note that y should be integral, but x should be continuous, as that's what applies the filtering!
	target_frame += sample_start;
	vec2 sample_xy = vec2(instrument_row_padding + mod(target_frame, instrument_row_payload), trunc(target_frame/instrument_row_payload));
	return rescale_int16(unpack_int16(get_inst_texel(sample_xy)));
}
void fragment() { void fragment() {
// GLES2 // GLES2
vec2 uv = vec2(UV.x, 1.0-UV.y); vec2 uv = vec2(UV.x, 1.0-UV.y);

View File

@ -124,4 +124,23 @@ func _ready() -> void:
$btn_hack_loop_extension.text += ' (%dms)'%SoundLoader.HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS $btn_hack_loop_extension.text += ' (%dms)'%SoundLoader.HACK_EXTEND_LOOP_SAMPLE_EXTRA_MS
for i in len(RomLoader.snes_data.bgm_song_pointers): for i in len(RomLoader.snes_data.bgm_song_pointers):
var pointer = RomLoader.snes_data.bgm_song_pointers[i] var pointer = RomLoader.snes_data.bgm_song_pointers[i]
print('BGM 0x%02X (%02d) at 0x%06X' % [i, i, pointer]) # print('BGM 0x%02X (%02d) at 0x%06X' % [i, i, pointer])
# var test_payload := PoolByteArray()
# test_payload.resize(4096*4096*2)
# # for i in 5:
# # test_payload.fill(i*2+10)
# # $'%audio_renderer'.render_queue.append(test_payload)
# test_payload.fill(0)
# for i in 65536:
# test_payload.set(i*2, i%256)
# test_payload.set(i*2+1, i/256)
# $'%audio_renderer'.render_queue.append(test_payload)
# # $'%audio_renderer'.render_queue.append(test_payload)
# func _process(_delta):
# update()
# func _draw() -> void:
# if $'%audio_renderer'.waiting_for_viewport:
# $'%audio_renderer'.get_result()