From c311171270210e1eebc2222546025a8fb1fbaf44 Mon Sep 17 00:00:00 2001
From: Luke Hubmayer-Werner
Date: Thu, 27 Jun 2024 17:16:55 +0930
Subject: [PATCH] Refactor into separate source files

---
 includes/helpers.py   | 163 +++++++++++++++++++++++++++++++++
 includes/rom_serde.py |  47 ++++++++++
 tabcomp.py            | 208 +-----------------------------------------
 3 files changed, 212 insertions(+), 206 deletions(-)
 create mode 100644 includes/helpers.py
 create mode 100644 includes/rom_serde.py

diff --git a/includes/helpers.py b/includes/helpers.py
new file mode 100644
index 0000000..098a683
--- /dev/null
+++ b/includes/helpers.py
@@ -0,0 +1,163 @@
+# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
+# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
+# For nested IDs, the format is ONLY like IP addresses:
+# decimal with '.' separator, e.g. 16.127.1
+# hexadecimal with ':' separator, e.g. 10:7f:1
+# Nested IDs do not support prefixes.
+
+def try_int(v):
+    try:
+        if v[0] in '#$': # Maybe % too?
+            return int(v[1:], 16)
+        if v[-1] == 'h':
+            return int(v[:-1], 16)
+        return int(v, 0)
+    except:
+        if v == '':
+            return None
+        return v
+
+
+def get_max_number_width(container, fmt: str = 'd') -> int:
+    return len(f'{len(container)-1:{fmt}}')
+
+
+def get_number_zero_pad(container, fmt: str = 'd') -> str:
+    max_digits = len(f'{len(container)-1:{fmt}}') # Could instead call get_max_number_width
+    return f'0{max_digits}{fmt}'
+
+
+def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
+    delimiter = ':' if fmt in 'Xx' else '.'
+    if max_digits:
+        return delimiter.join([f'{value:0{digits}{fmt}}' for value, digits in zip(values, max_digits)])
+    else:
+        return delimiter.join([f'{value:{fmt}}' for value in values])
+
+
+def decode_nested_ids(string: str) -> list[int]:
+    hex = ':' in string
+    delimiter = ':' if hex else '.'
+    return [int(i, 16 if hex else 10) for i in string.split(delimiter)]
+
+
+def flatten_keys(container: dict | list, prefix: str = '') -> dict:
+    output = {}
+
+    def flatten_item(k: str, v):
+        if isinstance(v, dict) or isinstance(v, list):
+            flat = flatten_keys(v, f'{prefix}{k}.')
+            for k2, v2 in flat.items():
+                output[k2] = v2
+        else:
+            output[f'{prefix}{k}'] = v
+
+    if isinstance(container, list):
+        fmt = get_number_zero_pad(container, 'd') # Zero pad all of the indices to the same decimal string length as the final one
+        for k, v in enumerate(container):
+            flatten_item(f'{k:{fmt}}', v)
+    elif isinstance(container, dict):
+        for k, v in container.items():
+            flatten_item(k, v)
+    else:
+        raise ValueError(f'flatten_keys is undefined for container type "{container}"')
+
+    return output
+
+
+def flatten_table(table: list, id_fmt: str = 'x') -> list:
+    if len(table) < 1:
+        return table # Empty
+    if isinstance(table[0], dict): # A simple table
+        return [flatten_keys(d) for d in table]
+    if isinstance(table[0], list): # Nested lists are bad when expanded as columns, so we'll expand them into one row per innermost entry, keyed by a composite ID
+        flattened_table = []
+
+        def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
+            if isinstance(data, list):
+                max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
+                for id, sub in enumerate(data):
+                    flatten_list(sub, ids + [id], max_digits)
+            else:
+                entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
+                entry.update(flatten_keys(data))
+                flattened_table.append(entry)
+
+        flatten_list(table, [], [])
+        return flattened_table
+    else:
+        raise NotImplementedError(table[0])
+
+
+def unflatten_keys(d: dict) -> dict:
+    output = {}
+    for k, v in d.items():
+        keysplit = k.split('.')
+        target_dict = output
+        for prefix in keysplit[:-1]:
+            if prefix not in target_dict:
+                target_dict[prefix] = {}
+            target_dict = target_dict[prefix]
+        target_dict[keysplit[-1]] = v # Assign at the leaf key so nested keys round-trip with flatten_keys
+    return output
+
+
+def unflatten_table(headers: list[str], entries: list):
+    if 'ID' not in headers:
+        return entries
+    # This could be an array of an array of an array of an...
+    id0 = str(entries[0]['ID']) # try_int may have already parsed a simple ID into an int
+    if '.' not in id0 and ':' not in id0:
+        return entries
+    # Treat this as a nested array
+    table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
+
+    output = []
+    def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
+        i, *remainder = id_split
+        if len(remainder) > 0:
+            while len(cur_array) <= i: # Make sure our array has the index we're about to jump into
+                cur_array.append([])
+            unflatten_arrays(remainder, cur_array[i], value)
+        else:
+            while len(cur_array) <= i: # Make sure our array has the index we're about to overwrite
+                cur_array.append(None)
+            cur_array[i] = value
+
+    for id_split in sorted(table.keys()):
+        unflatten_arrays(id_split, output, table[id_split])
+
+    return output
+
+
+def dump_tsv(filename, table, id_column=True) -> None:
+    table_flat = flatten_table(table)
+
+    with open(filename, 'w') as file:
+        headers = list(table_flat[0].keys())
+        if id_column and 'ID' not in headers: # Some flattened tables build their own ID column!
+            # See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
+            fmt = get_number_zero_pad(table_flat, 'X')
+            file.write('\t'.join(['ID'] + headers) + '\n')
+            for i, entry in enumerate(table_flat):
+                file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')
+        else:
+            file.write('\t'.join(headers) + '\n')
+            for i, entry in enumerate(table_flat):
+                file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')
+
+
+def load_tsv(filename) -> list:
+    with open(filename, 'r') as file:
+        lines = file.read().rstrip().split('\n')
+    if len(lines) < 2:
+        return []
+    headers = lines[0].split('\t')
+
+    # Simple line-by-line unflatten
+    entries = []
+    for line in lines[1:]:
+        entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}
+        entries.append(unflatten_keys(entry))
+
+    return unflatten_table(headers, entries)
diff --git a/includes/rom_serde.py b/includes/rom_serde.py
new file mode 100644
index 0000000..b057e24
--- /dev/null
+++ b/includes/rom_serde.py
@@ -0,0 +1,47 @@
+from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
+from includes.helpers import load_tsv
+
+class ROMHandler:
+    offset_key: str
+    struct_definitions: dict
+
+    def extract(self, table: str, in_buffer) -> list[dict]:
+        # Deserialize a table
+        leftover_bits = LeftoverBits()
+        entry = self.addresses[table] # Remember to try/catch
+        offset = entry[self.offset_key]
+        buf = ReadBuffer(in_buffer, offset)
+        return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits)
+
+    def build(self, table: str, new_data: list[dict], out_buffer):
+        # Serialize complete data. This WILL fail if the input data is incomplete.
+        leftover_bits = LeftoverBits()
+        entry = self.addresses[table] # Remember to try/catch
+        offset = entry[self.offset_key]
+        buf = WriteBuffer(out_buffer, offset)
+        get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits)
+
+    def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):
+        # Safely merge partial data over the existing data, then serialize it. Assumes a flat table whose entries are indexed by integer ID.
+        existing_data = self.extract(table, in_buffer)
+        for i, new in enumerate(new_data):
+            id = new.get('ID', i)
+            for k, v in new.items():
+                if k != 'ID' and v is not None: # Allow holes in the table for values we don't care about overwriting
+                    existing_data[id][k] = v
+        self.build(table, existing_data, out_buffer)
+
+
+def load_ff5_snes_struct_definitions() -> dict:
+    existing_structs = get_base_structarraytypes()
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/structs_SNES_stubs.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_stubs.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_save.tsv', existing_structs)
+    return existing_structs
+
+
+class FF5SNESHandler(ROMHandler):
+    offset_key: str = 'SNES'
+    struct_definitions: dict = load_ff5_snes_struct_definitions()
+    addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}
diff --git a/tabcomp.py b/tabcomp.py
index 32ac40c..63bafa0 100644
--- a/tabcomp.py
+++ b/tabcomp.py
@@ -1,209 +1,5 @@
-from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
-
-# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
-# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
-# For nested IDs, the format is ONLY like IP addresses:
-# decimal with '.' separator, e.g. 16.127.1
-# hexadecimal with ':' separator, e.g. 10:7f:1
-# Nested IDs do not support prefixes.
-
-def try_int(v):
-    try:
-        if v[0] in '#$': # Maybe % too?
-            return int(v[1:], 16)
-        if v[-1] == 'h':
-            return int(v[:-1], 16)
-        return int(v, 0)
-    except:
-        if v == '':
-            return None
-        return v
-
-
-def get_max_number_width(container, fmt: str = 'd') -> int:
-    return len(f'{len(container)-1:{fmt}}')
-
-
-def get_number_zero_pad(container, fmt: str = 'd') -> str:
-    max_digits = len(f'{len(container)-1:{fmt}}') # Could instead call get_max_number_width
-    return f'0{max_digits}{fmt}'
-
-
-def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
-    delimiter = ':' if fmt in 'Xx' else '.'
-    if max_digits:
-        return delimiter.join([f'{value:0{digits}{fmt}}' for value, digits in zip(values, max_digits)])
-    else:
-        return delimiter.join([f'{value:{fmt}}' for value in values])
-
-
-def decode_nested_ids(string: str) -> list[int]:
-    hex = ':' in string
-    delimiter = ':' if hex else '.'
-    return [int(i, 16 if hex else 10) for i in string.split(delimiter)]
-
-
-def flatten_keys(container: dict | list, prefix: str = '') -> dict:
-    output = {}
-
-    def flatten_item(k: str, v):
-        if isinstance(v, dict) or isinstance(v, list):
-            flat = flatten_keys(v, f'{prefix}{k}.')
-            for k2, v2 in flat.items():
-                output[k2] = v2
-        else:
-            output[f'{prefix}{k}'] = v
-
-    if isinstance(container, list):
-        fmt = get_number_zero_pad(container, 'd') # Zero pad all of the indices to the same decimal string length as the final one
-        for k, v in enumerate(container):
-            flatten_item(f'{k:{fmt}}', v)
-    elif isinstance(container, dict):
-        for k, v in container.items():
-            flatten_item(k, v)
-    else:
-        raise ValueError(f'flatten_keys is undefined for container type "{container}"')
-
-    return output
-
-
-def flatten_table(table: list, id_fmt: str = 'x') -> list:
-    if len(table) < 1:
-        return table # Empty
-    if isinstance(table[0], dict): # A simple table
-        return [flatten_keys(d) for d in table]
-    if isinstance(table[0], list): # Nested lists are bad when expanded as columns, so we'll expand
-        print(table[0])
-        flattened_table = []
-
-        def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
-            if isinstance(data, list):
-                max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
-                for id, sub in enumerate(data):
-                    flatten_list(sub, ids + [id], max_digits)
-            else:
-                entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
-                entry.update(flatten_keys(data))
-                flattened_table.append(entry)
-
-        flatten_list(table, [], [])
-        return flattened_table
-    else:
-        raise NotImplementedError(table[0])
-
-
-def unflatten_keys(d: dict) -> dict:
-    output = {}
-    for k, v in d.items():
-        keysplit = k.split('.')
-        target_dict = output
-        for prefix in keysplit[:-1]:
-            if prefix not in target_dict:
-                target_dict[prefix] = {}
-            target_dict = target_dict[prefix]
-        target_dict[k] = v
-    return output
-
-
-def unflatten_table(headers: list[str], entries: list):
-    if 'ID' not in headers:
-        return entries
-    # This could be an array of an array of an array of an...
-    id0 = entries[0]['ID']
-    if '.' not in id0 and ':' not in id0:
-        return entries
-    # Treat this as a nested array
-    table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
-
-    output = []
-    def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
-        i, *remainder = id_split
-        if len(remainder) > 0:
-            while len(cur_array) <= i: # Make sure our array has the index we're about to jump into
-                cur_array.append([])
-            unflatten_arrays(remainder, cur_array[i], value)
-        else:
-            while len(cur_array) <= i: # Make sure our array has the index we're about to overwrite
-                cur_array.append(None)
-            cur_array[i] = value
-
-    for id_split in sorted(table.keys()):
-        unflatten_arrays(id_split, output, table[id_split])
-
-    return output
-
-
-def dump_tsv(filename, table, id_column=True) -> None:
-    table_flat = flatten_table(table)
-
-    with open(filename, 'w') as file:
-        headers = list(table_flat[0].keys())
-        if id_column and 'ID' not in headers: # Some flattened tables build their own ID column!
-            # See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
- fmt = get_number_zero_pad(table_flat, 'X') - file.write('\t'.join(['ID'] + headers) + '\n') - for i, entry in enumerate(table_flat): - file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n') - else: - file.write('\t'.join(headers) + '\n') - for i, entry in enumerate(table_flat): - file.write('\t'.join([str(entry[key]) for key in headers]) + '\n') - - -def load_tsv(filename) -> list: - with open(filename, 'r') as file: - lines = file.read().rstrip().split('\n') - if len(lines) < 2: - return [] - headers = lines[0].split('\t') - - # Simple line-by-line unflatten - entries = [] - for line in lines[1:]: - entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))} - entries.append(unflatten_keys(entry)) - - return unflatten_table(headers, entries) - - -def load_ff5_snes_struct_definitions() -> dict: - existing_structs = get_base_structarraytypes() - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/structs_SNES_stubs.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_stubs.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_save.tsv', existing_structs) - return existing_structs - -class FF5SNESHandler: - struct_definitions: dict = load_ff5_snes_struct_definitions() - addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')} - - def extract(self, table: str, in_buffer) -> list[dict]: - # Deserialize a table - leftover_bits = LeftoverBits() - entry = self.addresses[table] # Remember to try/catch - offset = entry['SNES'] - buf = ReadBuffer(in_buffer, offset) - return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits) - - def build(self, table: str, new_data: list[dict], out_buffer): - # Serialize complete data. This WILL fail if the input data is incomplete. - leftover_bits = LeftoverBits() - entry = self.addresses[table] # Remember to try/catch - offset = entry['SNES'] - buf = WriteBuffer(out_buffer, offset) - get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits) - - def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer): - # Safely merge partial data over the existing data, then serialize it. - existing_data = self.extract(table, in_buffer) - for i, new in enumerate(new_data): - id = new.get('ID', i) - for k, v in new.items(): - if k != 'ID' and v is not None: # Allow holes in the table for values we don't care about overwriting - existing_data[id][k] = v - self.build(table, existing_data, out_buffer) - +from includes.helpers import load_tsv, dump_tsv +from includes.rom_serde import FF5SNESHandler if __name__ == '__main__': from argparse import ArgumentParser
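
Not part of the patch: a rough usage sketch of the new module split, for review context. The ROM paths and the 'MonsterStats' table label below are placeholders (real labels come from ChocolateBirdData/5/addresses_SNES_PSX.tsv), and the snippet assumes ReadBuffer/WriteBuffer accept a bytearray, matching how the handler passes buffers straight through.

from includes.helpers import dump_tsv, load_tsv
from includes.rom_serde import FF5SNESHandler

handler = FF5SNESHandler()

# Dump one table from the ROM to a TSV for hand-editing.
with open('ff5.sfc', 'rb') as f: # placeholder input ROM path
    rom = bytearray(f.read())
dump_tsv('monsters.tsv', handler.extract('MonsterStats', rom)) # placeholder table label

# Merge a (possibly partial) edited TSV back over the original data and write a new ROM.
out = bytearray(rom)
handler.build_partial('MonsterStats', load_tsv('monsters.tsv'), rom, out)
with open('ff5_edited.sfc', 'wb') as f: # placeholder output ROM path
    f.write(out)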