Refactor into separate source files

2024-06-27 17:16:55 +09:30 · 2024-06-27 17:16:55 +09:30 · c311171270
parent 41480ac025
commit c311171270
3 changed files with 212 additions and 206 deletions
--- a/includes/helpers.py
+++ b/includes/helpers.py
@ -0,0 +1,163 @@
 # Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
 # Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
 # For nested IDs, the format is ONLY like IP addresses:
 #   decimal with '.' separator,    e.g. 16.127.1
 #   hexadecimal with ':' separator, e.g. 10:7f:1
 # Nested IDs do not support prefixes.
 def try_int(v):
 	"""Convert a TSV cell to an int when it looks like one.
 	Accepts anything int(v, 0) accepts (decimal, 0x, 0b, 0o), plus hexadecimal
 	written with a '#' or '$' prefix or an 'h' suffix.
 	Returns None for the empty string, and the input unchanged when it cannot be parsed.
 	"""
 	try:
 		if v[0] in '#$':  # Maybe % too?
 			return int(v[1:], 16)
 		if v[-1] == 'h':
 			return int(v[:-1], 16)
 		return int(v, 0)
 	except (ValueError, TypeError, LookupError):
 		# ValueError: not a number. LookupError: empty string (or a mapping without key 0).
 		# TypeError: v is not a string at all. Anything else (e.g. KeyboardInterrupt) propagates.
 		if v == '':
 			return None
 		return v
 def get_max_number_width(container, fmt: str = 'd') -> int:
 	"""Return the character count of the last index of `container` once formatted with `fmt`."""
 	last_index = len(container) - 1
 	return len(format(last_index, fmt))
 def get_number_zero_pad(container, fmt: str = 'd') -> str:
 	"""Build a zero-padding format spec (e.g. '03d') wide enough for the last index of `container`."""
 	width = len(format(len(container) - 1, fmt))
 	return '0{}{}'.format(width, fmt)
 def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
 	"""Join nested indices into one ID string.
 	Hex formats ('x'/'X') are joined with ':', everything else with '.'.
 	When `max_digits` is given, each value is zero-padded to the paired width.
 	"""
 	separator = '.'
 	if fmt in 'Xx':
 		separator = ':'
 	if not max_digits:
 		parts = (format(value, fmt) for value in values)
 	else:
 		parts = (format(value, f'0{width}{fmt}') for value, width in zip(values, max_digits))
 	return separator.join(parts)
 def decode_nested_ids(string: str) -> list[int]:
 	"""Split a nested ID back into ints: ':'-separated parts are hex, '.'-separated parts are decimal."""
 	if ':' in string:
 		separator, base = ':', 16
 	else:
 		separator, base = '.', 10
 	return [int(part, base) for part in string.split(separator)]
 def flatten_keys(container: dict | list, prefix: str = '') -> dict:
 	output = {}
 	def flatten_item(k: str, v):
 		if isinstance(v, dict) or isinstance(v, list):
 			flat = flatten_keys(v, f'{prefix}{k}.')
 			for k2, v2 in flat.items():
 				output[k2] = v2
 		else:
 			output[f'{prefix}{k}'] = v
 	if isinstance(container, list):
 		fmt = get_number_zero_pad(container, 'd')  # Zero pad all of the indices to the same decimal string length as the final one
 		for k, v in enumerate(container):
 			flatten_item(f'{k:{fmt}}', v)
 	elif isinstance(container, dict):
 		for k, v in container.items():
 			flatten_item(k, v)
 	else:
 		raise ValueError(f'flatten_keys is undefined for container type "{container}"')
 	return output
 def flatten_table(table: list, id_fmt: str = 'x') -> list:
 	"""Turn a table into a flat list of single-level dicts.
 	A list of dicts is flattened row by row. A list of lists is walked recursively and each
 	leaf becomes one row with an 'ID' column encoding its index path. An empty table is
 	returned unchanged. Raises NotImplementedError when the first element is neither a dict nor a list.
 	"""
 	if len(table) == 0:
 		return table  # Empty
 	first = table[0]
 	if isinstance(first, dict):  # A simple table
 		return list(map(flatten_keys, table))
 	if not isinstance(first, list):
 		raise NotImplementedError(first)
 	# Nested lists are bad when expanded as columns, so each leaf becomes its own row instead.
 	rows = []
 	def walk(node, path: list[int], widths: list[int]) -> None:
 		if not isinstance(node, list):
 			rows.append({'ID': encode_nested_ids(path, widths, id_fmt), **flatten_keys(node)})
 			return
 		child_widths = [*widths, get_max_number_width(node, id_fmt)]
 		for index, child in enumerate(node):
 			walk(child, [*path, index], child_widths)
 	walk(table, [], [])
 	return rows
 def unflatten_keys(d: dict) -> dict:
 	"""Rebuild nested dicts from dot-joined keys, e.g. {'a.b': 1} -> {'a': {'b': 1}}.
 	Every key segment stays a dict key; numeric segments are not converted back into list indices.
 	"""
 	output = {}
 	for k, v in d.items():
 		*parents, leaf = k.split('.')
 		target_dict = output
 		for prefix in parents:
 			target_dict = target_dict.setdefault(prefix, {})
 		# Store under the final segment only; using the full dotted key here would
 		# produce {'a': {'a.b': v}} and never round-trip with flatten_keys.
 		target_dict[leaf] = v
 	return output
 def unflatten_table(headers: list[str], entries: list):
 	"""Rebuild nested lists from rows whose 'ID' column holds a nested ID such as '01:0a' or '1.10'.
 	Rows are returned unchanged when there is no 'ID' header, when there are no rows, or when
 	the first row's ID is not a nested-ID string (for example a plain ID already parsed to an int).
 	"""
 	if 'ID' not in headers or not entries:
 		return entries
 	# This could be an array of an array of an array of an...
 	id0 = entries[0]['ID']
 	# A plain ID such as '0x01' has usually been converted to an int by now; only strings can be nested IDs.
 	if not isinstance(id0, str) or ('.' not in id0 and ':' not in id0):
 		return entries
 	# Treat this as a nested array
 	table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
 	output = []
 	def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
 		i, *remainder = id_split
 		if len(remainder) > 0:
 			while len(cur_array) <= i:  # Make sure our array has the index we're about to jump into
 				cur_array.append([])
 			unflatten_arrays(remainder, cur_array[i], value)
 		else:
 			while len(cur_array) <= i:  # Make sure our array has the index we're about to overwrite
 				cur_array.append(None)
 			cur_array[i] = value
 	for id_split in sorted(table.keys()):
 		unflatten_arrays(id_split, output, table[id_split])
 	return output
 def dump_tsv(filename, table, id_column=True) -> None:
 	"""Write `table` to `filename` as tab-separated text.
 	The table is flattened first and the first row's keys become the header. When `id_column`
 	is true and the flattened rows have no 'ID' key, a zero-padded '0x..' hex row index is
 	prepended as an 'ID' column. An empty table produces an empty file.
 	"""
 	table_flat = flatten_table(table)
 	with open(filename, 'w') as file:
 		if not table_flat:
 			return  # No rows means no header to derive; leave the file empty
 		headers = list(table_flat[0].keys())
 		if id_column and 'ID' not in headers:  # Some flattened tables build their own ID column!
 			# See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
 			fmt = get_number_zero_pad(table_flat, 'X')
 			file.write('\t'.join(['ID'] + headers) + '\n')
 			for i, entry in enumerate(table_flat):
 				file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')
 		else:
 			file.write('\t'.join(headers) + '\n')
 			for entry in table_flat:
 				file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')
 def load_tsv(filename) -> list:
 	"""Read a tab-separated file into a list of row dicts (or nested lists for nested-ID tables).
 	The first line is the header. Cells go through try_int, dotted headers are expanded into
 	nested dicts, and the rows are finally handed to unflatten_table.
 	Returns [] when the file has no data rows.
 	"""
 	with open(filename, 'r') as file:
 		lines = file.read().split('\n')
 	# Drop trailing blank lines only. Stripping the whole text would also eat trailing tabs
 	# on the last data row, silently losing that row's trailing empty cells.
 	while lines and not lines[-1].strip():
 		lines.pop()
 	if len(lines) < 2:
 		return []
 	headers = lines[0].split('\t')
 	# Simple line-by-line unflatten
 	entries = []
 	for line in lines[1:]:
 		entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}
 		entries.append(unflatten_keys(entry))
 	return unflatten_table(headers, entries)
--- a/includes/rom_serde.py
+++ b/includes/rom_serde.py
@ -0,0 +1,47 @@
 from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
 from includes.helpers import load_tsv
 class ROMHandler:
 	"""Base class for reading and writing tables in a ROM buffer.
 	Subclasses supply the three class attributes below.
 	"""
 	offset_key: str  # Key within an address entry that holds the offset to use
 	struct_definitions: dict  # Passed to get_structarraytype together with the entry's 'format'
 	addresses: dict  # Table name -> address entry; each entry is read for offset_key and 'format'
 	def extract(self, table: str, in_buffer) -> list[dict]:
 		"""Deserialize `table` from `in_buffer`. Raises KeyError for an unknown table name."""
 		leftover_bits = LeftoverBits()
 		entry = self.addresses[table]  # Remember to try/catch
 		offset = entry[self.offset_key]
 		buf = ReadBuffer(in_buffer, offset)
 		return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits)
 	def build(self, table: str, new_data: list[dict], out_buffer):
 		"""Serialize complete data for `table` into `out_buffer`. This WILL fail if the input data is incomplete."""
 		leftover_bits = LeftoverBits()
 		entry = self.addresses[table]  # Remember to try/catch
 		offset = entry[self.offset_key]
 		buf = WriteBuffer(out_buffer, offset)
 		get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits)
 	def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):
 		"""Safely merge partial data over the existing data, then serialize it.
 		Each new row targets the existing row named by its 'ID', falling back to its position in `new_data`.
 		"""
 		existing_data = self.extract(table, in_buffer)
 		for i, new in enumerate(new_data):
 			row_id = new.get('ID', i)
 			for k, v in new.items():
 				if k != 'ID' and v is not None:  # Allow holes in the table for values we don't care about overwriting
 					existing_data[row_id][k] = v
 		self.build(table, existing_data, out_buffer)
 def load_ff5_snes_struct_definitions() -> dict:
 	"""Start from the base struct/array types and parse the FF5 SNES definition files into them, in order."""
 	definitions = get_base_structarraytypes()
 	for tsv_path in (
 		'ChocolateBirdData/structs_SNES_stubs.tsv',
 		'ChocolateBirdData/5/structs/SNES_stubs.tsv',
 		'ChocolateBirdData/5/structs/SNES.tsv',
 		'ChocolateBirdData/5/structs/SNES_save.tsv',
 	):
 		parse_struct_definitions_from_tsv_filename(tsv_path, definitions)
 	return definitions
 class FF5SNESHandler(ROMHandler):
 	"""ROMHandler configured for FF5 on SNES. Both tables below are loaded when the class is defined."""
 	# Address entries are read through their 'SNES' column
 	offset_key: str = 'SNES'
 	# Struct definitions parsed from the ChocolateBirdData TSV files
 	struct_definitions: dict = load_ff5_snes_struct_definitions()
 	# Address entries from addresses_SNES_PSX.tsv, keyed by their 'Label' column
 	addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}
--- a/tabcomp.py
+++ b/tabcomp.py
@ -1,209 +1,5 @@
-from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
+from includes.helpers import load_tsv, dump_tsv
-
+from includes.rom_serde import FF5SNESHandler
 # Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
 # Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
 # For nested IDs, the format is ONLY like IP addresses:
 #   decimal with '.' separator,    e.g. 16.127.1
 #   hexadecimal with ':' separator, e.g. 10:7f:1
 # Nested IDs do not support prefixes.
 def try_int(v):
 	"""Convert a TSV cell to an int when it looks like one.
 	Accepts anything int(v, 0) accepts (decimal, 0x, 0b, 0o), plus hexadecimal
 	written with a '#' or '$' prefix or an 'h' suffix.
 	Returns None for the empty string, and the input unchanged when it cannot be parsed.
 	"""
 	try:
 		if v[0] in '#$':  # Maybe % too?
 			return int(v[1:], 16)
 		if v[-1] == 'h':
 			return int(v[:-1], 16)
 		return int(v, 0)
 	except (ValueError, TypeError, LookupError):
 		# ValueError: not a number. LookupError: empty string (or a mapping without key 0).
 		# TypeError: v is not a string at all. Anything else (e.g. KeyboardInterrupt) propagates.
 		if v == '':
 			return None
 		return v
 def get_max_number_width(container, fmt: str = 'd') -> int:
 	"""Return the character count of the last index of `container` once formatted with `fmt`."""
 	last_index = len(container) - 1
 	return len(format(last_index, fmt))
 def get_number_zero_pad(container, fmt: str = 'd') -> str:
 	"""Build a zero-padding format spec (e.g. '03d') wide enough for the last index of `container`."""
 	width = len(format(len(container) - 1, fmt))
 	return '0{}{}'.format(width, fmt)
 def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
 	"""Join nested indices into one ID string.
 	Hex formats ('x'/'X') are joined with ':', everything else with '.'.
 	When `max_digits` is given, each value is zero-padded to the paired width.
 	"""
 	separator = '.'
 	if fmt in 'Xx':
 		separator = ':'
 	if not max_digits:
 		parts = (format(value, fmt) for value in values)
 	else:
 		parts = (format(value, f'0{width}{fmt}') for value, width in zip(values, max_digits))
 	return separator.join(parts)
 def decode_nested_ids(string: str) -> list[int]:
 	"""Split a nested ID back into ints: ':'-separated parts are hex, '.'-separated parts are decimal."""
 	if ':' in string:
 		separator, base = ':', 16
 	else:
 		separator, base = '.', 10
 	return [int(part, base) for part in string.split(separator)]
 def flatten_keys(container: dict | list, prefix: str = '') -> dict:
 	output = {}
 	def flatten_item(k: str, v):
 		if isinstance(v, dict) or isinstance(v, list):
 			flat = flatten_keys(v, f'{prefix}{k}.')
 			for k2, v2 in flat.items():
 				output[k2] = v2
 		else:
 			output[f'{prefix}{k}'] = v
 	if isinstance(container, list):
 		fmt = get_number_zero_pad(container, 'd')  # Zero pad all of the indices to the same decimal string length as the final one
 		for k, v in enumerate(container):
 			flatten_item(f'{k:{fmt}}', v)
 	elif isinstance(container, dict):
 		for k, v in container.items():
 			flatten_item(k, v)
 	else:
 		raise ValueError(f'flatten_keys is undefined for container type "{container}"')
 	return output
 def flatten_table(table: list, id_fmt: str = 'x') -> list:
 	"""Turn a table into a flat list of single-level dicts.
 	A list of dicts is flattened row by row. A list of lists is walked recursively and each
 	leaf becomes one row with an 'ID' column encoding its index path. An empty table is
 	returned unchanged. Raises NotImplementedError when the first element is neither a dict nor a list.
 	"""
 	if len(table) < 1:
 		return table  # Empty
 	if isinstance(table[0], dict):  # A simple table
 		return [flatten_keys(d) for d in table]
 	if isinstance(table[0], list):  # Nested lists are bad when expanded as columns, so we'll expand
 		# (The leftover debug print of table[0] that used to sit here has been removed.)
 		flattened_table = []
 		def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
 			if isinstance(data, list):
 				max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
 				for index, sub in enumerate(data):
 					flatten_list(sub, ids + [index], max_digits)
 			else:
 				entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
 				entry.update(flatten_keys(data))
 				flattened_table.append(entry)
 		flatten_list(table, [], [])
 		return flattened_table
 	else:
 		raise NotImplementedError(table[0])
 def unflatten_keys(d: dict) -> dict:
 	"""Rebuild nested dicts from dot-joined keys, e.g. {'a.b': 1} -> {'a': {'b': 1}}.
 	Every key segment stays a dict key; numeric segments are not converted back into list indices.
 	"""
 	output = {}
 	for k, v in d.items():
 		*parents, leaf = k.split('.')
 		target_dict = output
 		for prefix in parents:
 			target_dict = target_dict.setdefault(prefix, {})
 		# Store under the final segment only; using the full dotted key here would
 		# produce {'a': {'a.b': v}} and never round-trip with flatten_keys.
 		target_dict[leaf] = v
 	return output
 def unflatten_table(headers: list[str], entries: list):
 	"""Rebuild nested lists from rows whose 'ID' column holds a nested ID such as '01:0a' or '1.10'.
 	Rows are returned unchanged when there is no 'ID' header, when there are no rows, or when
 	the first row's ID is not a nested-ID string (for example a plain ID already parsed to an int).
 	"""
 	if 'ID' not in headers or not entries:
 		return entries
 	# This could be an array of an array of an array of an...
 	id0 = entries[0]['ID']
 	# A plain ID such as '0x01' has usually been converted to an int by now; only strings can be nested IDs.
 	if not isinstance(id0, str) or ('.' not in id0 and ':' not in id0):
 		return entries
 	# Treat this as a nested array
 	table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
 	output = []
 	def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
 		i, *remainder = id_split
 		if len(remainder) > 0:
 			while len(cur_array) <= i:  # Make sure our array has the index we're about to jump into
 				cur_array.append([])
 			unflatten_arrays(remainder, cur_array[i], value)
 		else:
 			while len(cur_array) <= i:  # Make sure our array has the index we're about to overwrite
 				cur_array.append(None)
 			cur_array[i] = value
 	for id_split in sorted(table.keys()):
 		unflatten_arrays(id_split, output, table[id_split])
 	return output
 def dump_tsv(filename, table, id_column=True) -> None:
 	"""Write `table` to `filename` as tab-separated text.
 	The table is flattened first and the first row's keys become the header. When `id_column`
 	is true and the flattened rows have no 'ID' key, a zero-padded '0x..' hex row index is
 	prepended as an 'ID' column. An empty table produces an empty file.
 	"""
 	table_flat = flatten_table(table)
 	with open(filename, 'w') as file:
 		if not table_flat:
 			return  # No rows means no header to derive; leave the file empty
 		headers = list(table_flat[0].keys())
 		if id_column and 'ID' not in headers:  # Some flattened tables build their own ID column!
 			# See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
 			fmt = get_number_zero_pad(table_flat, 'X')
 			file.write('\t'.join(['ID'] + headers) + '\n')
 			for i, entry in enumerate(table_flat):
 				file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')
 		else:
 			file.write('\t'.join(headers) + '\n')
 			for entry in table_flat:
 				file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')
 def load_tsv(filename) -> list:
 	"""Read a tab-separated file into a list of row dicts (or nested lists for nested-ID tables).
 	The first line is the header. Cells go through try_int, dotted headers are expanded into
 	nested dicts, and the rows are finally handed to unflatten_table.
 	Returns [] when the file has no data rows.
 	"""
 	with open(filename, 'r') as file:
 		lines = file.read().split('\n')
 	# Drop trailing blank lines only. Stripping the whole text would also eat trailing tabs
 	# on the last data row, silently losing that row's trailing empty cells.
 	while lines and not lines[-1].strip():
 		lines.pop()
 	if len(lines) < 2:
 		return []
 	headers = lines[0].split('\t')
 	# Simple line-by-line unflatten
 	entries = []
 	for line in lines[1:]:
 		entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}
 		entries.append(unflatten_keys(entry))
 	return unflatten_table(headers, entries)
 def load_ff5_snes_struct_definitions() -> dict:
 	"""Start from the base struct/array types and parse the FF5 SNES definition files into them, in order."""
 	definitions = get_base_structarraytypes()
 	for tsv_path in (
 		'ChocolateBirdData/structs_SNES_stubs.tsv',
 		'ChocolateBirdData/5/structs/SNES_stubs.tsv',
 		'ChocolateBirdData/5/structs/SNES.tsv',
 		'ChocolateBirdData/5/structs/SNES_save.tsv',
 	):
 		parse_struct_definitions_from_tsv_filename(tsv_path, definitions)
 	return definitions
 class FF5SNESHandler:
 	"""Reads and writes FF5 SNES tables in a ROM buffer. Both class-level tables are loaded when the class is defined."""
 	struct_definitions: dict = load_ff5_snes_struct_definitions()
 	# Address entries from addresses_SNES_PSX.tsv, keyed by their 'Label' column
 	addresses: dict = {row['Label']: row for row in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}
 	def extract(self, table: str, in_buffer) -> list[dict]:
 		"""Deserialize `table` from `in_buffer`, starting at the entry's 'SNES' offset."""
 		bit_carry = LeftoverBits()
 		address_row = self.addresses[table]  # Remember to try/catch
 		reader = ReadBuffer(in_buffer, address_row['SNES'])
 		array_type = get_structarraytype(address_row['format'], self.struct_definitions)
 		return array_type.get_value(reader, bit_carry)
 	def build(self, table: str, new_data: list[dict], out_buffer):
 		"""Serialize complete data for `table` into `out_buffer`. This WILL fail if the input data is incomplete."""
 		bit_carry = LeftoverBits()
 		address_row = self.addresses[table]  # Remember to try/catch
 		writer = WriteBuffer(out_buffer, address_row['SNES'])
 		array_type = get_structarraytype(address_row['format'], self.struct_definitions)
 		array_type.put_value(writer, new_data, bit_carry)
 	def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):
 		"""Safely merge partial data over the existing data, then serialize it.
 		Each new row targets the existing row named by its 'ID', falling back to its position in `new_data`.
 		"""
 		merged = self.extract(table, in_buffer)
 		for position, patch in enumerate(new_data):
 			row_index = patch.get('ID', position)
 			for field, value in patch.items():
 				# Allow holes in the table for values we don't care about overwriting
 				if field == 'ID' or value is None:
 					continue
 				merged[row_index][field] = value
 		self.build(table, merged, out_buffer)
 if __name__ == '__main__':
 	from argparse import ArgumentParser