From c311171270210e1eebc2222546025a8fb1fbaf44 Mon Sep 17 00:00:00 2001
From: Luke Hubmayer-Werner
Date: Thu, 27 Jun 2024 17:16:55 +0930
Subject: [PATCH] Refactor into separate source files

---
 includes/helpers.py   | 163 +++++++++++++++++++++++++++++++++
 includes/rom_serde.py |  47 ++++++++++
 tabcomp.py            | 208 +-----------------------------------------
 3 files changed, 212 insertions(+), 206 deletions(-)
 create mode 100644 includes/helpers.py
 create mode 100644 includes/rom_serde.py

diff --git a/includes/helpers.py b/includes/helpers.py
new file mode 100644
index 0000000..098a683
--- /dev/null
+++ b/includes/helpers.py
@@ -0,0 +1,163 @@
+# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
+# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
+# For nested IDs, the format is ONLY like IP addresses:
+# decimal with '.' separator, e.g. 16.127.1
+# hexadecimal with ':' separator, e.g. 10:7f:1
+# Nested IDs do not support prefixes.
+
+def try_int(v):
+    try:
+        if v[0] in '#$': # Maybe % too?
+            return int(v[1:], 16)
+        if v[-1] == 'h':
+            return int(v[:-1], 16)
+        return int(v, 0)
+    except:
+        if v == '':
+            return None
+        return v
+
+
+def get_max_number_width(container, fmt: str = 'd') -> int:
+    return len(f'{len(container)-1:{fmt}}')
+
+
+def get_number_zero_pad(container, fmt: str = 'd') -> str:
+    max_digits = len(f'{len(container)-1:{fmt}}') # Could instead call get_max_number_width
+    return f'0{max_digits}{fmt}'
+
+
+def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
+    delimiter = ':' if fmt in 'Xx' else '.'
+    if max_digits:
+        return delimiter.join([f'{value:0{digits}{fmt}}' for value, digits in zip(values, max_digits)])
+    else:
+        return delimiter.join([f'{value:{fmt}}' for value in values])
+
+
+def decode_nested_ids(string: str) -> list[int]:
+    hex = ':' in string
+    delimiter = ':' if hex else '.'
+    return [int(i, 16 if hex else 10) for i in string.split(delimiter)]
+
+
+def flatten_keys(container: dict | list, prefix: str = '') -> dict:
+    output = {}
+
+    def flatten_item(k: str, v):
+        if isinstance(v, dict) or isinstance(v, list):
+            flat = flatten_keys(v, f'{prefix}{k}.')
+            for k2, v2 in flat.items():
+                output[k2] = v2
+        else:
+            output[f'{prefix}{k}'] = v
+
+    if isinstance(container, list):
+        fmt = get_number_zero_pad(container, 'd') # Zero pad all of the indices to the same decimal string length as the final one
+        for k, v in enumerate(container):
+            flatten_item(f'{k:{fmt}}', v)
+    elif isinstance(container, dict):
+        for k, v in container.items():
+            flatten_item(k, v)
+    else:
+        raise ValueError(f'flatten_keys is undefined for container type "{container}"')
+
+    return output
+
+
+def flatten_table(table: list, id_fmt: str = 'x') -> list:
+    if len(table) < 1:
+        return table # Empty
+    if isinstance(table[0], dict): # A simple table
+        return [flatten_keys(d) for d in table]
+    if isinstance(table[0], list): # Nested lists are bad when expanded as columns, so we'll expand them into one row per innermost entry, keyed by a composite ID
+        flattened_table = []
+
+        def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
+            if isinstance(data, list):
+                max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
+                for id, sub in enumerate(data):
+                    flatten_list(sub, ids + [id], max_digits)
+            else:
+                entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
+                entry.update(flatten_keys(data))
+                flattened_table.append(entry)
+
+        flatten_list(table, [], [])
+        return flattened_table
+    else:
+        raise NotImplementedError(table[0])
+
+
+def unflatten_keys(d: dict) -> dict:
+    output = {}
+    for k, v in d.items():
+        keysplit = k.split('.')
+        target_dict = output
+        for prefix in keysplit[:-1]:
+            if prefix not in target_dict:
+                target_dict[prefix] = {}
+            target_dict = target_dict[prefix]
+        target_dict[keysplit[-1]] = v # Assign at the leaf key so nested keys round-trip with flatten_keys
+    return output
+
+
+def unflatten_table(headers: list[str], entries: list):
+    if 'ID' not in headers:
+        return entries
+    # This could be an array of an array of an array of an...
+    id0 = str(entries[0]['ID']) # try_int may have already parsed a simple ID into an int
+    if '.' not in id0 and ':' not in id0:
+        return entries
+    # Treat this as a nested array
+    table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
+
+    output = []
+    def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
+        i, *remainder = id_split
+        if len(remainder) > 0:
+            while len(cur_array) <= i: # Make sure our array has the index we're about to jump into
+                cur_array.append([])
+            unflatten_arrays(remainder, cur_array[i], value)
+        else:
+            while len(cur_array) <= i: # Make sure our array has the index we're about to overwrite
+                cur_array.append(None)
+            cur_array[i] = value
+
+    for id_split in sorted(table.keys()):
+        unflatten_arrays(id_split, output, table[id_split])
+
+    return output
+
+
+def dump_tsv(filename, table, id_column=True) -> None:
+    table_flat = flatten_table(table)
+
+    with open(filename, 'w') as file:
+        headers = list(table_flat[0].keys())
+        if id_column and 'ID' not in headers: # Some flattened tables build their own ID column!
+            # See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
+            fmt = get_number_zero_pad(table_flat, 'X')
+            file.write('\t'.join(['ID'] + headers) + '\n')
+            for i, entry in enumerate(table_flat):
+                file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')
+        else:
+            file.write('\t'.join(headers) + '\n')
+            for i, entry in enumerate(table_flat):
+                file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')
+
+
+def load_tsv(filename) -> list:
+    with open(filename, 'r') as file:
+        lines = file.read().rstrip().split('\n')
+    if len(lines) < 2:
+        return []
+    headers = lines[0].split('\t')
+
+    # Simple line-by-line unflatten
+    entries = []
+    for line in lines[1:]:
+        entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}
+        entries.append(unflatten_keys(entry))
+
+    return unflatten_table(headers, entries)
diff --git a/includes/rom_serde.py b/includes/rom_serde.py
new file mode 100644
index 0000000..b057e24
--- /dev/null
+++ b/includes/rom_serde.py
@@ -0,0 +1,47 @@
+from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
+from includes.helpers import load_tsv
+
+class ROMHandler:
+    offset_key: str
+    struct_definitions: dict
+
+    def extract(self, table: str, in_buffer) -> list[dict]:
+        # Deserialize a table
+        leftover_bits = LeftoverBits()
+        entry = self.addresses[table] # Remember to try/catch
+        offset = entry[self.offset_key]
+        buf = ReadBuffer(in_buffer, offset)
+        return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits)
+
+    def build(self, table: str, new_data: list[dict], out_buffer):
+        # Serialize complete data. This WILL fail if the input data is incomplete.
+        leftover_bits = LeftoverBits()
+        entry = self.addresses[table] # Remember to try/catch
+        offset = entry[self.offset_key]
+        buf = WriteBuffer(out_buffer, offset)
+        get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits)
+
+    def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):
+        # Safely merge partial data over the existing data, then serialize it. Assumes a flat table whose entries are indexed by integer ID.
+        existing_data = self.extract(table, in_buffer)
+        for i, new in enumerate(new_data):
+            id = new.get('ID', i)
+            for k, v in new.items():
+                if k != 'ID' and v is not None: # Allow holes in the table for values we don't care about overwriting
+                    existing_data[id][k] = v
+        self.build(table, existing_data, out_buffer)
+
+
+def load_ff5_snes_struct_definitions() -> dict:
+    existing_structs = get_base_structarraytypes()
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/structs_SNES_stubs.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_stubs.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES.tsv', existing_structs)
+    parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_save.tsv', existing_structs)
+    return existing_structs
+
+
+class FF5SNESHandler(ROMHandler):
+    offset_key: str = 'SNES'
+    struct_definitions: dict = load_ff5_snes_struct_definitions()
+    addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}
diff --git a/tabcomp.py b/tabcomp.py
index 32ac40c..63bafa0 100644
--- a/tabcomp.py
+++ b/tabcomp.py
@@ -1,209 +1,5 @@
-from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer
-
-# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
-# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
-# For nested IDs, the format is ONLY like IP addresses:
-# decimal with '.' separator, e.g. 16.127.1
-# hexadecimal with ':' separator, e.g. 10:7f:1
-# Nested IDs do not support prefixes.
-
-def try_int(v):
-    try:
-        if v[0] in '#$': # Maybe % too?
-            return int(v[1:], 16)
-        if v[-1] == 'h':
-            return int(v[:-1], 16)
-        return int(v, 0)
-    except:
-        if v == '':
-            return None
-        return v
-
-
-def get_max_number_width(container, fmt: str = 'd') -> int:
-    return len(f'{len(container)-1:{fmt}}')
-
-
-def get_number_zero_pad(container, fmt: str = 'd') -> str:
-    max_digits = len(f'{len(container)-1:{fmt}}') # Could instead call get_max_number_width
-    return f'0{max_digits}{fmt}'
-
-
-def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
-    delimiter = ':' if fmt in 'Xx' else '.'
-    if max_digits:
-        return delimiter.join([f'{value:0{digits}{fmt}}' for value, digits in zip(values, max_digits)])
-    else:
-        return delimiter.join([f'{value:{fmt}}' for value in values])
-
-
-def decode_nested_ids(string: str) -> list[int]:
-    hex = ':' in string
-    delimiter = ':' if hex else '.'
-    return [int(i, 16 if hex else 10) for i in string.split(delimiter)]
-
-
-def flatten_keys(container: dict | list, prefix: str = '') -> dict:
-    output = {}
-
-    def flatten_item(k: str, v):
-        if isinstance(v, dict) or isinstance(v, list):
-            flat = flatten_keys(v, f'{prefix}{k}.')
-            for k2, v2 in flat.items():
-                output[k2] = v2
-        else:
-            output[f'{prefix}{k}'] = v
-
-    if isinstance(container, list):
-        fmt = get_number_zero_pad(container, 'd') # Zero pad all of the indices to the same decimal string length as the final one
-        for k, v in enumerate(container):
-            flatten_item(f'{k:{fmt}}', v)
-    elif isinstance(container, dict):
-        for k, v in container.items():
-            flatten_item(k, v)
-    else:
-        raise ValueError(f'flatten_keys is undefined for container type "{container}"')
-
-    return output
-
-
-def flatten_table(table: list, id_fmt: str = 'x') -> list:
-    if len(table) < 1:
-        return table # Empty
-    if isinstance(table[0], dict): # A simple table
-        return [flatten_keys(d) for d in table]
-    if isinstance(table[0], list): # Nested lists are bad when expanded as columns, so we'll expand
-        print(table[0])
-        flattened_table = []
-
-        def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
-            if isinstance(data, list):
-                max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
-                for id, sub in enumerate(data):
-                    flatten_list(sub, ids + [id], max_digits)
-            else:
-                entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
-                entry.update(flatten_keys(data))
-                flattened_table.append(entry)
-
-        flatten_list(table, [], [])
-        return flattened_table
-    else:
-        raise NotImplementedError(table[0])
-
-
-def unflatten_keys(d: dict) -> dict:
-    output = {}
-    for k, v in d.items():
-        keysplit = k.split('.')
-        target_dict = output
-        for prefix in keysplit[:-1]:
-            if prefix not in target_dict:
-                target_dict[prefix] = {}
-            target_dict = target_dict[prefix]
-        target_dict[k] = v
-    return output
-
-
-def unflatten_table(headers: list[str], entries: list):
-    if 'ID' not in headers:
-        return entries
-    # This could be an array of an array of an array of an...
-    id0 = entries[0]['ID']
-    if '.' not in id0 and ':' not in id0:
-        return entries
-    # Treat this as a nested array
-    table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}
-
-    output = []
-    def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
-        i, *remainder = id_split
-        if len(remainder) > 0:
-            while len(cur_array) <= i: # Make sure our array has the index we're about to jump into
-                cur_array.append([])
-            unflatten_arrays(remainder, cur_array[i], value)
-        else:
-            while len(cur_array) <= i: # Make sure our array has the index we're about to overwrite
-                cur_array.append(None)
-            cur_array[i] = value
-
-    for id_split in sorted(table.keys()):
-        unflatten_arrays(id_split, output, table[id_split])
-
-    return output
-
-
-def dump_tsv(filename, table, id_column=True) -> None:
-    table_flat = flatten_table(table)
-
-    with open(filename, 'w') as file:
-        headers = list(table_flat[0].keys())
-        if id_column and 'ID' not in headers: # Some flattened tables build their own ID column!
-            # See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
- fmt = get_number_zero_pad(table_flat, 'X') - file.write('\t'.join(['ID'] + headers) + '\n') - for i, entry in enumerate(table_flat): - file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n') - else: - file.write('\t'.join(headers) + '\n') - for i, entry in enumerate(table_flat): - file.write('\t'.join([str(entry[key]) for key in headers]) + '\n') - - -def load_tsv(filename) -> list: - with open(filename, 'r') as file: - lines = file.read().rstrip().split('\n') - if len(lines) < 2: - return [] - headers = lines[0].split('\t') - - # Simple line-by-line unflatten - entries = [] - for line in lines[1:]: - entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))} - entries.append(unflatten_keys(entry)) - - return unflatten_table(headers, entries) - - -def load_ff5_snes_struct_definitions() -> dict: - existing_structs = get_base_structarraytypes() - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/structs_SNES_stubs.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_stubs.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES.tsv', existing_structs) - parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_save.tsv', existing_structs) - return existing_structs - -class FF5SNESHandler: - struct_definitions: dict = load_ff5_snes_struct_definitions() - addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')} - - def extract(self, table: str, in_buffer) -> list[dict]: - # Deserialize a table - leftover_bits = LeftoverBits() - entry = self.addresses[table] # Remember to try/catch - offset = entry['SNES'] - buf = ReadBuffer(in_buffer, offset) - return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits) - - def build(self, table: str, new_data: list[dict], out_buffer): - # Serialize complete data. This WILL fail if the input data is incomplete. - leftover_bits = LeftoverBits() - entry = self.addresses[table] # Remember to try/catch - offset = entry['SNES'] - buf = WriteBuffer(out_buffer, offset) - get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits) - - def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer): - # Safely merge partial data over the existing data, then serialize it. - existing_data = self.extract(table, in_buffer) - for i, new in enumerate(new_data): - id = new.get('ID', i) - for k, v in new.items(): - if k != 'ID' and v is not None: # Allow holes in the table for values we don't care about overwriting - existing_data[id][k] = v - self.build(table, existing_data, out_buffer) - +from includes.helpers import load_tsv, dump_tsv +from includes.rom_serde import FF5SNESHandler if __name__ == '__main__': from argparse import ArgumentParser
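
Not part of the patch: a rough usage sketch of the new module split, for review context. The ROM paths and the 'MonsterStats' table label below are placeholders (real labels come from ChocolateBirdData/5/addresses_SNES_PSX.tsv), and the snippet assumes ReadBuffer/WriteBuffer accept a bytearray, matching how the handler passes buffers straight through.

from includes.helpers import dump_tsv, load_tsv
from includes.rom_serde import FF5SNESHandler

handler = FF5SNESHandler()

# Dump one table from the ROM to a TSV for hand-editing.
with open('ff5.sfc', 'rb') as f: # placeholder input ROM path
    rom = bytearray(f.read())
dump_tsv('monsters.tsv', handler.extract('MonsterStats', rom)) # placeholder table label

# Merge a (possibly partial) edited TSV back over the original data and write a new ROM.
out = bytearray(rom)
handler.build_partial('MonsterStats', load_tsv('monsters.tsv'), rom, out)
with open('ff5_edited.sfc', 'wb') as f: # placeholder output ROM path
    f.write(out)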