TabComp/tabcomp.py

from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer

# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)
# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16
# For nested IDs, the format is ONLY like IP addresses:
#   decimal with '.' separator,    e.g. 16.127.1
#   hexadecimal with ':' separator, e.g. 10:7f:1
# Nested IDs do not support prefixes.

def try_int(v):
	"""Convert a TSV cell to an int where possible.

	Accepted integer spellings:
	  * anything ``int(v, 0)`` accepts (decimal, ``0x``, ``0b``, ``0o``),
	  * hexadecimal prefixed with ``#`` or ``$``,
	  * hexadecimal suffixed with ``h``.

	Returns:
		``None`` for an empty string, the parsed ``int`` when parsing succeeds,
		otherwise ``v`` unchanged (including non-string inputs).

	Note: any string ending in ``h`` whose remainder is valid hexadecimal is
	treated as a number, so e.g. ``'each'`` parses as ``0xeac``.
	"""
	if v == '':
		return None  # Empty cells are holes, not values
	try:
		if v[0] in '#$':  # Maybe % too?
			return int(v[1:], 16)
		if v[-1] == 'h':
			return int(v[:-1], 16)
		return int(v, 0)
	except (ValueError, TypeError, LookupError):
		# ValueError: not a number. TypeError/LookupError: v is not an indexable
		# string (e.g. already an int or None). Either way, pass it through.
		return v


def get_max_number_width(container, fmt: str = 'd') -> int:
	"""Return how many characters the last index of `container` occupies.

	The last index (``len(container) - 1``) is rendered with the format spec
	`fmt` (e.g. ``'d'``, ``'x'``) and the length of that text is returned.
	For an empty container the rendered value is ``-1``.
	"""
	last_index = len(container) - 1
	rendered = format(last_index, fmt)
	return len(rendered)


def get_number_zero_pad(container, fmt: str = 'd') -> str:
	"""Build a zero-padding format spec wide enough for every index of `container`.

	The width is the rendered length of the last index (``len(container) - 1``)
	under `fmt`; the result looks like ``'03d'`` or ``'02X'`` and is meant to be
	used as ``f'{index:{spec}}'``.
	"""
	width = len(format(len(container) - 1, fmt))  # Same width get_max_number_width computes
	return '0' + str(width) + fmt


def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:
	"""Join nested indices into a single ID string.

	Hexadecimal formats (`fmt` of ``'x'`` or ``'X'``) are joined with ``':'``,
	everything else with ``'.'``. When `max_digits` is given (and non-empty),
	each value is zero-padded to the matching width; the two lists are paired
	positionally, so the shorter one decides how many components are emitted.
	"""
	separator = '.'
	if fmt in 'Xx':
		separator = ':'

	if not max_digits:
		parts = [format(value, fmt) for value in values]
	else:
		parts = [format(value, f'0{width}{fmt}') for value, width in zip(values, max_digits)]
	return separator.join(parts)


def decode_nested_ids(string: str) -> list[int]:
	"""Split a nested ID string back into its integer components.

	A string containing ``':'`` is read as colon-separated hexadecimal;
	anything else is read as dot-separated decimal.
	"""
	if ':' in string:
		separator, base = ':', 16
	else:
		separator, base = '.', 10
	return [int(part, base) for part in string.split(separator)]


def flatten_keys(container: dict | list, prefix: str = '') -> dict:
	output = {}

	def flatten_item(k: str, v):
		if isinstance(v, dict) or isinstance(v, list):
			flat = flatten_keys(v, f'{prefix}{k}.')
			for k2, v2 in flat.items():
				output[k2] = v2
		else:
			output[f'{prefix}{k}'] = v

	if isinstance(container, list):
		fmt = get_number_zero_pad(container, 'd')  # Zero pad all of the indices to the same decimal string length as the final one
		for k, v in enumerate(container):
			flatten_item(f'{k:{fmt}}', v)
	elif isinstance(container, dict):
		for k, v in container.items():
			flatten_item(k, v)
	else:
		raise ValueError(f'flatten_keys is undefined for container type "{container}"')

	return output


def flatten_table(table: list, id_fmt: str = 'x') -> list:
	"""Turn a deserialized table into a list of flat row dicts.

	* An empty table is returned unchanged.
	* A list of dicts ("simple table") has each row flattened with flatten_keys.
	* A list of lists is walked recursively; every non-list leaf becomes one row
	  whose 'ID' column encodes its position at each nesting level
	  (see encode_nested_ids), zero-padded per level, followed by the leaf's
	  flattened keys.

	The shape is decided from the first element only.

	Args:
		table: the table to flatten.
		id_fmt: integer format spec used for the nested 'ID' components.

	Raises:
		NotImplementedError: if the first element is neither a dict nor a list.
	"""
	if len(table) < 1:
		return table  # Empty
	first = table[0]
	if isinstance(first, dict):  # A simple table
		return [flatten_keys(d) for d in table]
	if not isinstance(first, list):
		raise NotImplementedError(first)

	# Nested lists are bad when expanded as columns, so we expand them into rows instead
	flattened_table = []

	def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:
		if isinstance(data, list):
			# Each nesting level is padded to the width of this particular list's last index
			max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]
			for index, sub in enumerate(data):
				flatten_list(sub, ids + [index], max_digits)
		else:
			entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}
			entry.update(flatten_keys(data))
			flattened_table.append(entry)

	flatten_list(table, [], [])
	return flattened_table


def unflatten_keys(d: dict) -> dict:
	"""Rebuild nested dicts from a flat dict with dotted keys.

	``{'a.b': 1, 'c': 2}`` becomes ``{'a': {'b': 1}, 'c': 2}``. Every path
	component becomes a dict key, so numeric components (such as the list
	indices flatten_keys emits) come back as string-keyed dicts, not lists.

	Raises:
		TypeError: if a dotted key descends through a path that already holds
			a non-dict value (e.g. both 'a' and 'a.b' are present).
	"""
	output = {}
	for k, v in d.items():
		*parents, leaf = k.split('.')
		target_dict = output
		for prefix in parents:
			target_dict = target_dict.setdefault(prefix, {})
		# Store under the final path component, not the full dotted key
		target_dict[leaf] = v
	return output


def unflatten_table(headers: list[str], entries: list):
	"""Rebuild nested arrays from rows whose 'ID' column is a nested ID.

	Rows are returned unchanged when there is no 'ID' header, when there are no
	rows, or when the first row's ID is not a nested ID string (i.e. it is not a
	string containing '.' or ':', which includes IDs already parsed to int).

	Otherwise every row's ID is decoded with decode_nested_ids and the row is
	placed at that position in a list-of-lists. Missing intermediate positions
	are filled with empty lists and missing leaf positions with None.
	"""
	if 'ID' not in headers or not entries:
		return entries
	# This could be an array of an array of an array of an...
	id0 = entries[0].get('ID')
	# Simple numeric IDs arrive here as ints (or as plain strings), not nested IDs
	if not isinstance(id0, str) or ('.' not in id0 and ':' not in id0):
		return entries
	# Treat this as a nested array
	table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}

	output = []
	def unflatten_arrays(id_split: tuple[int], cur_array: list, value):
		i, *remainder = id_split
		if len(remainder) > 0:
			while len(cur_array) <= i:  # Make sure our array has the index we're about to jump into
				cur_array.append([])
			unflatten_arrays(remainder, cur_array[i], value)
		else:
			while len(cur_array) <= i:  # Make sure our array has the index we're about to overwrite
				cur_array.append(None)
			cur_array[i] = value

	for id_split in sorted(table.keys()):
		unflatten_arrays(id_split, output, table[id_split])

	return output


def dump_tsv(filename, table, id_column=True) -> None:
	"""Write `table` to `filename` as tab-separated values.

	The table is flattened with flatten_table first. The header row is taken
	from the keys of the first flattened row, and every row is expected to have
	those keys. Values are written with str() and are not escaped.

	Args:
		filename: path of the TSV file to (over)write.
		table: the table to dump.
		id_column: when true and the flattened rows have no 'ID' column of their
			own, prepend one holding the row index as zero-padded ``0x`` hex.

	An empty table produces an empty file.
	"""
	table_flat = flatten_table(table)

	with open(filename, 'w') as file:
		if not table_flat:
			return  # Nothing to derive headers from; leave the file empty
		headers = list(table_flat[0].keys())
		if id_column and 'ID' not in headers:  # Some flattened tables build their own ID column!
			# See how long the hex representation of the last number will be, so we can zero-pad the rest to match.
			fmt = get_number_zero_pad(table_flat, 'X')
			file.write('\t'.join(['ID'] + headers) + '\n')
			for i, entry in enumerate(table_flat):
				file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')
		else:
			file.write('\t'.join(headers) + '\n')
			for entry in table_flat:
				file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')


def load_tsv(filename) -> list:
	"""Read a tab-separated file into a table.

	The first line supplies the column headers. Each following line becomes a
	dict of header -> try_int(cell) (so blank cells become None), which is then
	nested with unflatten_keys. Cells are paired with headers positionally; a
	row with fewer cells than headers simply lacks the trailing keys. Finally
	the rows are passed through unflatten_table to rebuild nested arrays.

	Returns:
		An empty list when the file has fewer than two lines (no data rows).
	"""
	with open(filename, 'r') as file:
		# Only strip trailing newlines: trailing tabs are empty cells of the last row
		lines = file.read().rstrip('\n').split('\n')
	if len(lines) < 2:
		return []
	headers = lines[0].split('\t')

	# Simple line-by-line unflatten
	entries = []
	for line in lines[1:]:
		entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}
		entries.append(unflatten_keys(entry))

	return unflatten_table(headers, entries)


def load_ff5_snes_struct_definitions() -> dict:
	"""Collect the struct definitions used for Final Fantasy V (SNES).

	Starts from get_base_structarraytypes() and feeds each definition TSV, in
	order, to parse_struct_definitions_from_tsv_filename together with the
	accumulated dict, which is then returned.
	NOTE(review): presumably the parser adds its definitions to the dict it is
	given (its return value is not used here) - confirm against the reference
	implementation.
	"""
	definition_files = (
		'ChocolateBirdData/structs_SNES_stubs.tsv',
		'ChocolateBirdData/5/structs/SNES_stubs.tsv',
		'ChocolateBirdData/5/structs/SNES.tsv',
		'ChocolateBirdData/5/structs/SNES_save.tsv',
	)
	structs = get_base_structarraytypes()
	for tsv_path in definition_files:
		parse_struct_definitions_from_tsv_filename(tsv_path, structs)
	return structs

class FF5SNESHandler:
	"""Extracts and builds Final Fantasy V (SNES) data tables from/to ROM buffers.

	Tables are looked up by their 'Label' in the addresses TSV; each address
	entry supplies the 'SNES' offset and the 'format' name resolved through
	get_structarraytype.
	"""
	# Both are loaded once, when the class body is executed.
	struct_definitions: dict = load_ff5_snes_struct_definitions()
	addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}

	def extract(self, table: str, in_buffer) -> list[dict]:
		"""Deserialize the table labelled `table` from `in_buffer`.

		Raises:
			KeyError: if `table` is not a known label.
		"""
		leftover_bits = LeftoverBits()
		entry = self.addresses[table]  # Remember to try/catch
		offset = entry['SNES']
		buf = ReadBuffer(in_buffer, offset)
		return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits)

	def build(self, table: str, new_data: list[dict], out_buffer):
		"""Serialize complete data for `table` into `out_buffer`.

		This WILL fail if the input data is incomplete.

		Raises:
			KeyError: if `table` is not a known label.
		"""
		leftover_bits = LeftoverBits()
		entry = self.addresses[table]  # Remember to try/catch
		offset = entry['SNES']
		buf = WriteBuffer(out_buffer, offset)
		get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits)

	@staticmethod
	def _merge_partial(existing, new: dict) -> None:
		"""Copy every non-None value of `new` into `existing`, recursing into dicts.

		When both the existing and the new value are dicts, they are merged key by
		key so that holes (None) inside a nested struct keep the existing value;
		anything else is assigned directly.
		"""
		for k, v in new.items():
			if v is None:
				continue  # Hole: keep whatever is already there
			try:
				current = existing[k]
			except (LookupError, TypeError):
				current = None
			if isinstance(v, dict) and isinstance(current, dict):
				FF5SNESHandler._merge_partial(current, v)
			else:
				existing[k] = v

	def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):
		"""Safely merge partial data over the existing data, then serialize it.

		The current contents of `table` are extracted from `in_buffer`; each row of
		`new_data` is merged into the row selected by its 'ID' (or by its position
		when the ID is absent or blank), and the result is built into `out_buffer`.
		Only rows that are dicts are supported here.
		"""
		existing_data = self.extract(table, in_buffer)
		for i, new in enumerate(new_data):
			row_id = new.get('ID')
			if row_id is None:  # Missing or blank ID cell: fall back to the row's position
				row_id = i
			# Allow holes in the table for values we don't care about overwriting
			fields = {k: v for k, v in new.items() if k != 'ID' and v is not None}
			if fields:
				self._merge_partial(existing_data[row_id], fields)
		self.build(table, existing_data, out_buffer)


if __name__ == '__main__':
	from argparse import ArgumentParser
	parser = ArgumentParser(description='The ROMhacking Table Compiler.')
	parser.add_argument('action', choices=['extract', 'build'])
	parser.add_argument('rom', help='The ROM to use as a basis for extracting data.')
	parser.add_argument('project', help='The project folder to extract data to, or compile data from.')
	parser.add_argument('tables', nargs='*', help='Specify which tables to extract or compile, separated by spaces. If left empty, nothing will be extracted, or all tables in a project will be compiled. See the labels in https://git.ufeff.net/birdulon/ChocolateBirdData/src/branch/master/5/addresses_SNES_PSX.tsv for a list of values which may be used, though bear in mind things such as graphics and maps are currently not supported in a sensible way.')
	args = parser.parse_args()

	if args.project:
		project_folder = args.project.rstrip('/') + '/'
		project_folder_len = len(project_folder)

		from glob import glob
		from configparser import ConfigParser
		config = ConfigParser()
		config['TabComp.Project'] = {'Game': 'Final Fantasy V', 'Platform': 'SNES', 'Region': 'any'}
		try:
			with open(f'{project_folder}project.ini', 'r') as configfile:
				config.read_file(configfile)
		except FileNotFoundError:
			pass
		with open(f'{project_folder}project.ini', 'w') as configfile:
			config.write(configfile)

		def run():
			game = config['TabComp.Project']['Game']
			platform = config['TabComp.Project']['Platform']
			if game != 'Final Fantasy V' or platform != 'SNES':
				print(f'Unsupported ROM for project - "{game}" on "{platform}"')
				return
			handler = FF5SNESHandler()
			if not args.rom:
				print('No ROM specified!')
				return
			with open(args.rom, 'rb') as file:
				rom_bytes = file.read()
			in_buffer = bytearray(rom_bytes)
			match args.action:
				case 'extract':
					if not args.tables:
						print('Must specify tables to extract!')
						return
					tables = [table for table in args.tables]
					print(f'Attempting to extract tables {tables}')
					for table in tables:
						data = handler.extract(table, in_buffer)
						dump_tsv(f'{project_folder}{table}.tsv', data)
					print('Done extracting!')

				case 'build':
					tables = [table for table in args.tables]
					if not args.tables:
						# Find all .tsv files in project folder
						tables = [file[project_folder_len:-4] for file in glob(f'{project_folder}*.tsv')]
					print(f'Attempting to build tables {tables}')
					out_buffer = bytearray(rom_bytes)
					for table in tables:
						data = load_tsv(f'{project_folder}{table}.tsv')
						handler.build_partial(table, data, in_buffer, out_buffer)
					out_filename = f'{project_folder}rom.sfc'
					with open(out_filename, 'wb') as file:
						file.write(out_buffer)
					print(f'Compiled to "{out_filename}", make your own .ips from this')
				case _:
					'Invalid action!'
					return
		run()
First edition 2024-06-26 23:59:10 +09:30			`from ChocolateBirdData.reference_implementation import get_base_structarraytypes, parse_struct_definitions_from_tsv_filename, get_structarraytype, LeftoverBits, ReadBuffer, WriteBuffer`

Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`# Singular values may be decimal (no prefix), or any of the prefixes python accepts normally (0x for hex, 0b for binary, 0o for octal)`
			`# Additionally, hexadecimals may be prefixed with '#' or '$', or suffixed with 'h', e.g. 0x10 #10 $10 10h are all parsed as 16`
			`# For nested IDs, the format is ONLY like IP addresses:`
			`# decimal with '.' separator, e.g. 16.127.1`
			`# hexadecimal with ':' separator, e.g. 10:7f:1`
			`# Nested IDs do not support prefixes.`
First edition 2024-06-26 23:59:10 +09:30
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`def try_int(v):`
			`try:`
			`if v[0] in '#$': # Maybe % too?`
			`return int(v[1:], 16)`
			`if v[-1] == 'h':`
			`return int(v[:-1], 16)`
			`return int(v, 0)`
			`except:`
			`if v == '':`
			`return None`
			`return v`


			`def get_max_number_width(container, fmt: str = 'd') -> int:`
			`return len(f'{len(container)-1:{fmt}}')`


			`def get_number_zero_pad(container, fmt: str = 'd') -> str:`
			`max_digits = len(f'{len(container)-1:{fmt}}') # Could instead call get_max_number_width`
			`return f'0{max_digits}{fmt}'`


			`def encode_nested_ids(values: list[int], max_digits: list[int] = None, fmt: str = 'd') -> str:`
			`delimiter = ':' if fmt in 'Xx' else '.'`
			`if max_digits:`
			`return delimiter.join([f'{value:0{digits}{fmt}}' for value, digits in zip(values, max_digits)])`
			`else:`
			`return delimiter.join([f'{value:{fmt}}' for value in values])`


			`def decode_nested_ids(string: str) -> list[int]:`
			`hex = ':' in string`
			`delimiter = ':' if hex else '.'`
			`return [int(i, 16 if hex else 10) for i in string.split(delimiter)]`


			`def flatten_keys(container: dict \| list, prefix: str = '') -> dict:`
First edition 2024-06-26 23:59:10 +09:30			`output = {}`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30
			`def flatten_item(k: str, v):`
			`if isinstance(v, dict) or isinstance(v, list):`
First edition 2024-06-26 23:59:10 +09:30			`flat = flatten_keys(v, f'{prefix}{k}.')`
			`for k2, v2 in flat.items():`
			`output[k2] = v2`
			`else:`
			`output[f'{prefix}{k}'] = v`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30
			`if isinstance(container, list):`
			`fmt = get_number_zero_pad(container, 'd') # Zero pad all of the indices to the same decimal string length as the final one`
			`for k, v in enumerate(container):`
			`flatten_item(f'{k:{fmt}}', v)`
			`elif isinstance(container, dict):`
			`for k, v in container.items():`
			`flatten_item(k, v)`
			`else:`
			`raise ValueError(f'flatten_keys is undefined for container type "{container}"')`

First edition 2024-06-26 23:59:10 +09:30			`return output`


Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`def flatten_table(table: list, id_fmt: str = 'x') -> list:`
			`if len(table) < 1:`
			`return table # Empty`
			`if isinstance(table[0], dict): # A simple table`
			`return [flatten_keys(d) for d in table]`
			`if isinstance(table[0], list): # Nested lists are bad when expanded as columns, so we'll expand`
			`print(table[0])`
			`flattened_table = []`

			`def flatten_list(data, ids: list[int], id_max_digits: list[int]) -> None:`
			`if isinstance(data, list):`
			`max_digits = id_max_digits + [get_max_number_width(data, id_fmt)]`
			`for id, sub in enumerate(data):`
			`flatten_list(sub, ids + [id], max_digits)`
			`else:`
			`entry = {'ID': encode_nested_ids(ids, id_max_digits, id_fmt)}`
			`entry.update(flatten_keys(data))`
			`flattened_table.append(entry)`

			`flatten_list(table, [], [])`
			`return flattened_table`
			`else:`
			`raise NotImplementedError(table[0])`


First edition 2024-06-26 23:59:10 +09:30			`def unflatten_keys(d: dict) -> dict:`
			`output = {}`
			`for k, v in d.items():`
			`keysplit = k.split('.')`
			`target_dict = output`
			`for prefix in keysplit[:-1]:`
			`if prefix not in target_dict:`
			`target_dict[prefix] = {}`
			`target_dict = target_dict[prefix]`
			`target_dict[k] = v`
			`return output`


Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`def unflatten_table(headers: list[str], entries: list):`
			`if 'ID' not in headers:`
			`return entries`
			`# This could be an array of an array of an array of an...`
			`id0 = entries[0]['ID']`
			`if '.' not in id0 and ':' not in id0:`
			`return entries`
			`# Treat this as a nested array`
			`table = {tuple(decode_nested_ids(entry['ID'])): entry for entry in entries}`

			`output = []`
			`def unflatten_arrays(id_split: tuple[int], cur_array: list, value):`
			`i, *remainder = id_split`
			`if len(remainder) > 0:`
			`while len(cur_array) <= i: # Make sure our array has the index we're about to jump into`
			`cur_array.append([])`
			`unflatten_arrays(remainder, cur_array[i], value)`
			`else:`
			`while len(cur_array) <= i: # Make sure our array has the index we're about to overwrite`
			`cur_array.append(None)`
			`cur_array[i] = value`

			`for id_split in sorted(table.keys()):`
			`unflatten_arrays(id_split, output, table[id_split])`

			`return output`


First edition 2024-06-26 23:59:10 +09:30			`def dump_tsv(filename, table, id_column=True) -> None:`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`table_flat = flatten_table(table)`
First edition 2024-06-26 23:59:10 +09:30
			`with open(filename, 'w') as file:`
			`headers = list(table_flat[0].keys())`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`if id_column and 'ID' not in headers: # Some flattened tables build their own ID column!`
			`# See how long the hex representation of the last number will be, so we can zero-pad the rest to match.`
			`fmt = get_number_zero_pad(table_flat, 'X')`
First edition 2024-06-26 23:59:10 +09:30			`file.write('\t'.join(['ID'] + headers) + '\n')`
			`for i, entry in enumerate(table_flat):`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`file.write('\t'.join([f'0x{i:{fmt}}'] + [str(entry[key]) for key in headers]) + '\n')`
First edition 2024-06-26 23:59:10 +09:30			`else:`
			`file.write('\t'.join(headers) + '\n')`
			`for i, entry in enumerate(table_flat):`
			`file.write('\t'.join([str(entry[key]) for key in headers]) + '\n')`


			`def load_tsv(filename) -> list:`
			`with open(filename, 'r') as file:`
			`lines = file.read().rstrip().split('\n')`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`if len(lines) < 2:`
			`return []`
First edition 2024-06-26 23:59:10 +09:30			`headers = lines[0].split('\t')`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30
			`# Simple line-by-line unflatten`
			`entries = []`
First edition 2024-06-26 23:59:10 +09:30			`for line in lines[1:]:`
			`entry = {key: try_int(value) for key, value in zip(headers, line.split('\t'))}`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`entries.append(unflatten_keys(entry))`

			`return unflatten_table(headers, entries)`
First edition 2024-06-26 23:59:10 +09:30

			`def load_ff5_snes_struct_definitions() -> dict:`
			`existing_structs = get_base_structarraytypes()`
			`parse_struct_definitions_from_tsv_filename('ChocolateBirdData/structs_SNES_stubs.tsv', existing_structs)`
			`parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_stubs.tsv', existing_structs)`
			`parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES.tsv', existing_structs)`
			`parse_struct_definitions_from_tsv_filename('ChocolateBirdData/5/structs/SNES_save.tsv', existing_structs)`
			`return existing_structs`

			`class FF5SNESHandler:`
			`struct_definitions: dict = load_ff5_snes_struct_definitions()`
			`addresses: dict = {entry['Label']: entry for entry in load_tsv('ChocolateBirdData/5/addresses_SNES_PSX.tsv')}`

			`def extract(self, table: str, in_buffer) -> list[dict]:`
			`# Deserialize a table`
			`leftover_bits = LeftoverBits()`
			`entry = self.addresses[table] # Remember to try/catch`
			`offset = entry['SNES']`
			`buf = ReadBuffer(in_buffer, offset)`
			`return get_structarraytype(entry['format'], self.struct_definitions).get_value(buf, leftover_bits)`

			`def build(self, table: str, new_data: list[dict], out_buffer):`
			`# Serialize complete data. This WILL fail if the input data is incomplete.`
			`leftover_bits = LeftoverBits()`
			`entry = self.addresses[table] # Remember to try/catch`
			`offset = entry['SNES']`
			`buf = WriteBuffer(out_buffer, offset)`
			`get_structarraytype(entry['format'], self.struct_definitions).put_value(buf, new_data, leftover_bits)`

			`def build_partial(self, table: str, new_data: list[dict], in_buffer, out_buffer):`
			`# Safely merge partial data over the existing data, then serialize it.`
			`existing_data = self.extract(table, in_buffer)`
			`for i, new in enumerate(new_data):`
			`id = new.get('ID', i)`
			`for k, v in new.items():`
Add support for sane array-of-array-of-array table flattening 2024-06-27 17:03:51 +09:30			`if k != 'ID' and v is not None: # Allow holes in the table for values we don't care about overwriting`
First edition 2024-06-26 23:59:10 +09:30			`existing_data[id][k] = v`
			`self.build(table, existing_data, out_buffer)`


			`if __name__ == '__main__':`
			`from argparse import ArgumentParser`
			`parser = ArgumentParser(description='The ROMhacking Table Compiler.')`
			`parser.add_argument('action', choices=['extract', 'build'])`
			`parser.add_argument('rom', help='The ROM to use as a basis for extracting data.')`
			`parser.add_argument('project', help='The project folder to extract data to, or compile data from.')`
			`parser.add_argument('tables', nargs='*', help='Specify which tables to extract or compile, separated by spaces. If left empty, nothing will be extracted, or all tables in a project will be compiled. See the labels in https://git.ufeff.net/birdulon/ChocolateBirdData/src/branch/master/5/addresses_SNES_PSX.tsv for a list of values which may be used, though bear in mind things such as graphics and maps are currently not supported in a sensible way.')`
			`args = parser.parse_args()`

			`if args.project:`
			`project_folder = args.project.rstrip('/') + '/'`
			`project_folder_len = len(project_folder)`

			`from glob import glob`
			`from configparser import ConfigParser`
			`config = ConfigParser()`
			`config['TabComp.Project'] = {'Game': 'Final Fantasy V', 'Platform': 'SNES', 'Region': 'any'}`
			`try:`
			`with open(f'{project_folder}project.ini', 'r') as configfile:`
			`config.read_file(configfile)`
			`except FileNotFoundError:`
			`pass`
			`with open(f'{project_folder}project.ini', 'w') as configfile:`
			`config.write(configfile)`

			`def run():`
			`game = config['TabComp.Project']['Game']`
			`platform = config['TabComp.Project']['Platform']`
			`if game != 'Final Fantasy V' or platform != 'SNES':`
			`print(f'Unsupported ROM for project - "{game}" on "{platform}"')`
			`return`
			`handler = FF5SNESHandler()`
			`if not args.rom:`
			`print('No ROM specified!')`
			`return`
			`with open(args.rom, 'rb') as file:`
			`rom_bytes = file.read()`
			`in_buffer = bytearray(rom_bytes)`
			`match args.action:`
			`case 'extract':`
			`if not args.tables:`
			`print('Must specify tables to extract!')`
			`return`
			`tables = [table for table in args.tables]`
			`print(f'Attempting to extract tables {tables}')`
			`for table in tables:`
			`data = handler.extract(table, in_buffer)`
			`dump_tsv(f'{project_folder}{table}.tsv', data)`
			`print('Done extracting!')`

			`case 'build':`
			`tables = [table for table in args.tables]`
			`if not args.tables:`
			`# Find all .tsv files in project folder`
			`tables = [file[project_folder_len:-4] for file in glob(f'{project_folder}*.tsv')]`
			`print(f'Attempting to build tables {tables}')`
			`out_buffer = bytearray(rom_bytes)`
			`for table in tables:`
			`data = load_tsv(f'{project_folder}{table}.tsv')`
			`handler.build_partial(table, data, in_buffer, out_buffer)`
			`out_filename = f'{project_folder}rom.sfc'`
			`with open(out_filename, 'wb') as file:`
			`file.write(out_buffer)`
			`print(f'Compiled to "{out_filename}", make your own .ips from this')`
			`case _:`
			`'Invalid action!'`
			`return`
			`run()`