# store '{与|あた}えた{使命|しめい} ' as [('与','あた'), ('えた',''), ('使命','しめい'), (' ','')]
FuriBlock = namedtuple('FuriBlock', ['kanji', 'furi'])

# spb (seconds per beat) is preferred to bpm (beats per minute)
# spb = 60.0/bpm


class LyricLine:
    """One lyric line: its text in several scripts plus per-line beat stamps.

    Beat stamps are relative to the start of the line; convert them to
    absolute seconds with get_timestamps() or to karaoke centiseconds with
    get_karaoke_centiseconds().
    """

    beat_stamps: list[float]  # Start at zero for each line, do real timing via get_timestamps()
    translated_line: str
    hiragana_syllables: list[str]  # Allow space entries which will be skipped over when calculating timing
    romaji_syllables: list[str]  # Allow space entries which will be skipped over when calculating timing
    furi_blocks: list[FuriBlock]

    def __init__(self) -> None:
        # Give every instance its own list. A class-level `= []` default is a
        # single object shared by all lines, so appending to one line's
        # stamps would silently change every other line's.
        self.beat_stamps = []

    def get_timestamps(self, spb: float, start_offset: float) -> list[float]:
        """Return each beat stamp as seconds: ``spb * beat + start_offset``."""
        return [(spb * beat) + start_offset for beat in self.beat_stamps]

    def get_karaoke_centiseconds(self, spb: float) -> list[int]:
        """Return each beat stamp as whole centiseconds from the line start.

        Values are rounded to the nearest centisecond rather than truncated:
        binary floating point makes e.g. ``0.29 * 100`` evaluate to
        28.999999999999996, which ``int()`` would turn into 28.
        """
        return [round(spb * beat * 100) for beat in self.beat_stamps]


class LyricTrack:
    lines: list[LyricLine]
# Scale factor applied to furigana margins, carried over unchanged from the
# previously inline literal. What it is calibrated against is not established
# here. TODO: derive it from font metrics / PlayResX.
_FURI_MARGIN_SCALE = 1.57


def seconds_to_timestamp(t: float) -> str:
    """Format a time in seconds as an ASS event timestamp ``H:MM:SS.cc``.

    The value is converted to whole centiseconds before being split into
    fields, so a time such as 59.999 s carries into the minutes field
    (``0:01:00.00``) instead of producing an invalid ``60.00`` seconds field.
    Hours are not zero-padded, matching the ``0:01:08.00`` form used by the
    example layout in this file.
    """
    total_cs = round(t * 100)
    total_seconds, cs = divmod(total_cs, 100)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours}:{minutes:02}:{seconds:02}.{cs:02}'


def _romaji_karaoke_text(romaji_syllables, centiseconds) -> str:
    """Build the karaoke-tagged romaji text for one lyric line."""
    parts = [f'{{\\k{centiseconds[0]}}}']  # lead-in before the first syllable
    i = 0  # index of the next timed syllable
    for syl in romaji_syllables:
        if not syl.strip():
            # Word-separating spaces are emitted but take no time.
            parts.append(f'{{\\k0}}{syl}')
            continue
        parts.append(f'{{\\K{centiseconds[i + 1] - centiseconds[i]}}}{syl}')
        i += 1
    return ''.join(parts)


def _kanji_and_furigana(furi_blocks, centiseconds, size_kanji_x, sub_preamble):
    """Build the karaoke-tagged kanji text and the furigana event lines.

    Returns ``(kanji_text, furi_events)`` where ``furi_events`` is a list of
    complete, newline-terminated ``Furigana`` dialogue lines, one per block
    that carries a reading.
    """
    full_kanji_width = sum(len(b.kanji) for b in furi_blocks) * size_kanji_x
    kanji_parts = [f'{{\\k{centiseconds[0]}}}']
    furi_events = []
    progress = 0  # characters of the kanji line emitted so far
    i = 0  # index of the next timed syllable
    for block in furi_blocks:
        if not block.furi:
            # Kana or punctuation: time each syllable directly on the kanji line.
            for syl in kana_to_syllable_list(block.kanji):
                if syl.strip():
                    kanji_parts.append(f'{{\\K{centiseconds[i + 1] - centiseconds[i]}}}{syl}')
                    i += 1
                else:
                    kanji_parts.append(f'{{\\k0}}{syl}')  # don't time spaces
                progress += len(syl)
            continue

        # Kanji block: the reading is timed per syllable on its own event,
        # and the kanji itself is timed as one span covering those syllables.
        i0 = i
        furi_parts = [f'{{\\k{centiseconds[i]}}}']
        for syl in kana_to_syllable_list(block.furi):
            furi_parts.append(f'{{\\K{centiseconds[i + 1] - centiseconds[i]}}}{syl}')
            i += 1

        # Horizontal centre of this block relative to the centre of the whole
        # kanji line (x = 0 at centre); pushed left/right via event margins.
        k_start = progress
        progress += len(block.kanji)
        k_end = progress
        target_middle_x = (size_kanji_x * (k_end + k_start) / 2) - (full_kanji_width / 2)
        margin_l = 0 if target_middle_x < 0 else int(target_middle_x * _FURI_MARGIN_SCALE)
        margin_r = 0 if target_middle_x > 0 else int(-target_middle_x * _FURI_MARGIN_SCALE)
        furi_text = ''.join(furi_parts)
        furi_events.append(f'{sub_preamble},Furigana,,{margin_l},{margin_r},,,{furi_text}\n')
        kanji_parts.append(f'{{\\K{centiseconds[i] - centiseconds[i0]}}}{block.kanji}')
    return ''.join(kanji_parts), furi_events


def generate_ass(filename: str, lyric_track: 'LyricTrack', format_overloads: dict = None, *,
                 spb: float = 0.5, start_offset: float = 68.0, line_gap: float = 1.0):
    """Write *lyric_track* to *filename* as an ASS karaoke subtitle file.

    For every lyric line this emits a Translation event, a Romaji event, a
    Kanji event and one Furigana event per block that has a reading.

    Args:
        filename: Output path; the file is written as UTF-8.
        lyric_track: Track whose ``lines`` are rendered in order.
        format_overloads: Optional values overriding ``format_defaults``
            before they are substituted into ``ass_preamble``.
        spb: Seconds per beat used to convert beat stamps (placeholder
            default carried over from the previous hard-coded value).
        start_offset: Start time in seconds of the first line (placeholder
            default carried over from the previous hard-coded value).
        line_gap: Seconds added after a line's last beat stamp to get its
            end time, which is also the next line's start time.

    Raises:
        IndexError: If a line has no beat stamps, or fewer than one more
            stamp than it has timed syllables.
    """
    format_dict = format_defaults.copy()
    if format_overloads:
        format_dict.update(format_overloads)
    preamble = ass_preamble.format(**format_dict)

    # Kanji Furigana layout stuff
    # TODO: work out scaling factor for fullwidth from point size; furigana
    # width (FuriSize) and PlayResX are not taken into account yet.
    size_kanji_x = format_dict['KanjiSize']

    # Explicit UTF-8: the output contains Japanese text, which the platform
    # default encoding cannot always represent.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(preamble)
        t = start_offset
        for lyric_line in lyric_track.lines:
            t0 = seconds_to_timestamp(t)
            timestamps = lyric_line.get_timestamps(spb, t)
            centiseconds = lyric_line.get_karaoke_centiseconds(spb)
            t = timestamps[-1] + line_gap
            t1 = seconds_to_timestamp(t)
            sub_preamble = f'Dialogue: 0,{t0},{t1}'

            # Translation line is static
            file.write(f'{sub_preamble},Translation,,,,,,{lyric_line.translated_line}\n')

            # Romaji line: syllables interspersed with their durations
            romaji_text = _romaji_karaoke_text(lyric_line.romaji_syllables, centiseconds)
            file.write(f'{sub_preamble},Romaji,,,,,,{romaji_text}\n')

            # Kanji line, followed by the furigana events positioned above it
            kanji_text, furi_events = _kanji_and_furigana(
                lyric_line.furi_blocks, centiseconds, size_kanji_x, sub_preamble)
            file.write(f'{sub_preamble},Kanji,,,,,,{kanji_text}\n')
            file.writelines(furi_events)