Subtitle generation with slightly wrong furigana margin positioning
This commit is contained in:
parent
e7ffe3e7e7
commit
55017bcac6
|
@ -2,15 +2,20 @@ from collections import namedtuple
|
||||||
# store '{与|あた}えた{使命|しめい} ' as [('与','あた'), ('えた',''), ('使命','しめい'), (' ','')]
|
# store '{与|あた}えた{使命|しめい} ' as [('与','あた'), ('えた',''), ('使命','しめい'), (' ','')]
|
||||||
FuriBlock = namedtuple('FuriBlock', ['kanji', 'furi'])
|
FuriBlock = namedtuple('FuriBlock', ['kanji', 'furi'])
|
||||||
|
|
||||||
|
# spb (seconds per beat) is preferred to bpm (beats per minute)
|
||||||
|
# spb = 60.0/bpm
|
||||||
class LyricLine:
|
class LyricLine:
|
||||||
beat_stamps: list[float] = [] # Start at zero for each line, do real timing via get_timestamps()
|
beat_stamps: list[float] = [] # Start at zero for each line, do real timing via get_timestamps()
|
||||||
translated_line: str
|
translated_line: str
|
||||||
|
hiragana_syllables: list[str] # Allow space entries which will be skipped over when calculating timing
|
||||||
romaji_syllables: list[str] # Allow space entries which will be skipped over when calculating timing
|
romaji_syllables: list[str] # Allow space entries which will be skipped over when calculating timing
|
||||||
furi_blocks: list[FuriBlock]
|
furi_blocks: list[FuriBlock]
|
||||||
|
|
||||||
def get_timestamps(self, bpm: float, start_offset: float) -> list[float]:
|
def get_timestamps(self, spb: float, start_offset: float) -> list[float]:
|
||||||
spb = 60.0/bpm # seconds per beat
|
|
||||||
return [(spb*beat)+start_offset for beat in self.beat_stamps]
|
return [(spb*beat)+start_offset for beat in self.beat_stamps]
|
||||||
|
|
||||||
|
def get_karaoke_centiseconds(self, spb: float) -> list[int]:
    """Return each beat stamp of this line as a whole number of centiseconds.

    Each entry of ``self.beat_stamps`` is multiplied by ``spb`` (seconds per
    beat) and converted to centiseconds. The values are offsets from the
    start of the line; no start offset is added here.

    The product is rounded to the nearest centisecond rather than truncated,
    because binary floating point routinely lands just below the intended
    value (for example ``0.29 * 100 == 28.999999999999996``) and truncation
    would then lose a whole centisecond.
    """
    return [round(spb * beat * 100) for beat in self.beat_stamps]
|
||||||
|
|
||||||
class LyricTrack:
|
class LyricTrack:
|
||||||
lines: list[LyricLine]
|
lines: list[LyricLine]
|
||||||
|
|
|
@ -37,7 +37,7 @@ def parse_jp_text(text: str) -> list[tuple[str, str]]:
|
||||||
# Our custom word overrides have two levels:
|
# Our custom word overrides have two levels:
|
||||||
# - One is a simple search-replace to turn matches into manual furigana "{kanji|furi}" format. This could have false hits on short words.
|
# - One is a simple search-replace to turn matches into manual furigana "{kanji|furi}" format. This could have false hits on short words.
|
||||||
# - The latter is to override a word's kana post-tokenization, which requires it to be a dictionary word with multiple readings.
|
# - The latter is to override a word's kana post-tokenization, which requires it to be a dictionary word with multiple readings.
|
||||||
word_overrides = {'主': 'しゅ'}
|
word_overrides = {'私': 'わたし', '主': 'しゅ'}
|
||||||
re_manual_furi = re.compile(r'{(.+?)\|(.+?)}')
|
re_manual_furi = re.compile(r'{(.+?)\|(.+?)}')
|
||||||
|
|
||||||
def manual_furi_string_to_blocks(line: str) -> list[FuriBlock]:
|
def manual_furi_string_to_blocks(line: str) -> list[FuriBlock]:
|
||||||
|
@ -141,10 +141,17 @@ def parse_japanese_line(line: str):
|
||||||
for kanji, hiragana in output['word_pairs']:
|
for kanji, hiragana in output['word_pairs']:
|
||||||
output['furi_blocks'] += word_to_furi_blocks(kanji, hiragana)
|
output['furi_blocks'] += word_to_furi_blocks(kanji, hiragana)
|
||||||
|
|
||||||
# Create word-spaced romaji syllables
|
# Create word-spaced hiragana and romaji syllables
|
||||||
|
output['hiragana_syllables'] = [] # Will have spaces mixed in so must be iterated for timing
|
||||||
output['romaji_syllables'] = [] # Will have spaces mixed in so must be iterated for timing
|
output['romaji_syllables'] = [] # Will have spaces mixed in so must be iterated for timing
|
||||||
for _, hiragana in output['word_pairs']:
|
for _, hiragana in output['word_pairs']:
|
||||||
output['romaji_syllables'] += [to_romaji(s) for syl in kana_to_syllable_list(hiragana) if (s:= syl.strip())]
|
l = [s for syl in kana_to_syllable_list(hiragana) if (s:= syl.strip())]
|
||||||
if output['romaji_syllables'][-1] != ' ':
|
output['hiragana_syllables'] += l
|
||||||
|
output['romaji_syllables'] += [to_romaji(s) for syl in l if (s:= syl.strip())]
|
||||||
|
if len(l) > 0:
|
||||||
|
output['hiragana_syllables'].append(' ')
|
||||||
output['romaji_syllables'].append(' ')
|
output['romaji_syllables'].append(' ')
|
||||||
|
if len(output['romaji_syllables']) > 0: # remove trailing space
|
||||||
|
output['hiragana_syllables'].pop()
|
||||||
|
output['romaji_syllables'].pop()
|
||||||
return output
|
return output
|
||||||
|
|
|
@ -34,25 +34,92 @@ format_defaults = {
|
||||||
'KaraokeColourPast': 'E02A0A00',
|
'KaraokeColourPast': 'E02A0A00',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def seconds_to_timestamp(t: float) -> str:
    """Format a time in seconds as ``HH:MM:SS.cc`` (centisecond precision).

    The value is rounded to whole centiseconds *before* it is split into
    hours, minutes and seconds. Rounding only when formatting the seconds
    field lets a value such as 59.999 render as ``00:00:60.00`` instead of
    carrying into the minutes field.
    """
    total_centiseconds = round(t * 100)
    total_seconds, centiseconds = divmod(total_centiseconds, 100)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours:02}:{minutes:02}:{seconds:02}.{centiseconds:02}'
|
||||||
|
|
||||||
from format import LyricTrack
|
from format import LyricTrack
|
||||||
|
from japanese_converters import kana_to_syllable_list
|
||||||
def generate_ass(filename: str, lyric_track: LyricTrack, format_overloads: dict = None):
|
def generate_ass(filename: str, lyric_track: LyricTrack, format_overloads: dict = None):
|
||||||
format_dict = format_defaults.copy()
|
format_dict = format_defaults.copy()
|
||||||
if format_overloads:
|
if format_overloads:
|
||||||
format_dict.update(format_overloads)
|
format_dict.update(format_overloads)
|
||||||
preamble = ass_preamble.format(format_dict)
|
preamble = ass_preamble.format(**format_dict)
|
||||||
|
|
||||||
# Kanji Furigana layout stuff
|
# Kanji Furigana layout stuff
|
||||||
size_kanji = format_dict['KanjiSize']
|
size_kanji_x = format_dict['KanjiSize'] # TODO: work out scaling factor for fullwidth from point size
|
||||||
size_furi = format_dict['FuriSize']
|
size_furi_x = format_dict['FuriSize'] # TODO: work out scaling factor for fullwidth from point size
|
||||||
|
res_x = format_dict['PlayResX']
|
||||||
|
|
||||||
with open(filename, 'w') as file:
|
with open(filename, 'w') as file:
|
||||||
file.write(preamble)
|
file.write(preamble)
|
||||||
# for line in lines:
|
t = 68.0 # placeholder
|
||||||
# #
|
for line in lyric_track.lines:
|
||||||
# for syllable in line:
|
t0 = seconds_to_timestamp(t)
|
||||||
# t, kanji, furi, romaji = syllable
|
timestamps = line.get_timestamps(0.5, t)
|
||||||
|
centiseconds = line.get_karaoke_centiseconds(0.5)
|
||||||
|
t = timestamps[-1] + 1.0 # placeholder
|
||||||
|
t1 = seconds_to_timestamp(t)
|
||||||
|
sub_preamble = f'Dialogue: 0,{t0},{t1}'
|
||||||
|
|
||||||
example_layout = '''
|
# Translation line is easy and static
|
||||||
|
file.write(f'{sub_preamble},Translation,,,,,,{line.translated_line}\n')
|
||||||
|
|
||||||
|
# Romaji line is also easy, just intersperse durations
|
||||||
|
romaji_line = f'{{\\k{centiseconds[0]}}}'
|
||||||
|
i = 0 # syllable counter
|
||||||
|
for syl in line.romaji_syllables:
|
||||||
|
if not syl.strip():
|
||||||
|
romaji_line += f'{{\\k0}}{syl}'
|
||||||
|
continue
|
||||||
|
romaji_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||||
|
i += 1
|
||||||
|
file.write(f'{sub_preamble},Romaji,,,,,,{romaji_line}\n')
|
||||||
|
|
||||||
|
# Now for the kanji and furi lines...
|
||||||
|
kanji_plain_str = ''.join([b.kanji for b in line.furi_blocks])
|
||||||
|
full_kanji_width = len(kanji_plain_str) * size_kanji_x
|
||||||
|
kanji_line = f'{{\\k{centiseconds[0]}}}'
|
||||||
|
kanji_line_progress = 0 # increment as we go, to track furi position
|
||||||
|
furi_lines = []
|
||||||
|
i = 0 # syllable counter
|
||||||
|
for furi_block in line.furi_blocks:
|
||||||
|
if len(furi_block.furi) == 0: # kana or punctuation, nice and simple!
|
||||||
|
syls = kana_to_syllable_list(furi_block.kanji)
|
||||||
|
for syl in syls:
|
||||||
|
if len(syl.strip()) == 0: # don't time spaces
|
||||||
|
kanji_line += f'{{\\k0}}{syl}'
|
||||||
|
kanji_line_progress += len(syl)
|
||||||
|
else:
|
||||||
|
kanji_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||||
|
kanji_line_progress += len(syl)
|
||||||
|
i += 1
|
||||||
|
else: # Kanji block
|
||||||
|
i0 = i # Store this to later calculate block time for the kanji
|
||||||
|
syls = kana_to_syllable_list(furi_block.furi)
|
||||||
|
furi_line = f'{{\\k{centiseconds[i]}}}'
|
||||||
|
furi_chars = 0
|
||||||
|
for syl in syls:
|
||||||
|
furi_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||||
|
furi_chars += len(syl)
|
||||||
|
i += 1
|
||||||
|
# Need to calculate kanji block position and span to typeset the furigana above it
|
||||||
|
k = furi_block.kanji
|
||||||
|
k_start = kanji_line_progress
|
||||||
|
kanji_line_progress += len(k)
|
||||||
|
k_end = kanji_line_progress
|
||||||
|
target_middle_x = (size_kanji_x * (k_end+k_start)/2) - (full_kanji_width/2) # x=0 at center
|
||||||
|
furi_width = furi_chars * size_furi_x
|
||||||
|
margin_l = 0 if target_middle_x < 0 else int(target_middle_x*1.57)
|
||||||
|
margin_r = 0 if target_middle_x > 0 else int(-target_middle_x*1.57)
|
||||||
|
furi_lines.append(f'{sub_preamble},Furigana,,{margin_l},{margin_r},,,{furi_line}\n')
|
||||||
|
kanji_line += f'{{\\K{centiseconds[i]-centiseconds[i0]}}}{k}'
|
||||||
|
file.write(f'{sub_preamble},Kanji,,,,,,{kanji_line}\n')
|
||||||
|
for line in furi_lines:
|
||||||
|
file.write(line)
|
||||||
|
|
||||||
|
example_layout = r'''
|
||||||
Dialogue: 0,0:01:08.00,0:01:26.00,Kanji,,,,,,{\k0}{\K100}雨{\K100}や{\K100}雪{\K100}が{\K100}天{\K100}から{\K100}降{\K100}って{\K100}地{\K100}を{\K100}潤{\K100}し {\K100}芽{\K100}を{\K100}出{\K100}さ{\K100}せ{\K100}る
|
Dialogue: 0,0:01:08.00,0:01:26.00,Kanji,,,,,,{\k0}{\K100}雨{\K100}や{\K100}雪{\K100}が{\K100}天{\K100}から{\K100}降{\K100}って{\K100}地{\K100}を{\K100}潤{\K100}し {\K100}芽{\K100}を{\K100}出{\K100}さ{\K100}せ{\K100}る
|
||||||
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0,1130,,,{\k0}{\K100}あめ
|
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0,1130,,,{\k0}{\K100}あめ
|
||||||
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0, 900,,,{\k200}{\K100}ゆき
|
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0, 900,,,{\k200}{\K100}ゆき
|
||||||
|
|
Loading…
Reference in New Issue