Subtitle generation with slightly wrong furigana margin positioning
This commit is contained in:
parent
e7ffe3e7e7
commit
55017bcac6
|
@ -2,15 +2,20 @@ from collections import namedtuple
|
|||
# store '{与|あた}えた{使命|しめい} ' as [('与','あた'), ('えた',''), ('使命','しめい'), (' ','')]
|
||||
FuriBlock = namedtuple('FuriBlock', ['kanji', 'furi'])
|
||||
|
||||
# spb (seconds per beat) is preferred to bpm (beats per minute)
|
||||
# spb = 60.0/bpm
|
||||
class LyricLine:
|
||||
beat_stamps: list[float] = [] # Start at zero for each line, do real timing via get_timestamps()
|
||||
translated_line: str
|
||||
hiragana_syllables: list[str] # Allow space entries which will be skipped over when calculating timing
|
||||
romaji_syllables: list[str] # Allow space entries which will be skipped over when calculating timing
|
||||
furi_blocks: list[FuriBlock]
|
||||
|
||||
def get_timestamps(self, bpm: float, start_offset: float) -> list[float]:
|
||||
spb = 60.0/bpm # seconds per beat
|
||||
def get_timestamps(self, spb: float, start_offset: float) -> list[float]:
    """Map this line's beat stamps onto shifted, scaled time values.

    Args:
        spb: Seconds per beat (60.0 / bpm).
        start_offset: Value added to every scaled beat stamp.

    Returns:
        One float per entry of ``self.beat_stamps``, in the same order.
    """
    absolute_times = []
    for beat in self.beat_stamps:
        absolute_times.append(start_offset + beat * spb)
    return absolute_times
|
||||
|
||||
def get_karaoke_centiseconds(self, spb: float) -> list[int]:
    """Return every beat stamp of this line as a whole number of centiseconds.

    Args:
        spb: Seconds per beat (60.0 / bpm).

    Returns:
        One int per entry of ``self.beat_stamps``: ``spb * beat * 100``
        rounded to the nearest integer.
    """
    # round() instead of int(): the float product can land just below a whole
    # number (0.29 * 100 == 28.999999999999996), and truncating would then
    # drop a centisecond from that syllable's boundary.
    return [round(spb * beat * 100) for beat in self.beat_stamps]
|
||||
|
||||
class LyricTrack:
|
||||
lines: list[LyricLine]
|
||||
|
|
|
@ -37,7 +37,7 @@ def parse_jp_text(text: str) -> list[tuple[str, str]]:
|
|||
# Our custom word overrides have two levels:
|
||||
# - One is a simple search-replace to turn matches into manual furigana "{kanji|furi}" format. This could have false hits on short words.
|
||||
# - The latter is to override a word's kana post-tokenization, which requires it to be a dictionary word with multiple readings.
|
||||
word_overrides = {'主': 'しゅ'}
|
||||
word_overrides = {'私': 'わたし', '主': 'しゅ'}
|
||||
re_manual_furi = re.compile(r'{(.+?)\|(.+?)}')
|
||||
|
||||
def manual_furi_string_to_blocks(line: str) -> list[FuriBlock]:
|
||||
|
@ -141,10 +141,17 @@ def parse_japanese_line(line: str):
|
|||
for kanji, hiragana in output['word_pairs']:
|
||||
output['furi_blocks'] += word_to_furi_blocks(kanji, hiragana)
|
||||
|
||||
# Create word-spaced romaji syllables
|
||||
# Create word-spaced hiragana and romaji syllables
|
||||
output['hiragana_syllables'] = [] # Will have spaces mixed in so must be iterated for timing
|
||||
output['romaji_syllables'] = [] # Will have spaces mixed in so must be iterated for timing
|
||||
for _, hiragana in output['word_pairs']:
|
||||
output['romaji_syllables'] += [to_romaji(s) for syl in kana_to_syllable_list(hiragana) if (s:= syl.strip())]
|
||||
if output['romaji_syllables'][-1] != ' ':
|
||||
l = [s for syl in kana_to_syllable_list(hiragana) if (s:= syl.strip())]
|
||||
output['hiragana_syllables'] += l
|
||||
output['romaji_syllables'] += [to_romaji(s) for syl in l if (s:= syl.strip())]
|
||||
if len(l) > 0:
|
||||
output['hiragana_syllables'].append(' ')
|
||||
output['romaji_syllables'].append(' ')
|
||||
if len(output['romaji_syllables']) > 0: # remove trailing space
|
||||
output['hiragana_syllables'].pop()
|
||||
output['romaji_syllables'].pop()
|
||||
return output
|
||||
|
|
|
@ -34,25 +34,92 @@ format_defaults = {
|
|||
'KaraokeColourPast': 'E02A0A00',
|
||||
}
|
||||
|
||||
def seconds_to_timestamp(t: float) -> str:
    """Format a time in seconds as an ``HH:MM:SS.cc`` timestamp string.

    Args:
        t: Time in seconds.

    Returns:
        Zero-padded hours, minutes and seconds, with two digits of
        centiseconds after the decimal point.
    """
    # Round to whole centiseconds BEFORE splitting into fields. Formatting the
    # seconds remainder on its own lets values such as 59.999 print as "60.00"
    # without carrying into the minutes field.
    total_centis = int(round(t * 100))
    total_seconds, centis = divmod(total_centis, 100)
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours:02}:{minutes:02}:{seconds:02}.{centis:02}'
|
||||
|
||||
from format import LyricTrack
|
||||
from japanese_converters import kana_to_syllable_list
|
||||
def generate_ass(filename: str, lyric_track: LyricTrack, format_overloads: dict = None):
|
||||
format_dict = format_defaults.copy()
|
||||
if format_overloads:
|
||||
format_dict.update(format_overloads)
|
||||
preamble = ass_preamble.format(format_dict)
|
||||
preamble = ass_preamble.format(**format_dict)
|
||||
|
||||
# Kanji Furigana layout stuff
|
||||
size_kanji = format_dict['KanjiSize']
|
||||
size_furi = format_dict['FuriSize']
|
||||
size_kanji_x = format_dict['KanjiSize'] # TODO: work out scaling factor for fullwidth from point size
|
||||
size_furi_x = format_dict['FuriSize'] # TODO: work out scaling factor for fullwidth from point size
|
||||
res_x = format_dict['PlayResX']
|
||||
|
||||
with open(filename, 'w') as file:
|
||||
file.write(preamble)
|
||||
# for line in lines:
|
||||
# #
|
||||
# for syllable in line:
|
||||
# t, kanji, furi, romaji = syllable
|
||||
t = 68.0 # placeholder
|
||||
for line in lyric_track.lines:
|
||||
t0 = seconds_to_timestamp(t)
|
||||
timestamps = line.get_timestamps(0.5, t)
|
||||
centiseconds = line.get_karaoke_centiseconds(0.5)
|
||||
t = timestamps[-1] + 1.0 # placeholder
|
||||
t1 = seconds_to_timestamp(t)
|
||||
sub_preamble = f'Dialogue: 0,{t0},{t1}'
|
||||
|
||||
example_layout = '''
|
||||
# Translation line is easy and static
|
||||
file.write(f'{sub_preamble},Translation,,,,,,{line.translated_line}\n')
|
||||
|
||||
# Romaji line is also easy, just intersperse durations
|
||||
romaji_line = f'{{\\k{centiseconds[0]}}}'
|
||||
i = 0 # syllable counter
|
||||
for syl in line.romaji_syllables:
|
||||
if not syl.strip():
|
||||
romaji_line += f'{{\\k0}}{syl}'
|
||||
continue
|
||||
romaji_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||
i += 1
|
||||
file.write(f'{sub_preamble},Romaji,,,,,,{romaji_line}\n')
|
||||
|
||||
# Now for the kanji and furi lines...
|
||||
kanji_plain_str = ''.join([b.kanji for b in line.furi_blocks])
|
||||
full_kanji_width = len(kanji_plain_str) * size_kanji_x
|
||||
kanji_line = f'{{\\k{centiseconds[0]}}}'
|
||||
kanji_line_progress = 0 # increment as we go, to track furi position
|
||||
furi_lines = []
|
||||
i = 0 # syllable counter
|
||||
for furi_block in line.furi_blocks:
|
||||
if len(furi_block.furi) == 0: # kana or punctuation, nice and simple!
|
||||
syls = kana_to_syllable_list(furi_block.kanji)
|
||||
for syl in syls:
|
||||
if len(syl.strip()) == 0: # don't time spaces
|
||||
kanji_line += f'{{\\k0}}{syl}'
|
||||
kanji_line_progress += len(syl)
|
||||
else:
|
||||
kanji_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||
kanji_line_progress += len(syl)
|
||||
i += 1
|
||||
else: # Kanji block
|
||||
i0 = i # Store this to later calculate block time for the kanji
|
||||
syls = kana_to_syllable_list(furi_block.furi)
|
||||
furi_line = f'{{\\k{centiseconds[i]}}}'
|
||||
furi_chars = 0
|
||||
for syl in syls:
|
||||
furi_line += f'{{\\K{centiseconds[i+1]-centiseconds[i]}}}{syl}'
|
||||
furi_chars += len(syl)
|
||||
i += 1
|
||||
# Need to calculate kanji block position and span to typeset the furigana above it
|
||||
k = furi_block.kanji
|
||||
k_start = kanji_line_progress
|
||||
kanji_line_progress += len(k)
|
||||
k_end = kanji_line_progress
|
||||
target_middle_x = (size_kanji_x * (k_end+k_start)/2) - (full_kanji_width/2) # x=0 at center
|
||||
furi_width = furi_chars * size_furi_x
|
||||
margin_l = 0 if target_middle_x < 0 else int(target_middle_x*1.57)
|
||||
margin_r = 0 if target_middle_x > 0 else int(-target_middle_x*1.57)
|
||||
furi_lines.append(f'{sub_preamble},Furigana,,{margin_l},{margin_r},,,{furi_line}\n')
|
||||
kanji_line += f'{{\\K{centiseconds[i]-centiseconds[i0]}}}{k}'
|
||||
file.write(f'{sub_preamble},Kanji,,,,,,{kanji_line}\n')
|
||||
for line in furi_lines:
|
||||
file.write(line)
|
||||
|
||||
example_layout = r'''
|
||||
Dialogue: 0,0:01:08.00,0:01:26.00,Kanji,,,,,,{\k0}{\K100}雨{\K100}や{\K100}雪{\K100}が{\K100}天{\K100}から{\K100}降{\K100}って{\K100}地{\K100}を{\K100}潤{\K100}し {\K100}芽{\K100}を{\K100}出{\K100}さ{\K100}せ{\K100}る
|
||||
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0,1130,,,{\k0}{\K100}あめ
|
||||
Dialogue: 0,0:01:08.00,0:01:26.00,Furigana,, 0, 900,,,{\k200}{\K100}ゆき
|
||||
|
|
Loading…
Reference in New Issue