Implement `--sub-format` in dl to set output subtitle format

The default is still SubRip SRT, but you can now change the output format to almost any of the available Codec options. There is not yet an option to leave the subtitle format as-is; i.e., if a title has both an SRT and a WebVTT subtitle, you cannot keep them both in their original formats.

As always, you can configure a default in your config file, e.g.,

```yaml
dl:
  sub_format: vtt
```

Note, though, that SSA, SSAv4, fTTML, and fVTT are not yet supported as output formats. There are no plans to support fTTML or fVTT.
This commit is contained in:
rlaphoenix 2023-12-02 17:56:40 +00:00
parent e87de50940
commit 8cd6dfb65a
2 changed files with 36 additions and 10 deletions

View File

@ -120,7 +120,7 @@ For example to set the default primary language to download to German,
lang: de
```
or to set `--bitrate=CVBR` for the AMZN service,
to set `--bitrate=CVBR` for the AMZN service,
```yaml
lang: de
@ -128,6 +128,12 @@ AMZN:
bitrate: CVBR
```
or to change the output subtitle format from the default (SubRip SRT) to WebVTT,
```yaml
sub_format: vtt
```
## downloader (str)
Choose what software to use to download data throughout Devine where needed.

View File

@ -102,6 +102,9 @@ class dl:
help="Proxy URI to use. If a 2-letter country is provided, it will try get a proxy from the config.")
@click.option("--group", type=str, default=None,
help="Set the Group Tag to be used, overriding the one in config if any.")
@click.option("--sub-format", type=click.Choice(Subtitle.Codec, case_sensitive=False),
default=Subtitle.Codec.SubRip,
help="Set Output Subtitle Format, only converting if necessary.")
@click.option("-V", "--video-only", is_flag=True, default=False,
help="Only download video tracks.")
@click.option("-A", "--audio-only", is_flag=True, default=False,
@ -261,6 +264,7 @@ class dl:
lang: list[str],
v_lang: list[str],
s_lang: list[str],
sub_format: Subtitle.Codec,
video_only: bool,
audio_only: bool,
subs_only: bool,
@ -575,18 +579,34 @@ class dl:
break
video_track_n += 1
with console.status(f"Converting subtitles to {Subtitle.Codec.SubRip}..."):
with console.status(f"Converting Subtitles to {sub_format.name}..."):
for subtitle in title.tracks.subtitles:
# convert subs to SRT unless it's already SRT, or SSA
if subtitle.codec not in (Subtitle.Codec.SubRip, Subtitle.Codec.SubStationAlphav4):
if subtitle.codec != sub_format:
writer = {
Subtitle.Codec.SubRip: pycaption.SRTWriter,
Subtitle.Codec.SubStationAlpha: None,
Subtitle.Codec.SubStationAlphav4: None,
Subtitle.Codec.TimedTextMarkupLang: pycaption.DFXPWriter,
Subtitle.Codec.WebVTT: pycaption.WebVTTWriter,
# MPEG-DASH box-encapsulated subtitle formats
Subtitle.Codec.fTTML: None,
Subtitle.Codec.fVTT: None,
}[sub_format]
if writer is None:
self.log.error(f"Cannot yet convert {subtitle.codec} to {sub_format.name}...")
sys.exit(1)
caption_set = subtitle.parse(subtitle.path.read_bytes(), subtitle.codec)
subtitle.merge_same_cues(caption_set)
srt = pycaption.SRTWriter().write(caption_set)
# NOW sometimes has this, when it isn't, causing mux problems
srt = srt.replace("MULTI-LANGUAGE SRT\n", "")
subtitle.path.write_text(srt, encoding="utf8")
subtitle.codec = Subtitle.Codec.SubRip
subtitle.move(subtitle.path.with_suffix(".srt"))
subtitle_text = writer().write(caption_set)
if sub_format == Subtitle.Codec.SubRip:
# NOW sometimes has this, when it isn't, causing mux problems
subtitle_text = subtitle_text.replace("MULTI-LANGUAGE SRT\n", "")
subtitle.path.write_text(subtitle_text, encoding="utf8")
subtitle.codec = sub_format
subtitle.move(subtitle.path.with_suffix(f".{sub_format.value.lower()}"))
with console.status("Repackaging tracks with FFMPEG..."):
has_repacked = False