// Source: shaka-packager/packager/media/base/text_sample.h (153 lines, 4.8 KiB)

// Copyright 2017 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
#ifndef PACKAGER_MEDIA_BASE_TEXT_SAMPLE_H_
#define PACKAGER_MEDIA_BASE_TEXT_SAMPLE_H_
#include <cstdint>
#include <optional>
#include <string>
#include <vector>
namespace shaka {
namespace media {
/// Unit in which a text layout number is expressed.
enum class TextUnitType {
  /// Absolute measurement, counted in pixels.
  kPixels,

  /// Absolute measurement, counted in lines.
  kLines,

  /// Relative measurement, given as a percentage (i.e. 0-100) of some size.
  kPercent,
};
/// Direction in which the text of a cue is laid out.
/// NOTE(review): the vertical variants presumably correspond to the WebVTT
/// "vertical:rl" / "vertical:lr" cue settings -- confirm against the parsers.
enum class WritingDirection {
  /// Horizontal text.
  kHorizontal,

  /// Vertical text; judging by the name, successive lines grow to the left.
  kVerticalGrowingLeft,

  /// Vertical text; judging by the name, successive lines grow to the right.
  kVerticalGrowingRight,
};
/// How text is aligned inside its cue box.
enum class TextAlignment {
  /// Text is aligned to the start, as determined by the Unicode text
  /// direction.
  kStart,

  /// Text is centered in the box.
  kCenter,

  /// Text is aligned to the end, as determined by the Unicode text direction.
  kEnd,

  /// Text is aligned to the left side (top for non-horizontal text).
  kLeft,

  /// Text is aligned to the right side (bottom for non-horizontal text).
  kRight,
};
/// A numeric layout value together with the unit it is measured in.
struct TextNumber {
  /// Magnitude of the number; its meaning depends on `type`.
  float value;

  /// Unit in which `value` is expressed.
  TextUnitType type;

  /// Creates a number from its magnitude and unit. There is no default
  /// constructor, so both parts must always be supplied.
  TextNumber(float value, TextUnitType type) : value{value}, type{type} {}
};
/// Layout settings attached to a text cue. Each optional field is empty
/// unless a value has been assigned to it.
struct TextSettings {
  /// Line offset of the cue; for horizontal cues this is the vertical offset.
  /// Percent units are measured relative to the window.
  std::optional<TextNumber> line;

  /// Position offset of the cue; for horizontal cues this is the horizontal
  /// offset. Percent units are measured relative to the window.
  std::optional<TextNumber> position;

  /// Extent of the area cues are drawn in: the width for horizontal cues, the
  /// height for vertical cues. Percent units are measured relative to the
  /// window.
  std::optional<TextNumber> width;

  /// Extent of the area cues are drawn in: the height for horizontal cues,
  /// the width for vertical cues. Percent units are measured relative to the
  /// window.
  std::optional<TextNumber> height;

  /// Region in which the cue is drawn. Empty by default.
  std::string region;

  /// Direction in which text is drawn; also used to decide how cues are
  /// positioned within the region. Defaults to horizontal.
  WritingDirection writing_direction{WritingDirection::kHorizontal};

  /// Alignment of the text inside the cue box. Defaults to centered.
  TextAlignment text_alignment{TextAlignment::kCenter};
};
/// Styling that can be attached to a TextFragment.
///
/// The boolean properties are std::optional so that "not specified" can be
/// told apart from an explicit true/false.
/// NOTE(review): an empty optional presumably means "inherit from the
/// enclosing fragment" -- confirm against the writers that consume it.
struct TextFragmentStyle {
  /// Whether the text is underlined; empty when not specified.
  std::optional<bool> underline;
  /// Whether the text is bold; empty when not specified.
  std::optional<bool> bold;
  /// Whether the text is italic; empty when not specified.
  std::optional<bool> italic;

  // The colors could be any string that can be interpreted as
  // a color in TTML (or WebVTT). As a start, the 8 teletext colors are used,
  // i.e. black, red, green, yellow, blue, magenta, cyan, and white.

  /// Text (foreground) color; empty by default.
  std::string color;
  /// Background color; empty by default.
  std::string backgroundColor;
};
/// A node in a recursive tree of styled text blocks. Exactly one payload --
/// sub_fragments, body, image, or newline -- is expected to be populated on
/// any given node.
struct TextFragment {
  /// Style applied to this fragment.
  TextFragmentStyle style;

  /// Child fragments, when this node is a container.
  std::vector<TextFragment> sub_fragments;

  /// Text content, when this node is a text leaf.
  std::string body;

  /// PNG image data, when this node is an image leaf.
  std::vector<uint8_t> image;

  /// True when this node stands for a line break.
  bool newline = false;

  /// Creates a fragment with default style and no payload.
  TextFragment() {}

  /// Creates a container fragment holding copies of `sub_fragments`.
  TextFragment(const TextFragmentStyle& style,
               const std::vector<TextFragment>& sub_fragments)
      : style(style), sub_fragments(sub_fragments) {}

  /// Creates a text fragment from a C string. Without this overload a string
  /// literal would bind to the `bool newline` constructor, because the
  /// pointer-to-bool conversion is preferred over the conversion to
  /// std::string.
  TextFragment(const TextFragmentStyle& style, const char* body)
      : style(style), body(body) {}

  /// Creates a text fragment holding a copy of `body`.
  TextFragment(const TextFragmentStyle& style, const std::string& body)
      : style(style), body(body) {}

  /// Creates an image fragment holding a copy of the given PNG bytes.
  TextFragment(const TextFragmentStyle& style,
               const std::vector<uint8_t>& image)
      : style(style), image(image) {}

  /// Creates a fragment whose `newline` flag is set to the given value.
  TextFragment(const TextFragmentStyle& style, bool newline)
      : style(style), newline(newline) {}

  /// Defined out of line.
  /// NOTE(review): presumably reports whether none of the payloads carry any
  /// content -- confirm in the implementation file.
  bool is_empty() const;
};
/// A single timed text cue: an identifier, its timing, layout settings and
/// the (possibly nested) text body. Apart from the sub-stream index, the
/// contents can only be read through const accessors after construction.
class TextSample {
 public:
  /// Defined out of line.
  /// @param id identifier of the cue.
  /// @param start_time start of the cue.
  /// @param end_time end of the cue. NOTE(review): the class stores a
  ///        duration rather than an end time, presumably computed as
  ///        `end_time - start_time` -- confirm in the implementation file.
  /// @param settings layout settings for the cue.
  /// @param body content of the cue.
  TextSample(const std::string& id,
             int64_t start_time,
             int64_t end_time,
             const TextSettings& settings,
             const TextFragment& body);

  const std::string& id() const { return id_; }
  int64_t start_time() const { return start_time_; }
  int64_t duration() const { return duration_; }
  const TextSettings& settings() const { return settings_; }
  const TextFragment& body() const { return body_; }

  /// Defined out of line.
  /// NOTE(review): presumably `start_time() + duration()` -- confirm in the
  /// implementation file.
  int64_t EndTime() const;

  /// Index of the sub-stream this sample is assigned to; -1 until
  /// set_sub_stream_index() has been called.
  int32_t sub_stream_index() const { return sub_stream_index_; }
  void set_sub_stream_index(int32_t idx) { sub_stream_index_ = idx; }

 private:
  // Allow the compiler generated copy constructor and assignment operator
  // intentionally. Since the text data is typically small, the performance
  // impact is minimal.
  //
  // The members below are deliberately not declared const: a const data
  // member implicitly deletes the assignment operators and turns every move
  // into a copy. They stay read-only for users because they are only exposed
  // through const accessors.
  std::string id_;
  int64_t start_time_ = 0;
  int64_t duration_ = 0;
  TextSettings settings_;
  TextFragment body_;
  int32_t sub_stream_index_ = -1;
};
} // namespace media
} // namespace shaka
#endif // PACKAGER_MEDIA_BASE_TEXT_SAMPLE_H_