From bf67d87387b1dfc4d3d8e0661bfe4efb1e4083b2 Mon Sep 17 00:00:00 2001 From: Theodore Abshire Date: Wed, 12 Jan 2022 16:02:24 -0800 Subject: [PATCH] fix(text): Fix caption overlap. This changes the TTML parser to not allow cue regions to be inherited to the children of the element the region was originally assigned on, except for the purposes of styles (colors, etc). To allow regions on elements "above" the cues in TTML, such as the or
elements, this also changes the TTML parser to render the full structure of the TTML file as a tree of cues. The end result will be a single cue representing the , with children representing the
elements inside it, and those will have children that represent the actual cues. Now that our text displayer can intelligently update child cues as they enter or leave the display window, this approach should be possible. Closes #3850 Closes #3741 Change-Id: Ia8d750daa06920610c04e9b26e29d2d304eaf8a9 --- externs/shaka/text.js | 10 + lib/text/cue.js | 6 + lib/text/ttml_text_parser.js | 90 +++-- lib/text/ui_text_displayer.js | 2 +- test/test/util/ttml_utils.js | 81 ++++ test/text/mp4_ttml_parser_unit.js | 29 +- test/text/ttml_text_parser_unit.js | 602 ++++++++++++++++++----------- 7 files changed, 534 insertions(+), 286 deletions(-) create mode 100644 test/test/util/ttml_utils.js diff --git a/externs/shaka/text.js b/externs/shaka/text.js index 1fff8609e1..979f427ef6 100644 --- a/externs/shaka/text.js +++ b/externs/shaka/text.js @@ -363,6 +363,16 @@ shaka.extern.Cue = class { */ this.nestedCues; + /** + * If true, this represents a container element that is "above" the main + * cues. For example, the and
tags that contain the

tags + * in a TTML file. This controls the flow of the final cues; any nested cues + * within an "isContainer" cue will be laid out as separate lines. + * @type {boolean} + * @exportDoc + */ + this.isContainer; + /** * Whether or not the cue only acts as a line break between two nested cues. * Should only appear in nested cues. diff --git a/lib/text/cue.js b/lib/text/cue.js index 40d24cb515..485aa58a55 100644 --- a/lib/text/cue.js +++ b/lib/text/cue.js @@ -222,6 +222,12 @@ shaka.text.Cue = class { */ this.nestedCues = []; + /** + * @override + * @exportInterface + */ + this.isContainer = false; + /** * @override * @exportInterface diff --git a/lib/text/ttml_text_parser.js b/lib/text/ttml_text_parser.js index 3b89f70ee5..7dedda641a 100644 --- a/lib/text/ttml_text_parser.js +++ b/lib/text/ttml_text_parser.js @@ -147,28 +147,15 @@ shaka.text.TtmlTextParser = class { shaka.util.Error.Code.INVALID_TEXT_CUE, ' can only be inside

in TTML'); } + } - const pChildren = XmlUtils.findChildren(div, 'p'); - if (pChildren && pChildren.length) { - for (const p of pChildren) { - const cue = TtmlTextParser.parseCue_( - p, time.periodStart, rateInfo, metadataElements, styles, - regionElements, cueRegions, whitespaceTrim, - cellResolutionInfo, /* parentCueElement= */ null); - if (cue) { - cues.push(cue); - } - } - } else { - // Only used for parsing the background image - const cue = TtmlTextParser.parseCue_( - div, time.periodStart, rateInfo, metadataElements, styles, - regionElements, cueRegions, whitespaceTrim, - cellResolutionInfo, /* parentCueElement= */ null); - if (cue) { - cues.push(cue); - } - } + const cue = TtmlTextParser.parseCue_( + body, time.periodStart, rateInfo, metadataElements, styles, + regionElements, cueRegions, whitespaceTrim, + cellResolutionInfo, /* parentCueElement= */ null, + /* isContent= */ false); + if (cue) { + cues.push(cue); } } @@ -188,16 +175,17 @@ shaka.text.TtmlTextParser = class { * @param {boolean} whitespaceTrim * @param {?{columns: number, rows: number}} cellResolution * @param {?Element} parentCueElement + * @param {boolean} isContent * @return {shaka.text.Cue} * @private */ static parseCue_( cueNode, offset, rateInfo, metadataElements, styles, regionElements, - cueRegions, whitespaceTrim, cellResolution, parentCueElement) { + cueRegions, whitespaceTrim, cellResolution, parentCueElement, isContent) { /** @type {Element} */ let cueElement; /** @type {Element} */ - let parentElement = /** @type {Element} */(cueNode.parentNode); + let parentElement = /** @type {Element} */ (cueNode.parentNode); if (cueNode.nodeType == Node.COMMENT_NODE) { // The comments do not contain information that interests us here. @@ -205,6 +193,12 @@ shaka.text.TtmlTextParser = class { } if (cueNode.nodeType == Node.TEXT_NODE) { + if (!isContent) { + // Ignore text elements outside the content. For example, whitespace + // on the same lexical level as the

elements, in a document with + // xml:space="preserve", should not be renderer. + return null; + } // This should generate an "anonymous span" according to the TTML spec. // So pretend the element was a . parentElement was set above, so // we should still be able to correctly traverse up for timing @@ -219,6 +213,21 @@ shaka.text.TtmlTextParser = class { } goog.asserts.assert(cueElement, 'cueElement should be non-null!'); + let imageElement = null; + for (const nameSpace of shaka.text.TtmlTextParser.smpteNsList_) { + imageElement = shaka.text.TtmlTextParser.getElementsFromCollection_( + cueElement, 'backgroundImage', metadataElements, '#', + nameSpace)[0]; + if (imageElement) { + break; + } + } + + const parentIsContent = isContent; + if (cueNode.nodeName == 'p' || imageElement) { + isContent = true; + } + const spaceStyle = cueElement.getAttribute('xml:space') || (whitespaceTrim ? 'default' : 'preserve'); @@ -245,6 +254,7 @@ shaka.text.TtmlTextParser = class { localWhitespaceTrim, cellResolution, cueElement, + isContent, ); // This node may or may not generate a nested cue. @@ -254,7 +264,7 @@ shaka.text.TtmlTextParser = class { } } - const isNested = /** @type {boolean} */(parentCueElement != null); + const isNested = /** @type {boolean} */ (parentCueElement != null); // In this regex, "\S" means "non-whitespace character". const hasTextContent = /\S/.test(cueElement.textContent); @@ -336,6 +346,13 @@ shaka.text.TtmlTextParser = class { const cue = new shaka.text.Cue(start, end, payload); cue.nestedCues = nestedCues; + if (!isContent) { + // If this is not a

element or a

with images, and it has no + // parent that was a

element, then it's part of the outer containers + // (e.g. the or a normal

element within it). + cue.isContainer = true; + } + if (cellResolution) { cue.cellResolution = cellResolution; } @@ -343,23 +360,16 @@ shaka.text.TtmlTextParser = class { // Get other properties if available. const regionElement = shaka.text.TtmlTextParser.getElementsFromCollection_( cueElement, 'region', regionElements, /* prefix= */ '')[0]; - if (regionElement && regionElement.getAttribute('xml:id')) { - const regionId = regionElement.getAttribute('xml:id'); - cue.region = cueRegions.filter((region) => region.id == regionId)[0]; - } - - let imageElement = null; - for (const nameSpace of shaka.text.TtmlTextParser.smpteNsList_) { - imageElement = shaka.text.TtmlTextParser.getElementsFromCollection_( - cueElement, 'backgroundImage', metadataElements, '#', - nameSpace)[0]; - if (imageElement) { - break; + // Do not actually apply that region unless it is non-inherited, though. + // This makes it so that, if a parent element has a region, the children + // don't also all independently apply the positioning of that region. + if (cueElement.hasAttribute('region')) { + if (regionElement && regionElement.getAttribute('xml:id')) { + const regionId = regionElement.getAttribute('xml:id'); + cue.region = cueRegions.filter((region) => region.id == regionId)[0]; } } - const isLeaf = nestedCues.length == 0; - let regionElementForStyle = regionElement; if (parentCueElement && isNested && !cueElement.getAttribute('region') && !cueElement.getAttribute('style')) { @@ -375,8 +385,8 @@ shaka.text.TtmlTextParser = class { regionElementForStyle, imageElement, styles, - isNested, - isLeaf); + /** isNested= */ parentIsContent, // "nested in a
" doesn't count. + /** isLeaf= */ (nestedCues.length == 0)); return cue; } diff --git a/lib/text/ui_text_displayer.js b/lib/text/ui_text_displayer.js index 04ab02bcb8..08c476c736 100644 --- a/lib/text/ui_text_displayer.js +++ b/lib/text/ui_text_displayer.js @@ -456,7 +456,7 @@ shaka.text.UITextDisplayer = class { // The displayAlign attribute specifies the vertical alignment of the // captions inside the text container. Before means at the top of the // text container, and after means at the bottom. - if (isNested) { + if (isNested && !parents[parents.length - 1].isContainer) { style.display = 'inline'; } else { style.display = 'flex'; diff --git a/test/test/util/ttml_utils.js b/test/test/util/ttml_utils.js new file mode 100644 index 0000000000..4489e0d5db --- /dev/null +++ b/test/test/util/ttml_utils.js @@ -0,0 +1,81 @@ +/*! @license + * Shaka Player + * Copyright 2016 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +goog.provide('shaka.test.TtmlUtils'); + +shaka.test.TtmlUtils = class { + /** + * @param {!Array} expectedCues + * @param {!Array} actualCues + * @param {!Object} bodyProperties + * @param {Object=} divProperties + */ + static verifyHelper(expectedCues, actualCues, bodyProperties, divProperties) { + const mapExpected = (cue) => { + if (cue.region) { + cue.region = jasmine.objectContaining(cue.region); + } + + if (cue.nestedCues && (cue.nestedCues instanceof Array)) { + cue.nestedCues = cue.nestedCues.map(mapExpected); + } + + if (cue.isContainer == undefined) { + // If not specified to be true, check for isContainer to be false. + cue.isContainer = false; + } + + return jasmine.objectContaining(cue); + }; + + /** + * @param {!Object} properties + * @return {!shaka.extern.Cue} + */ + const makeContainer = (properties) => { + const region = { + id: '', + viewportAnchorX: 0, + viewportAnchorY: 0, + regionAnchorX: 0, + regionAnchorY: 0, + width: 100, + height: 100, + widthUnits: shaka.text.CueRegion.units.PERCENTAGE, + heightUnits: shaka.text.CueRegion.units.PERCENTAGE, + viewportAnchorUnits: shaka.text.CueRegion.units.PERCENTAGE, + scroll: '', + }; + const containerCue = /** @type {!shaka.extern.Cue} */ ({ + region, + nestedCues: jasmine.any(Object), + payload: '', + startTime: 0, + endTime: Infinity, + isContainer: true, + }); + Object.assign(containerCue, properties); + return mapExpected(containerCue); + }; + + if (expectedCues.length == 0 && !divProperties) { + expect(actualCues.length).toBe(0); + } else { + // Body. + expect(actualCues.length).toBe(1); + const body = actualCues[0]; + expect(body).toEqual(makeContainer(bodyProperties)); + + // Div. + expect(body.nestedCues.length).toBe(1); + const div = body.nestedCues[0]; + expect(div).toEqual(makeContainer(divProperties || bodyProperties)); + + // Cues. + expect(div.nestedCues).toEqual(expectedCues.map(mapExpected)); + } + } +}; diff --git a/test/text/mp4_ttml_parser_unit.js b/test/text/mp4_ttml_parser_unit.js index 22a1b8799b..d1410062bd 100644 --- a/test/text/mp4_ttml_parser_unit.js +++ b/test/text/mp4_ttml_parser_unit.js @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +goog.require('shaka.test.TtmlUtils'); goog.require('shaka.test.Util'); goog.require('shaka.text.Mp4TtmlParser'); goog.require('shaka.util.BufferUtils'); @@ -48,7 +49,14 @@ describe('Mp4TtmlParser', () => { parser.parseInit(ttmlInitSegment); const time = {periodStart: 0, segmentStart: 0, segmentEnd: 0}; const ret = parser.parseMedia(ttmlSegmentMultipleMDAT, time); - expect(ret.length).toBe(20); + // Bodies. + expect(ret.length).toBe(2); + // Divs. + expect(ret[0].nestedCues.length).toBe(1); + expect(ret[1].nestedCues.length).toBe(1); + // Cues. + expect(ret[0].nestedCues[0].nestedCues.length).toBe(10); + expect(ret[1].nestedCues[0].nestedCues.length).toBe(10); }); it('accounts for offset', () => { @@ -159,22 +167,7 @@ describe('Mp4TtmlParser', () => { parser.parseInit(ttmlInitSegment); const time = {periodStart: 0, segmentStart: 0, segmentEnd: 0}; const result = parser.parseMedia(ttmlSegment, time); - verifyHelper(cues, result); + shaka.test.TtmlUtils.verifyHelper( + cues, result, {startTime: 23, endTime: 53.5}); }); - - function verifyHelper(/** !Array */ expected, /** !Array */ actual) { - const mapExpected = (cue) => { - if (cue.region) { - cue.region = jasmine.objectContaining(cue.region); - } - - if (cue.nestedCues) { - cue.nestedCues = cue.nestedCues.map(mapExpected); - } - - return jasmine.objectContaining(cue); - }; - - expect(actual).toEqual(expected.map(mapExpected)); - } }); diff --git a/test/text/ttml_text_parser_unit.js b/test/text/ttml_text_parser_unit.js index ddd8e62d98..fc81aed030 100644 --- a/test/text/ttml_text_parser_unit.js +++ b/test/text/ttml_text_parser_unit.js @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +goog.require('shaka.test.TtmlUtils'); goog.require('shaka.test.Util'); goog.require('shaka.text.Cue'); goog.require('shaka.text.CueRegion'); @@ -21,20 +22,23 @@ describe('TtmlTextParser', () => { it('supports no cues', () => { verifyHelper([], '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {}); }); it('supports empty text string', () => { verifyHelper([], '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {}); }); it('supports div with no cues but whitespace', () => { verifyHelper( [], '
\r\n
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {}); }); it('supports xml:space', () => { @@ -60,7 +64,8 @@ describe('TtmlTextParser', () => { }, ], '' + ttBody + '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.03, endTime: 62.05}); // When xml:space="preserve", take them into account. verifyHelper( [ @@ -85,7 +90,8 @@ describe('TtmlTextParser', () => { }, ], '' + ttBody + '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.03, endTime: 62.05}); // The default value for xml:space is "default". verifyHelper( [ @@ -100,7 +106,8 @@ describe('TtmlTextParser', () => { }, ], '' + ttBody + '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.03, endTime: 62.05}); // Any other value is rejected as an error. errorHelper(shaka.util.Error.Code.INVALID_XML, @@ -130,7 +137,8 @@ describe('TtmlTextParser', () => { }, ], '' + ttBody + '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.03, endTime: 62.05}); }); it('rejects invalid ttml', () => { @@ -199,7 +207,8 @@ describe('TtmlTextParser', () => { 'First cue
Second cue' + '

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('supports anonymous spans as nestedCues of paragraphs', () => { @@ -234,7 +243,8 @@ describe('TtmlTextParser', () => { 'First cue
Second cue' + '

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('supports multiple levels of nestedCues', () => { @@ -292,7 +302,8 @@ describe('TtmlTextParser', () => { '' + '

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('inherits timing information of nested cues if unprovided', () => { @@ -307,9 +318,11 @@ describe('TtmlTextParser', () => { }, ], '' + - '
Test
' + + '

' + + 'Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('does not discard cues with image subcues', () => { @@ -334,11 +347,12 @@ describe('TtmlTextParser', () => { '' + '' + 'base64EncodedImage' + - '' + - '
' + - '', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + '
' + + '

' + + '
', + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('supports colon formatted time', () => { @@ -349,7 +363,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2}); }); it('accounts for offset', () => { @@ -360,7 +375,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 7, segmentStart: 0, segmentEnd: 0}); + {periodStart: 7, segmentStart: 0, segmentEnd: 0}, + {startTime: 69.05, endTime: 3730.2}); }); it('supports nested cues with an offset', () => { @@ -382,7 +398,8 @@ describe('TtmlTextParser', () => { '
' + '

Nested cue

' + '
', - {periodStart: 7, segmentStart: 0, segmentEnd: 0}); + {periodStart: 7, segmentStart: 0, segmentEnd: 0}, + {startTime: 69.05, endTime: 3730.2}); }); it('supports time in 0.00h 0.00m 0.00s format', () => { @@ -393,7 +410,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 3567.03, endTime: 5402.3}); }); it('supports time with frame rate', () => { @@ -406,7 +424,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 615.5, endTime: 663}); }); it('supports time with frame rate multiplier', () => { @@ -419,7 +438,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 615.5, endTime: 663}); }); it('supports time with subframes', () => { @@ -436,7 +456,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: Util.closeTo(615.5 + 1 / 60), endTime: 663}); }); it('supports time in frame format', () => { @@ -449,7 +470,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 2.5, endTime: Util.closeTo(10.01)}); }); it('supports time in tick format', () => { @@ -462,7 +484,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 5, endTime: Util.closeTo(6.02)}); }); it('supports time with duration', () => { @@ -473,7 +496,8 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 67.05}); }); it('supports comments in the body', () => { @@ -482,7 +506,29 @@ describe('TtmlTextParser', () => { '
' + '' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {}); + }); + + it('does not inherit regions', () => { + verifyHelper( + [ + { + startTime: 62.05, + endTime: 3723.2, + payload: 'Test', + }, + ], + '' + + '' + + '' + + '' + + '
' + + '

Test

' + + '
', + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2, region: {id: 'subtitleArea'}}, + {startTime: 62.05, endTime: 3723.2}); }); it('parses alignment from textAlign attribute of a region', () => { @@ -502,7 +548,9 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2, region: {id: 'subtitleArea'}}, + {startTime: 62.05, endTime: 3723.2}); }); it('allows non-standard namespace names', () => { @@ -522,7 +570,9 @@ describe('TtmlTextParser', () => { '
' + '

Test

' + '
', - {periodStart: 0, segmentStart: 0, segmentEnd: 0}); + {periodStart: 0, segmentStart: 0, segmentEnd: 0}, + {startTime: 62.05, endTime: 3723.2, region: {id: 'subtitleArea'}}, + {startTime: 62.05, endTime: 3723.2}); }); it('parses alignment from