Skip to content

Commit

Permalink
Revert "fix: Fix VTT cue timing in HLS (shaka-project#4217)"
Browse files Browse the repository at this point in the history
This reverts commit 69d1c14.
  • Loading branch information
nyanmisaka committed Oct 6, 2022
1 parent caee130 commit 344e865
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 163 deletions.
128 changes: 37 additions & 91 deletions lib/media/media_source_engine.js
Expand Up @@ -106,12 +106,6 @@ shaka.media.MediaSourceEngine = class {

/** @private {string} */
this.url_ = '';

/** @private {boolean} */
this.sequenceMode_ = false;

/** @private {!shaka.util.PublicPromise.<number>} */
this.textSequenceModeOffset_ = new shaka.util.PublicPromise();
}

/**
Expand Down Expand Up @@ -337,8 +331,6 @@ shaka.media.MediaSourceEngine = class {

await this.mediaSourceOpen_;

this.sequenceMode_ = sequenceMode;

for (const contentType of streamsByType.keys()) {
const stream = streamsByType.get(contentType);
goog.asserts.assert(
Expand All @@ -356,9 +348,11 @@ shaka.media.MediaSourceEngine = class {
mimeType =
shaka.media.Transmuxer.convertTsCodecs(contentType, mimeType);
}

const sourceBuffer = this.mediaSource_.addSourceBuffer(mimeType);

if (sequenceMode) {
sourceBuffer.mode =
shaka.media.MediaSourceEngine.SourceBufferMode_.SEQUENCE;
}
this.eventManager_.listen(
sourceBuffer, 'error',
() => this.onError_(contentType));
Expand Down Expand Up @@ -521,29 +515,35 @@ shaka.media.MediaSourceEngine = class {
* @param {?boolean} hasClosedCaptions True if the buffer contains CEA closed
* captions
* @param {boolean=} seeked True if we just seeked
* @param {boolean=} sequenceMode True if sequence mode
* @return {!Promise}
*/
async appendBuffer(
contentType, data, startTime, endTime, hasClosedCaptions, seeked) {
async appendBuffer(contentType, data, startTime, endTime, hasClosedCaptions,
seeked, sequenceMode) {
const ContentType = shaka.util.ManifestParserUtils.ContentType;

if (contentType == ContentType.TEXT) {
if (this.sequenceMode_) {
// This won't be known until the first video segment is appended.
const offset = await this.textSequenceModeOffset_;
this.textEngine_.setTimestampOffset(offset);
if (startTime != null && sequenceMode && contentType != ContentType.TEXT) {
// If we just cleared the buffer and are performing an unbuffered seek, we
// need to set a new timestampOffset on the sourceBuffer.
// Don't do this for text streams, though, since they don't use
// MediaSource anyway.
if (seeked) {
const timestampOffset = /** @type {number} */ (startTime);
this.enqueueOperation_(
contentType,
() => this.setTimestampOffset_(contentType, timestampOffset));
}
await this.textEngine_.appendBuffer(data, startTime, endTime);
return;
}

if (this.transmuxers_[contentType]) {
if (contentType == ContentType.TEXT) {
await this.textEngine_.appendBuffer(data, startTime, endTime);
} else if (this.transmuxers_[contentType]) {
const transmuxedData =
await this.transmuxers_[contentType].transmux(data);
// For HLS CEA-608/708 CLOSED-CAPTIONS, text data is embedded in
// the video stream, so textEngine may not have been initialized.
if (!this.textEngine_) {
this.reinitText('text/vtt', this.sequenceMode_);
this.reinitText('text/vtt', sequenceMode || false);
}

if (transmuxedData.metadata) {
Expand All @@ -562,10 +562,15 @@ shaka.media.MediaSourceEngine = class {
closedCaptions, startTime, endTime, videoOffset);
}

data = transmuxedData.data;
let transmuxedSegment = transmuxedData.data;
transmuxedSegment = this.workAroundBrokenPlatforms_(
transmuxedSegment, startTime, contentType);

await this.enqueueOperation_(
contentType, () => this.append_(contentType, transmuxedSegment));
} else if (hasClosedCaptions) {
if (!this.textEngine_) {
this.reinitText('text/vtt', this.sequenceMode_);
this.reinitText('text/vtt', sequenceMode || false);
}
// If it is the init segment for closed captions, initialize the closed
// caption parser.
Expand All @@ -580,78 +585,19 @@ shaka.media.MediaSourceEngine = class {
closedCaptions, startTime, endTime, videoOffset);
}
}
}

data = this.workAroundBrokenPlatforms_(data, startTime, contentType);

const sourceBuffer = this.sourceBuffers_[contentType];
const SEQUENCE = shaka.media.MediaSourceEngine.SourceBufferMode_.SEQUENCE;

if (this.sequenceMode_ && sourceBuffer.mode != SEQUENCE &&
startTime != null) {
// This is the first media segment to be appended to a SourceBuffer in
// sequence mode. We set the mode late so that we can trick MediaSource
// into extracting a timestamp for us to align text segments in sequence
// mode.

// Timestamps can only be reliably extracted from video, not audio.
// Packed audio formats do not have internal timestamps at all.
// Prefer video for this when available.
const isBestSourceBufferForTimestamps =
contentType == ContentType.VIDEO ||
!(ContentType.VIDEO in this.sourceBuffers_);
if (isBestSourceBufferForTimestamps) {
// Append the segment in segments mode first, with offset of 0 and an
// open append window.
const originalRange =
[sourceBuffer.appendWindowStart, sourceBuffer.appendWindowEnd];
sourceBuffer.appendWindowStart = 0;
sourceBuffer.appendWindowEnd = Infinity;

const originalOffset = sourceBuffer.timestampOffset;
sourceBuffer.timestampOffset = 0;

await this.enqueueOperation_(
contentType, () => this.append_(contentType, data));

// Reset the offset and append window.
sourceBuffer.timestampOffset = originalOffset;
sourceBuffer.appendWindowStart = originalRange[0];
sourceBuffer.appendWindowEnd = originalRange[1];

// Now get the timestamp of the segment and compute the offset for text
// segments.
const mediaStartTime = shaka.media.TimeRangesUtils.bufferStart(
this.getBuffered_(contentType));
const textOffset = (startTime || 0) - (mediaStartTime || 0);
this.textSequenceModeOffset_.resolve(textOffset);

// Finally, clear the buffer.
await this.enqueueOperation_(
contentType,
() => this.remove_(contentType, 0, this.mediaSource_.duration));
}
data = this.workAroundBrokenPlatforms_(data, startTime, contentType);

// Now switch to sequence mode and fall through to our normal operations.
sourceBuffer.mode = SEQUENCE;
}
await this.enqueueOperation_(
contentType,
() => this.append_(contentType, data));
} else {
data = this.workAroundBrokenPlatforms_(data, startTime, contentType);

if (startTime != null && this.sequenceMode_ &&
contentType != ContentType.TEXT) {
// In sequence mode, for non-text streams, if we just cleared the buffer
// and are performing an unbuffered seek, we need to set a new
// timestampOffset on the sourceBuffer.
if (seeked) {
const timestampOffset = /** @type {number} */ (startTime);
this.enqueueOperation_(
contentType,
() => this.setTimestampOffset_(contentType, timestampOffset));
}
await this.enqueueOperation_(
contentType,
() => this.append_(contentType, data));
}

await this.enqueueOperation_(
contentType,
() => this.append_(contentType, data));
}

/**
Expand Down
3 changes: 2 additions & 1 deletion lib/media/streaming_engine.js
Expand Up @@ -1605,7 +1605,8 @@ shaka.media.StreamingEngine = class {
reference.syncTime == null ? reference.startTime : reference.syncTime,
reference.endTime,
hasClosedCaptions,
seeked);
seeked,
this.manifest_.sequenceMode);
this.destroyer_.ensureNotDestroyed();
shaka.log.v2(logPrefix, 'appended media segment');
}
Expand Down
14 changes: 7 additions & 7 deletions lib/text/vtt_text_parser.js
Expand Up @@ -68,15 +68,13 @@ shaka.text.VttTextParser = class {
// to the beginning of each segment.
// NOTE: "periodStart" is the timestamp offset applied via TextEngine.
// It is no longer closely tied to periods, but the name stuck around.
// NOTE: This offset and the flag choosing its meaning have no effect on
// HLS content, which should use X-TIMESTAMP-MAP and periodStart instead.
let offset = time.vttOffset;

// Only use 'X-TIMESTAMP-MAP' in sequence mode, as that is currently
// shorthand for HLS. Note that an offset based on the first video
// timestamp has already been extracted, and appears in periodStart.
// The relative offset from X-TIMESTAMP-MAP will be added to that for HLS.
if (blocks[0].includes('X-TIMESTAMP-MAP') && this.sequenceMode_) {
// Do not honor the 'X-TIMESTAMP-MAP' value when in sequence mode.
// That is because it is used mainly (solely?) to account for the timestamp
// offset of the video/audio; when in sequence mode, we normalize that
// timestamp offset to 0, so we should not account for it.
if (blocks[0].includes('X-TIMESTAMP-MAP') && !this.sequenceMode_) {
// https://bit.ly/2K92l7y
// The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
// the rest of the media.
Expand Down Expand Up @@ -111,6 +109,8 @@ shaka.text.VttTextParser = class {
mpegTime += shaka.text.VttTextParser.TS_ROLLOVER_;
}

// Apple-encoded HLS content uses absolute timestamps, so assume the
// presence of the map tag means the content uses absolute timestamps.
offset = time.periodStart + mpegTime / mpegTimescale - cueTime;
}
}
Expand Down
69 changes: 5 additions & 64 deletions test/text/vtt_text_parser_unit.js
Expand Up @@ -535,61 +535,7 @@ describe('VttTextParser', () => {
'Test\n\n' +
'00:00:40.000 --> 00:00:50.000 line:-1\n' +
'Test2',
{periodStart: 0, segmentStart: 25, segmentEnd: 65, vttOffset: 0},
/* sequenceMode= */ true);
});

it('ignores X-TIMESTAMP-MAP header if not in sequence mode', () => {
verifyHelper(
[
{startTime: 20, endTime: 40, payload: 'Test'},
{startTime: 40, endTime: 50, payload: 'Test2'},
],
'WEBVTT\n' +
'X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:01:00:00.000\n\n' +
'00:00:20.000 --> 00:00:40.000 line:0\n' +
'Test\n\n' +
'00:00:40.000 --> 00:00:50.000 line:-1\n' +
'Test2',
{periodStart: 0, segmentStart: 25, segmentEnd: 65, vttOffset: 0},
/* sequenceMode= */ false);
});

it('parses X-TIMESTAMP-MAP header with non-zero local base', () => {
verifyHelper(
[
{startTime: 1800, endTime: 1810, payload: 'Test'},
{startTime: 1820, endTime: 1830, payload: 'Test2'},
],
// 162000000 = 30 * 60 * 90k = 30 minutes for the TS part of the map.
// The local (VTT) part of the map is 1 hour.
// So text times of 1 hour map to media times of 30 minutes.
'WEBVTT\n' +
'X-TIMESTAMP-MAP=MPEGTS:162000000,LOCAL:01:00:00.000\n\n' +
'01:00:00.000 --> 01:00:10.000 line:0\n' +
'Test\n\n' +
'01:00:20.000 --> 01:00:30.000 line:-1\n' +
'Test2',
{periodStart: 0, segmentStart: 25, segmentEnd: 65, vttOffset: 0},
/* sequenceMode= */ true);
});

it('combines X-TIMESTAMP-MAP header with periodStart', () => {
verifyHelper(
[
{startTime: 130, endTime: 150, payload: 'Test'},
{startTime: 150, endTime: 160, payload: 'Test2'},
],
// 900000 = 10 sec, so expect every timestamp to be 10
// seconds ahead of what is specified.
'WEBVTT\n' +
'X-TIMESTAMP-MAP=MPEGTS:900000,LOCAL:00:00:00.000\n\n' +
'00:00:20.000 --> 00:00:40.000 line:0\n' +
'Test\n\n' +
'00:00:40.000 --> 00:00:50.000 line:-1\n' +
'Test2',
{periodStart: 100, segmentStart: 25, segmentEnd: 65, vttOffset: 0},
/* sequenceMode= */ true);
{periodStart: 0, segmentStart: 25, segmentEnd: 65, vttOffset: 0});
});

it('handles timestamp rollover with X-TIMESTAMP-MAP header', () => {
Expand All @@ -605,8 +551,7 @@ describe('VttTextParser', () => {
'Test',
// Non-null segmentStart takes precedence over X-TIMESTAMP-MAP.
// This protects us from rollover in the MPEGTS field.
{periodStart: 0, segmentStart: 95440, segmentEnd: 95550, vttOffset: 0},
/* sequenceMode= */ true);
{periodStart: 0, segmentStart: 95440, segmentEnd: 95550, vttOffset: 0});

verifyHelper(
[
Expand All @@ -619,8 +564,7 @@ describe('VttTextParser', () => {
'X-TIMESTAMP-MAP=MPEGTS:9745408,LOCAL:00:00:00.000\n\n' +
'00:00:00.000 --> 00:00:02.000 line:0\n' +
'Test2',
{periodStart: 0, segmentStart: 95550, segmentEnd: 95560, vttOffset: 0},
/* sequenceMode= */ true);
{periodStart: 0, segmentStart: 95550, segmentEnd: 95560, vttOffset: 0});
});

it('supports global style blocks', () => {
Expand Down Expand Up @@ -1034,14 +978,11 @@ describe('VttTextParser', () => {
* @param {!Array} cues
* @param {string} text
* @param {shaka.extern.TextParser.TimeContext} time
* @param {boolean=} sequenceMode
*/
function verifyHelper(cues, text, time, sequenceMode = false) {
function verifyHelper(cues, text, time) {
const data =
shaka.util.BufferUtils.toUint8(shaka.util.StringUtils.toUTF8(text));
const parser = new shaka.text.VttTextParser();
parser.setSequenceMode(sequenceMode);
const result = parser.parseMedia(data, time);
const result = new shaka.text.VttTextParser().parseMedia(data, time);

const expected = cues.map((cue) => {
if (cue.nestedCues) {
Expand Down

0 comments on commit 344e865

Please sign in to comment.