diff --git a/api-extractor/report/hls.js.api.md b/api-extractor/report/hls.js.api.md index 1652f30bcd6..405d1b2d866 100644 --- a/api-extractor/report/hls.js.api.md +++ b/api-extractor/report/hls.js.api.md @@ -2143,9 +2143,19 @@ export type TSDemuxerConfig = { // @public (undocumented) export interface UserdataSample { // (undocumented) - bytes: Uint8Array; + bytes?: Uint8Array; + // (undocumented) + payloadType?: number; // (undocumented) pts: number; + // (undocumented) + type?: number; + // (undocumented) + userData?: string; + // (undocumented) + userDataBytes?: Uint8Array; + // (undocumented) + uuid?: string; } // Warnings were encountered during analysis: diff --git a/src/controller/timeline-controller.ts b/src/controller/timeline-controller.ts index f77d35ff4b3..5a70207daf4 100644 --- a/src/controller/timeline-controller.ts +++ b/src/controller/timeline-controller.ts @@ -664,23 +664,25 @@ export class TimelineController implements ComponentAPI { } private extractCea608Data(byteArray: Uint8Array): number[][] { - const count = byteArray[0] & 31; - let position = 2; const actualCCBytes: number[][] = [[], []]; + const count = byteArray[0] & 0x1f; + let position = 2; for (let j = 0; j < count; j++) { const tmpByte = byteArray[position++]; const ccbyte1 = 0x7f & byteArray[position++]; const ccbyte2 = 0x7f & byteArray[position++]; - const ccValid = (4 & tmpByte) !== 0; - const ccType = 3 & tmpByte; - if (ccbyte1 === 0 && ccbyte2 === 0) { continue; } - + const ccValid = (0x04 & tmpByte) !== 0; // Support all four channels if (ccValid) { - if (ccType === 0 || ccType === 1) { + const ccType = 0x03 & tmpByte; + if ( + 0x00 /* CEA608 field1*/ === ccType || + 0x01 /* CEA608 field2*/ === ccType + ) { + // Exclude CEA708 CC data. actualCCBytes[ccType].push(ccbyte1); actualCCBytes[ccType].push(ccbyte2); } diff --git a/src/demux/aacdemuxer.ts b/src/demux/aacdemuxer.ts index dfa742cac90..b3af70bda08 100644 --- a/src/demux/aacdemuxer.ts +++ b/src/demux/aacdemuxer.ts @@ -19,8 +19,13 @@ class AACDemuxer extends BaseAudioDemuxer { this.config = config; } - resetInitSegment(audioCodec, videoCodec, duration) { - super.resetInitSegment(audioCodec, videoCodec, duration); + resetInitSegment( + initSegment: Uint8Array | undefined, + audioCodec: string | undefined, + videoCodec: string | undefined, + trackDuration: number + ) { + super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration); this._audioTrack = { container: 'audio/adts', type: 'audio', @@ -30,7 +35,7 @@ class AACDemuxer extends BaseAudioDemuxer { isAAC: true, samples: [], manifestCodec: audioCodec, - duration: duration, + duration: trackDuration, inputTimeScale: 90000, dropped: 0, }; diff --git a/src/demux/base-audio-demuxer.ts b/src/demux/base-audio-demuxer.ts index 7eddaf558f5..19727ff4320 100644 --- a/src/demux/base-audio-demuxer.ts +++ b/src/demux/base-audio-demuxer.ts @@ -5,7 +5,7 @@ import type { DemuxedAudioTrack, AudioFrame, DemuxedMetadataTrack, - DemuxedAvcTrack, + DemuxedVideoTrack, DemuxedUserdataTrack, KeyData, } from '../types/demuxer'; @@ -20,7 +20,12 @@ class BaseAudioDemuxer implements Demuxer { protected cachedData: Uint8Array | null = null; protected initPTS: number | null = null; - resetInitSegment(audioCodec: string, videoCodec: string, duration: number) { + resetInitSegment( + initSegment: Uint8Array | undefined, + audioCodec: string | undefined, + videoCodec: string | undefined, + trackDuration: number + ) { this._id3Track = { type: 'id3', id: 3, @@ -109,7 +114,7 @@ class BaseAudioDemuxer implements Demuxer { return { audioTrack: track, - avcTrack: dummyTrack() as DemuxedAvcTrack, + videoTrack: dummyTrack() as DemuxedVideoTrack, id3Track, textTrack: dummyTrack() as DemuxedUserdataTrack, }; @@ -137,7 +142,7 @@ class BaseAudioDemuxer implements Demuxer { return { audioTrack: this._audioTrack, - avcTrack: dummyTrack() as DemuxedAvcTrack, + videoTrack: dummyTrack() as DemuxedVideoTrack, id3Track: this._id3Track, textTrack: dummyTrack() as DemuxedUserdataTrack, }; diff --git a/src/demux/dummy-demuxed-track.ts b/src/demux/dummy-demuxed-track.ts index 05c39f2d35f..ce1a6dc5704 100644 --- a/src/demux/dummy-demuxed-track.ts +++ b/src/demux/dummy-demuxed-track.ts @@ -1,11 +1,11 @@ import type { DemuxedTrack } from '../types/demuxer'; -export function dummyTrack(): DemuxedTrack { +export function dummyTrack(type = '', inputTimeScale = 90000): DemuxedTrack { return { - type: '', + type, id: -1, pid: -1, - inputTimeScale: 90000, + inputTimeScale, sequenceNumber: -1, samples: [], dropped: 0, diff --git a/src/demux/mp3demuxer.ts b/src/demux/mp3demuxer.ts index 459b685abdd..dc7c2c00bb5 100644 --- a/src/demux/mp3demuxer.ts +++ b/src/demux/mp3demuxer.ts @@ -9,8 +9,13 @@ import * as MpegAudio from './mpegaudio'; class MP3Demuxer extends BaseAudioDemuxer { static readonly minProbeByteLength: number = 4; - resetInitSegment(audioCodec, videoCodec, duration) { - super.resetInitSegment(audioCodec, videoCodec, duration); + resetInitSegment( + initSegment: Uint8Array | undefined, + audioCodec: string | undefined, + videoCodec: string | undefined, + trackDuration: number + ) { + super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration); this._audioTrack = { container: 'audio/mpeg', type: 'audio', @@ -20,7 +25,7 @@ class MP3Demuxer extends BaseAudioDemuxer { isAAC: false, samples: [], manifestCodec: audioCodec, - duration: duration, + duration: trackDuration, inputTimeScale: 90000, dropped: 0, }; diff --git a/src/demux/mp4demuxer.ts b/src/demux/mp4demuxer.ts index c0946c1fd35..0d600a5fc8e 100644 --- a/src/demux/mp4demuxer.ts +++ b/src/demux/mp4demuxer.ts @@ -4,7 +4,7 @@ import { Demuxer, DemuxerResult, - PassthroughVideoTrack, + PassthroughTrack, DemuxedAudioTrack, DemuxedUserdataTrack, DemuxedMetadataTrack, @@ -15,6 +15,9 @@ import { segmentValidRange, appendUint8Array, parseEmsg, + parseSamples, + parseInitSegment, + RemuxerTrackIdConfig, } from '../utils/mp4-tools'; import { dummyTrack } from './dummy-demuxed-track'; import type { HlsEventEmitter } from '../events'; @@ -25,87 +28,147 @@ const emsgSchemePattern = /\/emsg[-/]ID3/i; class MP4Demuxer implements Demuxer { static readonly minProbeByteLength = 1024; private remainderData: Uint8Array | null = null; + private timeOffset: number = 0; private config: HlsConfig; + private videoTrack?: PassthroughTrack; + private audioTrack?: DemuxedAudioTrack; + private id3Track?: DemuxedMetadataTrack; + private txtTrack?: DemuxedUserdataTrack; constructor(observer: HlsEventEmitter, config: HlsConfig) { this.config = config; } - resetTimeStamp() {} + public resetTimeStamp() {} - resetInitSegment() {} + public resetInitSegment( + initSegment: Uint8Array, + audioCodec: string | undefined, + videoCodec: string | undefined, + trackDuration: number + ) { + const initData = parseInitSegment(initSegment); + const videoTrack = (this.videoTrack = dummyTrack( + 'video', + 1 + ) as PassthroughTrack); + const audioTrack = (this.audioTrack = dummyTrack( + 'audio', + 1 + ) as DemuxedAudioTrack); + const captionTrack = (this.txtTrack = dummyTrack( + 'text', + 1 + ) as DemuxedUserdataTrack); - resetContiguity(): void {} + this.id3Track = dummyTrack('id3', 1) as DemuxedMetadataTrack; + this.timeOffset = 0; - static probe(data) { + if (initData.video) { + const { id, timescale, codec } = initData.video; + videoTrack.id = id; + videoTrack.timescale = captionTrack.timescale = timescale; + videoTrack.codec = codec; + } + + if (initData.audio) { + const { id, timescale, codec } = initData.audio; + audioTrack.id = id; + audioTrack.timescale = timescale; + audioTrack.codec = codec; + } + + captionTrack.id = RemuxerTrackIdConfig.text; + videoTrack.sampleDuration = 0; + videoTrack.duration = audioTrack.duration = trackDuration; + } + + public resetContiguity(): void {} + + static probe(data: Uint8Array) { // ensure we find a moof box in the first 16 kB - return ( - findBox({ data: data, start: 0, end: Math.min(data.length, 16384) }, [ - 'moof', - ]).length > 0 - ); + data = data.length > 16384 ? data.subarray(0, 16384) : data; + return findBox(data, ['moof']).length > 0; } - demux(data: Uint8Array, timeOffset: number): DemuxerResult { + public demux(data: Uint8Array, timeOffset: number): DemuxerResult { + this.timeOffset = timeOffset; // Load all data into the avc track. The CMAF remuxer will look for the data in the samples object; the rest of the fields do not matter - let avcSamples = data; - const avcTrack = dummyTrack() as PassthroughVideoTrack; + let videoSamples = data; + const videoTrack = this.videoTrack as PassthroughTrack; + const textTrack = this.txtTrack as DemuxedUserdataTrack; if (this.config.progressive) { // Split the bytestream into two ranges: one encompassing all data up until the start of the last moof, and everything else. // This is done to guarantee that we're sending valid data to MSE - when demuxing progressively, we have no guarantee // that the fetch loader gives us flush moof+mdat pairs. If we push jagged data to MSE, it will throw an exception. if (this.remainderData) { - avcSamples = appendUint8Array(this.remainderData, data); + videoSamples = appendUint8Array(this.remainderData, data); } - const segmentedData = segmentValidRange(avcSamples); + const segmentedData = segmentValidRange(videoSamples); this.remainderData = segmentedData.remainder; - avcTrack.samples = segmentedData.valid || new Uint8Array(); + videoTrack.samples = segmentedData.valid || new Uint8Array(); } else { - avcTrack.samples = avcSamples; + videoTrack.samples = videoSamples; } - const id3Track = dummyTrack() as DemuxedMetadataTrack; - const emsgs = findBox(avcTrack.samples, ['emsg']); - if (emsgs) { - id3Track.inputTimeScale = 1; - emsgs.forEach(({ data, start, end }) => { - const emsgInfo = parseEmsg(data.subarray(start, end)); - if (emsgSchemePattern.test(emsgInfo.schemeIdUri)) { - const pts = Number.isFinite(emsgInfo.presentationTime) - ? emsgInfo.presentationTime! / emsgInfo.timeScale - : timeOffset + emsgInfo.presentationTimeDelta! / emsgInfo.timeScale; - const payload = emsgInfo.payload; - id3Track.samples.push({ - data: payload, - len: payload.byteLength, - dts: pts, - pts: pts, - }); - } - }); - } + const id3Track = this.extractID3Track(videoTrack, timeOffset); + textTrack.samples = parseSamples(timeOffset, videoTrack); return { - audioTrack: dummyTrack() as DemuxedAudioTrack, - avcTrack, + videoTrack, + audioTrack: this.audioTrack as DemuxedAudioTrack, id3Track, - textTrack: dummyTrack() as DemuxedUserdataTrack, + textTrack: this.txtTrack as DemuxedUserdataTrack, }; } - flush() { - const avcTrack = dummyTrack() as PassthroughVideoTrack; - avcTrack.samples = this.remainderData || new Uint8Array(); + public flush() { + const timeOffset = this.timeOffset; + const videoTrack = this.videoTrack as PassthroughTrack; + const textTrack = this.txtTrack as DemuxedUserdataTrack; + videoTrack.samples = this.remainderData || new Uint8Array(); this.remainderData = null; + const id3Track = this.extractID3Track(videoTrack, this.timeOffset); + textTrack.samples = parseSamples(timeOffset, videoTrack); + return { + videoTrack, audioTrack: dummyTrack() as DemuxedAudioTrack, - avcTrack, - id3Track: dummyTrack() as DemuxedMetadataTrack, + id3Track, textTrack: dummyTrack() as DemuxedUserdataTrack, }; } + private extractID3Track( + videoTrack: PassthroughTrack, + timeOffset: number + ): DemuxedMetadataTrack { + const id3Track = this.id3Track as DemuxedMetadataTrack; + if (videoTrack.samples.length) { + const emsgs = findBox(videoTrack.samples, ['emsg']); + if (emsgs) { + emsgs.forEach((data: Uint8Array) => { + const emsgInfo = parseEmsg(data); + if (emsgSchemePattern.test(emsgInfo.schemeIdUri)) { + const pts = Number.isFinite(emsgInfo.presentationTime) + ? emsgInfo.presentationTime! / emsgInfo.timeScale + : timeOffset + + emsgInfo.presentationTimeDelta! / emsgInfo.timeScale; + const payload = emsgInfo.payload; + id3Track.samples.push({ + data: payload, + len: payload.byteLength, + dts: pts, + pts: pts, + }); + } + }); + } + } + return id3Track; + } + demuxSampleAes( data: Uint8Array, keyData: KeyData, diff --git a/src/demux/transmuxer.ts b/src/demux/transmuxer.ts index 330646cf154..ddc2c6027f5 100644 --- a/src/demux/transmuxer.ts +++ b/src/demux/transmuxer.ts @@ -11,7 +11,7 @@ import PassThroughRemuxer from '../remux/passthrough-remuxer'; import ChunkCache from './chunk-cache'; import { appendUint8Array } from '../utils/mp4-tools'; import { logger } from '../utils/logger'; -import type { Demuxer, KeyData } from '../types/demuxer'; +import type { Demuxer, DemuxerResult, KeyData } from '../types/demuxer'; import type { Remuxer } from '../types/remuxer'; import type { TransmuxerResult, ChunkMetadata } from '../types/transmuxer'; import type { HlsConfig } from '../config'; @@ -201,7 +201,7 @@ export default class Transmuxer { }); } - const transmuxResults: Array = []; + const transmuxResults: TransmuxerResult[] = []; const { timeOffset } = currentTransmuxState; if (decrypter) { // The decrypter may have data cached, which needs to be demuxed. In this case we'll have two TransmuxResults @@ -246,8 +246,12 @@ export default class Transmuxer { return transmuxResults; } - private flushRemux(transmuxResults, demuxResult, chunkMeta) { - const { audioTrack, avcTrack, id3Track, textTrack } = demuxResult; + private flushRemux( + transmuxResults: TransmuxerResult[], + demuxResult: DemuxerResult, + chunkMeta: ChunkMetadata + ) { + const { audioTrack, videoTrack, id3Track, textTrack } = demuxResult; const { accurateTimeOffset, timeOffset } = this.currentTransmuxState; logger.log( `[transmuxer.ts]: Flushed fragment ${chunkMeta.sn}${ @@ -256,7 +260,7 @@ export default class Transmuxer { ); const remuxResult = this.remuxer!.remux( audioTrack, - avcTrack, + videoTrack, id3Track, textTrack, timeOffset, @@ -294,13 +298,18 @@ export default class Transmuxer { initSegmentData: Uint8Array | undefined, audioCodec: string | undefined, videoCodec: string | undefined, - duration: number + trackDuration: number ) { const { demuxer, remuxer } = this; if (!demuxer || !remuxer) { return; } - demuxer.resetInitSegment(audioCodec, videoCodec, duration); + demuxer.resetInitSegment( + initSegmentData, + audioCodec, + videoCodec, + trackDuration + ); remuxer.resetInitSegment(initSegmentData, audioCodec, videoCodec); } @@ -348,12 +357,12 @@ export default class Transmuxer { accurateTimeOffset: boolean, chunkMeta: ChunkMetadata ): TransmuxerResult { - const { audioTrack, avcTrack, id3Track, textTrack } = ( + const { audioTrack, videoTrack, id3Track, textTrack } = ( this.demuxer as Demuxer ).demux(data, timeOffset, false, !this.config.progressive); const remuxResult = this.remuxer!.remux( audioTrack, - avcTrack, + videoTrack, id3Track, textTrack, timeOffset, @@ -379,7 +388,7 @@ export default class Transmuxer { .then((demuxResult) => { const remuxResult = this.remuxer!.remux( demuxResult.audioTrack, - demuxResult.avcTrack, + demuxResult.videoTrack, demuxResult.id3Track, demuxResult.textTrack, timeOffset, diff --git a/src/demux/tsdemuxer.ts b/src/demux/tsdemuxer.ts index b7cf14fd50c..9ea95ab0332 100644 --- a/src/demux/tsdemuxer.ts +++ b/src/demux/tsdemuxer.ts @@ -12,10 +12,13 @@ import * as ADTS from './adts'; import * as MpegAudio from './mpegaudio'; import ExpGolomb from './exp-golomb'; -import { utf8ArrayToStr } from './id3'; import SampleAesDecrypter from './sample-aes'; import { Events } from '../events'; -import { appendUint8Array } from '../utils/mp4-tools'; +import { + appendUint8Array, + parseSEIMessageFromNALu, + RemuxerTrackIdConfig, +} from '../utils/mp4-tools'; import { logger } from '../utils/logger'; import { ErrorTypes, ErrorDetails } from '../errors'; import type { HlsConfig } from '../config'; @@ -34,21 +37,6 @@ import type { } from '../types/demuxer'; import { AudioFrame } from '../types/demuxer'; -// We are using fixed track IDs for driving the MP4 remuxer -// instead of following the TS PIDs. -// There is no reason not to do this and some browsers/SourceBuffer-demuxers -// may not like if there are TrackID "switches" -// See https://github.com/video-dev/hls.js/issues/1331 -// Here we are mapping our internal track types to constant MP4 track IDs -// With MSE currently one can only have one track of each, and we are muxing -// whatever video/audio rendition in them. -const RemuxerTrackIdConfig = { - video: 1, - audio: 2, - id3: 3, - text: 4, -}; - type ParsedTimestamp = { pts?: number; dts?: number; @@ -76,18 +64,15 @@ class TSDemuxer implements Demuxer { private sampleAes: SampleAesDecrypter | null = null; private pmtParsed: boolean = false; - private audioCodec!: string; - private videoCodec!: string; + private audioCodec?: string; + private videoCodec?: string; private _duration: number = 0; - private aacLastPTS: number | null = null; - private _initPTS: number | null = null; - private _initDTS?: number | null = null; private _pmtId: number = -1; - private _avcTrack!: DemuxedAvcTrack; - private _audioTrack!: DemuxedAudioTrack; - private _id3Track!: DemuxedMetadataTrack; - private _txtTrack!: DemuxedUserdataTrack; + private _avcTrack?: DemuxedAvcTrack; + private _audioTrack?: DemuxedAudioTrack; + private _id3Track?: DemuxedMetadataTrack; + private _txtTrack?: DemuxedUserdataTrack; private aacOverFlow: AudioFrame | null = null; private avcSample: ParsedAvcSample | null = null; private remainderData: Uint8Array | null = null; @@ -145,7 +130,7 @@ class TSDemuxer implements Demuxer { */ static createTrack( type: 'audio' | 'video' | 'id3' | 'text', - duration: number + duration?: number ): DemuxedTrack { return { container: @@ -166,38 +151,29 @@ class TSDemuxer implements Demuxer { * Resets all internal track instances of the demuxer. */ public resetInitSegment( + initSegment: Uint8Array | undefined, audioCodec: string, videoCodec: string, - duration: number + trackDuration: number ) { this.pmtParsed = false; this._pmtId = -1; - this._avcTrack = TSDemuxer.createTrack( - 'video', - duration - ) as DemuxedAvcTrack; + this._avcTrack = TSDemuxer.createTrack('video') as DemuxedAvcTrack; this._audioTrack = TSDemuxer.createTrack( 'audio', - duration + trackDuration ) as DemuxedAudioTrack; - this._id3Track = TSDemuxer.createTrack( - 'id3', - duration - ) as DemuxedMetadataTrack; - this._txtTrack = TSDemuxer.createTrack( - 'text', - duration - ) as DemuxedUserdataTrack; + this._id3Track = TSDemuxer.createTrack('id3') as DemuxedMetadataTrack; + this._txtTrack = TSDemuxer.createTrack('text') as DemuxedUserdataTrack; this._audioTrack.isAAC = true; // flush any partial content this.aacOverFlow = null; - this.aacLastPTS = null; this.avcSample = null; this.audioCodec = audioCodec; this.videoCodec = videoCodec; - this._duration = duration; + this._duration = trackDuration; } public resetTimeStamp() {} @@ -214,7 +190,6 @@ class TSDemuxer implements Demuxer { _id3Track.pesData = null; } this.aacOverFlow = null; - this.aacLastPTS = null; } public demux( @@ -229,12 +204,13 @@ class TSDemuxer implements Demuxer { let pes: PES | null; - const avcTrack = this._avcTrack; - const audioTrack = this._audioTrack; - const id3Track = this._id3Track; + const videoTrack = this._avcTrack as DemuxedAvcTrack; + const audioTrack = this._audioTrack as DemuxedAudioTrack; + const id3Track = this._id3Track as DemuxedMetadataTrack; + const textTrack = this._txtTrack as DemuxedUserdataTrack; - let avcId = avcTrack.pid; - let avcData = avcTrack.pesData; + let avcId = videoTrack.pid; + let avcData = videoTrack.pesData; let audioId = audioTrack.pid; let id3Id = id3Track.pid; let audioData = audioTrack.pesData; @@ -254,9 +230,9 @@ class TSDemuxer implements Demuxer { this.remainderData = data; return { audioTrack, - avcTrack, + videoTrack, id3Track, - textTrack: this._txtTrack, + textTrack, }; } @@ -294,7 +270,7 @@ class TSDemuxer implements Demuxer { case avcId: if (stt) { if (avcData && (pes = parsePES(avcData))) { - this.parseAVCPES(pes, false); + this.parseAVCPES(videoTrack, textTrack, pes, false); } avcData = { data: [], size: 0 }; @@ -308,9 +284,9 @@ class TSDemuxer implements Demuxer { if (stt) { if (audioData && (pes = parsePES(audioData))) { if (audioTrack.isAAC) { - this.parseAACPES(pes); + this.parseAACPES(audioTrack, pes); } else { - this.parseMPEGPES(pes); + this.parseMPEGPES(audioTrack, pes); } } audioData = { data: [], size: 0 }; @@ -323,7 +299,7 @@ class TSDemuxer implements Demuxer { case id3Id: if (stt) { if (id3Data && (pes = parsePES(id3Data))) { - this.parseID3PES(pes); + this.parseID3PES(id3Track, pes); } id3Data = { data: [], size: 0 }; @@ -361,7 +337,7 @@ class TSDemuxer implements Demuxer { // but we are not using this for MP4 track IDs. avcId = parsedPIDs.avc; if (avcId > 0) { - avcTrack.pid = avcId; + videoTrack.pid = avcId; } audioId = parsedPIDs.audio; @@ -400,15 +376,15 @@ class TSDemuxer implements Demuxer { } } - avcTrack.pesData = avcData; + videoTrack.pesData = avcData; audioTrack.pesData = audioData; id3Track.pesData = id3Data; const demuxResult: DemuxerResult = { audioTrack, - avcTrack, + videoTrack, id3Track, - textTrack: this._txtTrack, + textTrack, }; if (flush) { @@ -426,10 +402,10 @@ class TSDemuxer implements Demuxer { result = this.demux(remainderData, -1, false, true); } else { result = { - audioTrack: this._audioTrack, - avcTrack: this._avcTrack, - textTrack: this._txtTrack, - id3Track: this._id3Track, + videoTrack: this._avcTrack as DemuxedAvcTrack, + audioTrack: this._audioTrack as DemuxedAudioTrack, + id3Track: this._id3Track as DemuxedMetadataTrack, + textTrack: this._txtTrack as DemuxedUserdataTrack, }; } this.extractRemainingSamples(result); @@ -440,25 +416,30 @@ class TSDemuxer implements Demuxer { } private extractRemainingSamples(demuxResult: DemuxerResult) { - const { audioTrack, avcTrack, id3Track } = demuxResult; - const avcData = avcTrack.pesData; + const { audioTrack, videoTrack, id3Track, textTrack } = demuxResult; + const avcData = videoTrack.pesData; const audioData = audioTrack.pesData; const id3Data = id3Track.pesData; // try to parse last PES packets let pes: PES | null; if (avcData && (pes = parsePES(avcData))) { - this.parseAVCPES(pes, true); - avcTrack.pesData = null; + this.parseAVCPES( + videoTrack as DemuxedAvcTrack, + textTrack as DemuxedUserdataTrack, + pes, + true + ); + videoTrack.pesData = null; } else { // either avcData null or PES truncated, keep it for next frag parsing - avcTrack.pesData = avcData; + videoTrack.pesData = avcData; } if (audioData && (pes = parsePES(audioData))) { if (audioTrack.isAAC) { - this.parseAACPES(pes); + this.parseAACPES(audioTrack, pes); } else { - this.parseMPEGPES(pes); + this.parseMPEGPES(audioTrack, pes); } audioTrack.pesData = null; @@ -474,7 +455,7 @@ class TSDemuxer implements Demuxer { } if (id3Data && (pes = parsePES(id3Data))) { - this.parseID3PES(pes); + this.parseID3PES(id3Track, pes); id3Track.pesData = null; } else { // either id3Data null or PES truncated, keep it for next frag parsing @@ -506,19 +487,19 @@ class TSDemuxer implements Demuxer { sampleAes: SampleAesDecrypter ): Promise { return new Promise((resolve) => { - const { audioTrack, avcTrack } = demuxResult; + const { audioTrack, videoTrack } = demuxResult; if (audioTrack.samples && audioTrack.isAAC) { sampleAes.decryptAacSamples(audioTrack.samples, 0, () => { - if (avcTrack.samples) { - sampleAes.decryptAvcSamples(avcTrack.samples, 0, 0, () => { + if (videoTrack.samples) { + sampleAes.decryptAvcSamples(videoTrack.samples, 0, 0, () => { resolve(demuxResult); }); } else { resolve(demuxResult); } }); - } else if (avcTrack.samples) { - sampleAes.decryptAvcSamples(avcTrack.samples, 0, 0, () => { + } else if (videoTrack.samples) { + sampleAes.decryptAvcSamples(videoTrack.samples, 0, 0, () => { resolve(demuxResult); }); } @@ -526,13 +507,16 @@ class TSDemuxer implements Demuxer { } public destroy() { - this._initPTS = this._initDTS = null; this._duration = 0; } - private parseAVCPES(pes: PES, last: boolean) { - const track = this._avcTrack; - const units = this.parseAVCNALu(pes.data); + private parseAVCPES( + track: DemuxedAvcTrack, + textTrack: DemuxedUserdataTrack, + pes: PES, + last: boolean + ) { + const units = this.parseAVCNALu(track, pes.data); const debug = false; let avcSample = this.avcSample; let push: boolean; @@ -613,106 +597,11 @@ class TSDemuxer implements Demuxer { if (debug && avcSample) { avcSample.debug += 'SEI '; } - - const expGolombDecoder = new ExpGolomb(discardEPB(unit.data)); - - // skip frameType - expGolombDecoder.readUByte(); - - let payloadType = 0; - let payloadSize = 0; - let endOfCaptions = false; - let b = 0; - - while (!endOfCaptions && expGolombDecoder.bytesAvailable > 1) { - payloadType = 0; - do { - b = expGolombDecoder.readUByte(); - payloadType += b; - } while (b === 0xff); - - // Parse payload size. - payloadSize = 0; - do { - b = expGolombDecoder.readUByte(); - payloadSize += b; - } while (b === 0xff); - - // TODO: there can be more than one payload in an SEI packet... - // TODO: need to read type and size in a while loop to get them all - if (payloadType === 4 && expGolombDecoder.bytesAvailable !== 0) { - endOfCaptions = true; - - const countryCode = expGolombDecoder.readUByte(); - - if (countryCode === 181) { - const providerCode = expGolombDecoder.readUShort(); - - if (providerCode === 49) { - const userStructure = expGolombDecoder.readUInt(); - - if (userStructure === 0x47413934) { - const userDataType = expGolombDecoder.readUByte(); - - // Raw CEA-608 bytes wrapped in CEA-708 packet - if (userDataType === 3) { - const firstByte = expGolombDecoder.readUByte(); - const secondByte = expGolombDecoder.readUByte(); - - const totalCCs = 31 & firstByte; - const byteArray = [firstByte, secondByte]; - - for (let i = 0; i < totalCCs; i++) { - // 3 bytes per CC - byteArray.push(expGolombDecoder.readUByte()); - byteArray.push(expGolombDecoder.readUByte()); - byteArray.push(expGolombDecoder.readUByte()); - } - - insertSampleInOrder(this._txtTrack.samples, { - type: 3, - pts: pes.pts, - bytes: byteArray, - }); - } - } - } - } - } else if ( - payloadType === 5 && - expGolombDecoder.bytesAvailable !== 0 - ) { - endOfCaptions = true; - - if (payloadSize > 16) { - const uuidStrArray: Array = []; - for (let i = 0; i < 16; i++) { - uuidStrArray.push(expGolombDecoder.readUByte().toString(16)); - - if (i === 3 || i === 5 || i === 7 || i === 9) { - uuidStrArray.push('-'); - } - } - const length = payloadSize - 16; - const userDataPayloadBytes = new Uint8Array(length); - for (let i = 0; i < length; i++) { - userDataPayloadBytes[i] = expGolombDecoder.readUByte(); - } - - insertSampleInOrder(this._txtTrack.samples, { - pts: pes.pts, - payloadType: payloadType, - uuid: uuidStrArray.join(''), - userData: utf8ArrayToStr(userDataPayloadBytes), - userDataBytes: userDataPayloadBytes, - }); - } - } else if (payloadSize < expGolombDecoder.bytesAvailable) { - for (let i = 0; i < payloadSize; i++) { - expGolombDecoder.readUByte(); - } - } - } + parseSEIMessageFromNALu( + discardEPB(unit.data), + pes.pts as number, + textTrack.samples + ); break; // SPS } @@ -797,12 +686,11 @@ class TSDemuxer implements Demuxer { } } - private getLastNalUnit() { + private getLastNalUnit(samples: AvcSample[]) { let avcSample = this.avcSample; let lastUnit; // try to fallback to previous sample if current one is empty if (!avcSample || avcSample.units.length === 0) { - const samples = this._avcTrack.samples; avcSample = samples[samples.length - 1]; } if (avcSample?.units) { @@ -812,13 +700,15 @@ class TSDemuxer implements Demuxer { return lastUnit; } - private parseAVCNALu(array: Uint8Array): Array<{ + private parseAVCNALu( + track: DemuxedAvcTrack, + array: Uint8Array + ): Array<{ data: Uint8Array; type: number; state?: number; }> { const len = array.byteLength; - const track = this._avcTrack; let state = track.naluState || 0; const lastState = state; const units = [] as Array<{ @@ -870,7 +760,7 @@ class TSDemuxer implements Demuxer { // first check if start code delimiter is overlapping between 2 PES packets, // ie it started in last packet (lastState not zero) // and ended at the beginning of this PES packet (i <= 4 - lastState) - const lastUnit = this.getLastNalUnit(); + const lastUnit = this.getLastNalUnit(track.samples); if (lastUnit) { if (lastState && i <= 4 - lastState) { // start delimiter overlapping between PES packets @@ -923,7 +813,7 @@ class TSDemuxer implements Demuxer { // no NALu found if (units.length === 0) { // append pes.data to previous NAL unit - const lastUnit = this.getLastNalUnit(); + const lastUnit = this.getLastNalUnit(track.samples); if (lastUnit) { const tmp = new Uint8Array(lastUnit.data.byteLength + array.byteLength); tmp.set(lastUnit.data, 0); @@ -935,9 +825,8 @@ class TSDemuxer implements Demuxer { return units; } - private parseAACPES(pes: PES) { + private parseAACPES(track: DemuxedAudioTrack, pes: PES) { let startOffset = 0; - const track = this._audioTrack; const aacOverFlow = this.aacOverFlow; const data = pes.data; if (aacOverFlow) { @@ -985,7 +874,13 @@ class TSDemuxer implements Demuxer { } } - ADTS.initTrackConfig(track, this.observer, data, offset, this.audioCodec); + ADTS.initTrackConfig( + track, + this.observer, + data, + offset, + this.audioCodec as string + ); let pts: number; if (pes.pts !== undefined) { @@ -1026,7 +921,7 @@ class TSDemuxer implements Demuxer { } } - private parseMPEGPES(pes: PES) { + private parseMPEGPES(track: DemuxedAudioTrack, pes: PES) { const data = pes.data; const length = data.length; let frameIndex = 0; @@ -1040,7 +935,7 @@ class TSDemuxer implements Demuxer { while (offset < length) { if (MpegAudio.isHeader(data, offset)) { const frame = MpegAudio.appendFrame( - this._audioTrack, + track, data, offset, pts, @@ -1060,12 +955,12 @@ class TSDemuxer implements Demuxer { } } - private parseID3PES(pes: PES) { + private parseID3PES(id3Track: DemuxedMetadataTrack, pes: PES) { if (pes.pts === undefined) { logger.warn('[tsdemuxer]: ID3 PES unknown PTS'); return; } - this._id3Track.samples.push(pes as Required); + id3Track.samples.push(pes as Required); } } @@ -1299,24 +1194,6 @@ function pushAccessUnit(avcSample: ParsedAvcSample, avcTrack: DemuxedAvcTrack) { } } -function insertSampleInOrder(arr, data) { - const len = arr.length; - if (len > 0) { - if (data.pts >= arr[len - 1].pts) { - arr.push(data); - } else { - for (let pos = len - 1; pos >= 0; pos--) { - if (data.pts < arr[pos].pts) { - arr.splice(pos, 0, data); - break; - } - } - } - } else { - arr.push(data); - } -} - /** * remove Emulation Prevention bytes from a RBSP */ diff --git a/src/loader/fragment-loader.ts b/src/loader/fragment-loader.ts index 7dd4228029f..65aa52b6db9 100644 --- a/src/loader/fragment-loader.ts +++ b/src/loader/fragment-loader.ts @@ -75,7 +75,7 @@ export default class FragmentLoader { maxRetry: 0, retryDelay: 0, maxRetryDelay: config.fragLoadingMaxRetryTimeout, - highWaterMark: MIN_CHUNK_SIZE, + highWaterMark: frag.sn === 'initSegment' ? Infinity : MIN_CHUNK_SIZE, }; // Assign frag stats to the loader's stats reference frag.stats = loader.stats; diff --git a/src/remux/passthrough-remuxer.ts b/src/remux/passthrough-remuxer.ts index ef6eb2e76f9..9c81d39a5ef 100644 --- a/src/remux/passthrough-remuxer.ts +++ b/src/remux/passthrough-remuxer.ts @@ -1,4 +1,7 @@ -import { flushTextTrackMetadataCueSamples } from './mp4-remuxer'; +import { + flushTextTrackMetadataCueSamples, + flushTextTrackUserdataCueSamples, +} from './mp4-remuxer'; import type { InitData, InitDataTrack } from '../utils/mp4-tools'; import { getDuration, @@ -19,7 +22,7 @@ import type { DemuxedAudioTrack, DemuxedMetadataTrack, DemuxedUserdataTrack, - PassthroughVideoTrack, + PassthroughTrack, } from '../types/demuxer'; class PassThroughRemuxer implements Remuxer { @@ -31,18 +34,18 @@ class PassThroughRemuxer implements Remuxer { private initTracks?: TrackSet; private lastEndDTS: number | null = null; - destroy() {} + public destroy() {} - resetTimeStamp(defaultInitPTS) { + public resetTimeStamp(defaultInitPTS) { this.initPTS = defaultInitPTS; this.lastEndDTS = null; } - resetNextTimestamp() { + public resetNextTimestamp() { this.lastEndDTS = null; } - resetInitSegment( + public resetInitSegment( initSegment: Uint8Array | undefined, audioCodec: string | undefined, videoCodec: string | undefined @@ -53,7 +56,7 @@ class PassThroughRemuxer implements Remuxer { this.emitInitSegment = true; } - generateInitSegment(initSegment: Uint8Array | undefined): void { + private generateInitSegment(initSegment: Uint8Array | undefined): void { let { audioCodec, videoCodec } = this; if (!initSegment || !initSegment.byteLength) { this.initTracks = undefined; @@ -107,9 +110,9 @@ class PassThroughRemuxer implements Remuxer { this.initTracks = tracks; } - remux( + public remux( audioTrack: DemuxedAudioTrack, - videoTrack: PassthroughVideoTrack, + videoTrack: PassthroughTrack, id3Track: DemuxedMetadataTrack, textTrack: DemuxedUserdataTrack, timeOffset: number @@ -203,14 +206,22 @@ class PassThroughRemuxer implements Remuxer { result.audio = track.type === 'audio' ? track : undefined; result.video = track.type !== 'audio' ? track : undefined; result.initSegment = initSegment; - const id3InitPts = this.initPTS ?? 0; + const initPtsNum = this.initPTS ?? 0; result.id3 = flushTextTrackMetadataCueSamples( id3Track, timeOffset, - id3InitPts, - id3InitPts + initPtsNum, + initPtsNum ); + if (textTrack.samples.length) { + result.text = flushTextTrackUserdataCueSamples( + textTrack, + timeOffset, + initPtsNum + ); + } + return result; } } diff --git a/src/types/demuxer.ts b/src/types/demuxer.ts index f432f014255..70354a59f66 100644 --- a/src/types/demuxer.ts +++ b/src/types/demuxer.ts @@ -13,9 +13,10 @@ export interface Demuxer { flush(timeOffset?: number): DemuxerResult | Promise; destroy(): void; resetInitSegment( + initSegment: Uint8Array | undefined, audioCodec: string | undefined, videoCodec: string | undefined, - duration: number + trackDuration: number ); resetTimeStamp(defaultInitPTS?: number | null): void; resetContiguity(): void; @@ -23,7 +24,7 @@ export interface Demuxer { export interface DemuxerResult { audioTrack: DemuxedAudioTrack; - avcTrack: DemuxedVideoTrack; + videoTrack: DemuxedVideoTrack; id3Track: DemuxedMetadataTrack; textTrack: DemuxedUserdataTrack; } @@ -48,6 +49,13 @@ export interface DemuxedTrack { codec?: string; } +export interface PassthroughTrack extends DemuxedTrack { + sampleDuration: number; + samples: Uint8Array; + timescale: number; + duration: number; + codec: string; +} export interface DemuxedAudioTrack extends DemuxedTrack { config?: number[]; samplerate?: number; @@ -72,10 +80,6 @@ export interface DemuxedAvcTrack extends DemuxedVideoTrack { samples: AvcSample[]; } -export interface PassthroughVideoTrack extends DemuxedVideoTrack { - samples: Uint8Array; -} - export interface DemuxedMetadataTrack extends DemuxedTrack { samples: MetadataSample[]; } @@ -93,7 +97,12 @@ export interface MetadataSample { export interface UserdataSample { pts: number; - bytes: Uint8Array; + bytes?: Uint8Array; + type?: number; + payloadType?: number; + uuid?: string; + userData?: string; + userDataBytes?: Uint8Array; } export interface AvcSample { diff --git a/src/utils/imsc1-ttml-parser.ts b/src/utils/imsc1-ttml-parser.ts index 4337d02c9c1..cbd528052e0 100644 --- a/src/utils/imsc1-ttml-parser.ts +++ b/src/utils/imsc1-ttml-parser.ts @@ -34,9 +34,7 @@ export function parseIMSC1( return; } const mdat = results[0]; - const ttml = utf8ArrayToStr( - new Uint8Array(payload, mdat.start, mdat.end - mdat.start) - ); + const ttml = utf8ArrayToStr(mdat); const syncTime = toTimescaleFromScale(initPTS, 1, timescale); try { diff --git a/src/utils/mp4-tools.ts b/src/utils/mp4-tools.ts index b5e04836a2b..f54494c0d51 100644 --- a/src/utils/mp4-tools.ts +++ b/src/utils/mp4-tools.ts @@ -1,59 +1,50 @@ import { sliceUint8 } from './typed-array'; import { ElementaryStreamTypes } from '../loader/fragment'; - -type Mp4BoxData = { - data: Uint8Array; - start: number; - end: number; -}; +import { PassthroughTrack, UserdataSample } from '../types/demuxer'; +import { utf8ArrayToStr } from '../demux/id3'; const UINT32_MAX = Math.pow(2, 32) - 1; const push = [].push; +// We are using fixed track IDs for driving the MP4 remuxer +// instead of following the TS PIDs. +// There is no reason not to do this and some browsers/SourceBuffer-demuxers +// may not like if there are TrackID "switches" +// See https://github.com/video-dev/hls.js/issues/1331 +// Here we are mapping our internal track types to constant MP4 track IDs +// With MSE currently one can only have one track of each, and we are muxing +// whatever video/audio rendition in them. +export const RemuxerTrackIdConfig = { + video: 1, + audio: 2, + id3: 3, + text: 4, +}; + export function bin2str(data: Uint8Array): string { return String.fromCharCode.apply(null, data); } -export function readUint16( - buffer: Uint8Array | Mp4BoxData, - offset: number -): number { - if ('data' in buffer) { - offset += buffer.start; - buffer = buffer.data; - } - +export function readUint16(buffer: Uint8Array, offset: number): number { const val = (buffer[offset] << 8) | buffer[offset + 1]; - return val < 0 ? 65536 + val : val; } -export function readUint32( - buffer: Uint8Array | Mp4BoxData, - offset: number -): number { - if ('data' in buffer) { - offset += buffer.start; - buffer = buffer.data; - } +export function readUint32(buffer: Uint8Array, offset: number): number { + const val = readSint32(buffer, offset); + return val < 0 ? 4294967296 + val : val; +} - const val = +export function readSint32(buffer: Uint8Array, offset: number): number { + return ( (buffer[offset] << 24) | (buffer[offset + 1] << 16) | (buffer[offset + 2] << 8) | - buffer[offset + 3]; - return val < 0 ? 4294967296 + val : val; + buffer[offset + 3] + ); } -export function writeUint32( - buffer: Uint8Array | Mp4BoxData, - offset: number, - value: number -) { - if ('data' in buffer) { - offset += buffer.start; - buffer = buffer.data; - } +export function writeUint32(buffer: Uint8Array, offset: number, value: number) { buffer[offset] = value >> 24; buffer[offset + 1] = (value >> 16) & 0xff; buffer[offset + 2] = (value >> 8) & 0xff; @@ -61,30 +52,15 @@ export function writeUint32( } // Find the data for a box specified by its path -export function findBox( - input: Uint8Array | Mp4BoxData, - path: Array -): Array { - const results = [] as Array; +export function findBox(data: Uint8Array, path: string[]): Uint8Array[] { + const results = [] as Uint8Array[]; if (!path.length) { // short-circuit the search for empty paths return results; } + const end = data.byteLength; - let data: Uint8Array; - let start; - let end; - if ('data' in input) { - data = input.data; - start = input.start; - end = input.end; - } else { - data = input; - start = 0; - end = data.byteLength; - } - - for (let i = start; i < end; ) { + for (let i = 0; i < end; ) { const size = readUint32(data, i); const type = bin2str(data.subarray(i + 4, i + 8)); const endbox = size > 1 ? i + size : end; @@ -93,13 +69,10 @@ export function findBox( if (path.length === 1) { // this is the end of the path and we've found the box we were // looking for - results.push({ data: data, start: i + 8, end: endbox }); + results.push(data.subarray(i + 8, endbox)); } else { // recursively search for the next box along the path - const subresults = findBox( - { data: data, start: i + 8, end: endbox }, - path.slice(1) - ); + const subresults = findBox(data.subarray(i + 8, endbox), path.slice(1)); if (subresults.length) { push.apply(results, subresults); } @@ -124,7 +97,7 @@ type SidxInfo = { export function parseSegmentIndex(initSegment: Uint8Array): SidxInfo | null { const moovBox = findBox(initSegment, ['moov']); const moov = moovBox[0]; - const moovEndOffset = moov ? moov.end : null; // we need this in case we need to chop of garbage of the end of current data + const moovEndOffset = moov ? moov.length : null; // we need this in case we need to chop of garbage of the end of current data const sidxBox = findBox(initSegment, ['sidx']); @@ -135,7 +108,7 @@ export function parseSegmentIndex(initSegment: Uint8Array): SidxInfo | null { const references: any[] = []; const sidx = sidxBox[0]; - const version = sidx.data[0]; + const version = sidx[0]; // set initial offset, we skip the reference ID (not needed) let index = version === 0 ? 8 : 16; @@ -157,7 +130,7 @@ export function parseSegmentIndex(initSegment: Uint8Array): SidxInfo | null { // skip reserved index += 2; - let startByte = sidx.end + firstOffset; + let startByte = sidx.length + firstOffset; const referencesCount = readUint16(sidx, index); index += 2; @@ -251,6 +224,7 @@ export interface InitData extends Array { | undefined; audio?: InitDataTrack; video?: InitDataTrack; + caption?: InitDataTrack; } export function parseInitSegment(initSegment: Uint8Array): InitData { @@ -260,19 +234,17 @@ export function parseInitSegment(initSegment: Uint8Array): InitData { const trak = traks[i]; const tkhd = findBox(trak, ['tkhd'])[0]; if (tkhd) { - let version = tkhd.data[tkhd.start]; + let version = tkhd[0]; let index = version === 0 ? 12 : 20; const trackId = readUint32(tkhd, index); const mdhd = findBox(trak, ['mdia', 'mdhd'])[0]; if (mdhd) { - version = mdhd.data[mdhd.start]; + version = mdhd[0]; index = version === 0 ? 12 : 20; const timescale = readUint32(mdhd, index); const hdlr = findBox(trak, ['mdia', 'hdlr'])[0]; if (hdlr) { - const hdlrType = bin2str( - hdlr.data.subarray(hdlr.start + 8, hdlr.start + 12) - ); + const hdlrType = bin2str(hdlr.subarray(8, 12)); const type: HdlrType | undefined = { soun: ElementaryStreamTypes.AUDIO as const, vide: ElementaryStreamTypes.VIDEO as const, @@ -282,9 +254,7 @@ export function parseInitSegment(initSegment: Uint8Array): InitData { const stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0]; let codec; if (stsd) { - codec = bin2str( - stsd.data.subarray(stsd.start + 12, stsd.start + 16) - ); + codec = bin2str(stsd.subarray(12, 16)); // TODO: Parse codec details to be able to build MIME type. // stsd.start += 8; // const codecBox = findBox(stsd, [codec])[0]; @@ -337,7 +307,7 @@ export function getStartDTS(initData: InitData, fmp4: Uint8Array): number { return ( findBox(fmp4, ['moof', 'traf']).reduce((result: number | null, traf) => { const tfdt = findBox(traf, ['tfdt'])[0]; - const version = tfdt.data[tfdt.start]; + const version = tfdt[0]; const start = findBox(traf, ['tfhd']).reduce( (result: number | null, tfhd) => { // get the track id from the tfhd @@ -518,8 +488,8 @@ export function offsetStartDTS( fmp4: Uint8Array, timeOffset: number ) { - findBox(fmp4, ['moof', 'traf']).forEach(function (traf) { - findBox(traf, ['tfhd']).forEach(function (tfhd) { + findBox(fmp4, ['moof', 'traf']).forEach((traf) => { + findBox(traf, ['tfhd']).forEach((tfhd) => { // get the track id from the tfhd const id = readUint32(tfhd, 4); const track = initData[id]; @@ -529,8 +499,8 @@ export function offsetStartDTS( // assume a 90kHz clock if no timescale was specified const timescale = track.timescale || 90e3; // get the base media decode time from the tfdt - findBox(traf, ['tfdt']).forEach(function (tfdt) { - const version = tfdt.data[tfdt.start]; + findBox(traf, ['tfdt']).forEach((tfdt) => { + const version = tfdt[0]; let baseMediaDecodeTime = readUint32(tfdt, 4); if (version === 0) { writeUint32(tfdt, 4, baseMediaDecodeTime - timeOffset * timescale); @@ -565,8 +535,8 @@ export function segmentValidRange(data: Uint8Array): SegmentedRange { } const last = moofs[moofs.length - 1]; // Offset by 8 bytes; findBox offsets the start by as much - segmentedRange.valid = sliceUint8(data, 0, last.start - 8); - segmentedRange.remainder = sliceUint8(data, last.start - 8); + segmentedRange.valid = sliceUint8(data, 0, last.byteOffset - 8); + segmentedRange.remainder = sliceUint8(data, last.byteOffset - 8); return segmentedRange; } @@ -597,6 +567,322 @@ export interface IEmsgParsingData { payload: Uint8Array; } +export function parseSamples( + timeOffset: number, + track: PassthroughTrack +): UserdataSample[] { + const seiSamples = [] as UserdataSample[]; + const videoData = track.samples; + const timescale = track.timescale; + const trackId = track.id; + let isHEVCFlavor = false; + + const moofs = findBox(videoData, ['moof']); + moofs.map((moof) => { + const moofOffset = moof.byteOffset - 8; + const trafs = findBox(moof, ['traf']); + trafs.map((traf) => { + // get the base media decode time from the tfdt + const baseTime = findBox(traf, ['tfdt']).map((tfdt) => { + const version = tfdt[0]; + let result = readUint32(tfdt, 4); + if (version === 1) { + result *= Math.pow(2, 32); + result += readUint32(tfdt, 8); + } + return result / timescale; + })[0]; + + if (baseTime !== undefined) { + timeOffset = baseTime; + } + + return findBox(traf, ['tfhd']).map((tfhd) => { + const id = readUint32(tfhd, 4); + const tfhdFlags = readUint32(tfhd, 0) & 0xffffff; + const baseDataOffsetPresent = (tfhdFlags & 0x000001) !== 0; + const sampleDescriptionIndexPresent = (tfhdFlags & 0x000002) !== 0; + const defaultSampleDurationPresent = (tfhdFlags & 0x000008) !== 0; + let defaultSampleDuration = 0; + const defaultSampleSizePresent = (tfhdFlags & 0x000010) !== 0; + let defaultSampleSize = 0; + const defaultSampleFlagsPresent = (tfhdFlags & 0x000020) !== 0; + let tfhdOffset = 8; + + if (id === trackId) { + if (baseDataOffsetPresent) { + tfhdOffset += 8; + } + if (sampleDescriptionIndexPresent) { + tfhdOffset += 4; + } + if (defaultSampleDurationPresent) { + defaultSampleDuration = readUint32(tfhd, tfhdOffset); + tfhdOffset += 4; + } + if (defaultSampleSizePresent) { + defaultSampleSize = readUint32(tfhd, tfhdOffset); + tfhdOffset += 4; + } + if (defaultSampleFlagsPresent) { + tfhdOffset += 4; + } + if (track.type === 'video') { + isHEVCFlavor = isHEVC(track.codec); + } + + findBox(traf, ['trun']).map((trun) => { + const version = trun[0]; + const flags = readUint32(trun, 0) & 0xffffff; + const dataOffsetPresent = (flags & 0x000001) !== 0; + let dataOffset = 0; + const firstSampleFlagsPresent = (flags & 0x000004) !== 0; + const sampleDurationPresent = (flags & 0x000100) !== 0; + let sampleDuration = 0; + const sampleSizePresent = (flags & 0x000200) !== 0; + let sampleSize = 0; + const sampleFlagsPresent = (flags & 0x000400) !== 0; + const sampleCompositionOffsetsPresent = (flags & 0x000800) !== 0; + let compositionOffset = 0; + const sampleCount = readUint32(trun, 4); + let trunOffset = 8; // past version, flags, and sample count + + if (dataOffsetPresent) { + dataOffset = readUint32(trun, trunOffset); + trunOffset += 4; + } + if (firstSampleFlagsPresent) { + trunOffset += 4; + } + + let sampleOffset = dataOffset + moofOffset; + + for (let ix = 0; ix < sampleCount; ix++) { + if (sampleDurationPresent) { + sampleDuration = readUint32(trun, trunOffset); + trunOffset += 4; + } else { + sampleDuration = defaultSampleDuration; + } + if (sampleSizePresent) { + sampleSize = readUint32(trun, trunOffset); + trunOffset += 4; + } else { + sampleSize = defaultSampleSize; + } + if (sampleFlagsPresent) { + trunOffset += 4; + } + if (sampleCompositionOffsetsPresent) { + if (version === 0) { + compositionOffset = readUint32(trun, trunOffset); + } else { + compositionOffset = readSint32(trun, trunOffset); + } + trunOffset += 4; + } + if (track.type === ElementaryStreamTypes.VIDEO) { + let naluTotalSize = 0; + while (naluTotalSize < sampleSize) { + const naluSize = readUint32(videoData, sampleOffset); + sampleOffset += 4; + const naluType = videoData[sampleOffset] & 0x1f; + if (isSEIMessage(isHEVCFlavor, naluType)) { + const data = videoData.subarray( + sampleOffset, + sampleOffset + naluSize + ); + parseSEIMessageFromNALu( + data, + timeOffset + compositionOffset / timescale, + seiSamples + ); + } + sampleOffset += naluSize; + naluTotalSize += naluSize + 4; + } + } + + timeOffset += sampleDuration / timescale; + } + }); + } + }); + }); + }); + return seiSamples; +} + +function isHEVC(codec: string) { + if (!codec) { + return false; + } + const delimit = codec.indexOf('.'); + const baseCodec = delimit < 0 ? codec : codec.substring(0, delimit); + return ( + baseCodec === 'hvc1' || + baseCodec === 'hev1' || + // Dolby Vision + baseCodec === 'dvh1' || + baseCodec === 'dvhe' + ); +} + +function isSEIMessage(isHEVCFlavor: boolean, naluType: number) { + return isHEVCFlavor ? naluType === 39 || naluType === 40 : naluType === 6; +} + +export function parseSEIMessageFromNALu( + unescapedData: Uint8Array, + pts: number, + samples: UserdataSample[] +) { + const data = discardEPB(unescapedData); + let seiPtr = 0; + // skip frameType + seiPtr++; + let payloadType = 0; + let payloadSize = 0; + let endOfCaptions = false; + let b = 0; + + while (seiPtr < data.length) { + payloadType = 0; + do { + if (seiPtr >= data.length) { + break; + } + b = data[seiPtr++]; + payloadType += b; + } while (b === 0xff); + + // Parse payload size. + payloadSize = 0; + do { + if (seiPtr >= data.length) { + break; + } + b = data[seiPtr++]; + payloadSize += b; + } while (b === 0xff); + + const leftOver = data.length - seiPtr; + + if (!endOfCaptions && payloadType === 4 && seiPtr < data.length) { + endOfCaptions = true; + + const countryCode = data[seiPtr++]; + if (countryCode === 181) { + const providerCode = readUint16(data, seiPtr); + seiPtr += 2; + + if (providerCode === 49) { + const userStructure = readUint32(data, seiPtr); + seiPtr += 4; + + if (userStructure === 0x47413934) { + const userDataType = data[seiPtr++]; + + // Raw CEA-608 bytes wrapped in CEA-708 packet + if (userDataType === 3) { + const firstByte = data[seiPtr++]; + const totalCCs = 0x1f & firstByte; + const enabled = 0x40 & firstByte; + const totalBytes = enabled ? 2 + totalCCs * 3 : 0; + const byteArray = new Uint8Array(totalBytes); + if (enabled) { + byteArray[0] = firstByte; + for (let i = 1; i < totalBytes; i++) { + byteArray[i] = data[seiPtr++]; + } + } + + samples.push({ + type: userDataType, + payloadType, + pts, + bytes: byteArray, + }); + } + } + } + } + } else if (payloadType === 5 && payloadSize < leftOver) { + endOfCaptions = true; + + if (payloadSize > 16) { + const uuidStrArray: Array = []; + for (let i = 0; i < 16; i++) { + const b = data[seiPtr++].toString(16); + uuidStrArray.push(b.length == 1 ? '0' + b : b); + + if (i === 3 || i === 5 || i === 7 || i === 9) { + uuidStrArray.push('-'); + } + } + const length = payloadSize - 16; + const userDataBytes = new Uint8Array(length); + for (let i = 0; i < length; i++) { + userDataBytes[i] = data[seiPtr++]; + } + + samples.push({ + payloadType, + pts, + uuid: uuidStrArray.join(''), + userData: utf8ArrayToStr(userDataBytes), + userDataBytes, + }); + } + } else if (payloadSize < leftOver) { + seiPtr += payloadSize; + } else if (payloadSize > leftOver) { + break; + } + } +} + +/** + * remove Emulation Prevention bytes from a RBSP + */ +function discardEPB(data: Uint8Array): Uint8Array { + const length = data.byteLength; + const EPBPositions = [] as Array; + let i = 1; + + // Find all `Emulation Prevention Bytes` + while (i < length - 2) { + if (data[i] === 0 && data[i + 1] === 0 && data[i + 2] === 0x03) { + EPBPositions.push(i + 2); + i += 2; + } else { + i++; + } + } + + // If no Emulation Prevention Bytes were found just return the original + // array + if (EPBPositions.length === 0) { + return data; + } + + // Create a new array to hold the NAL unit data + const newLength = length - EPBPositions.length; + const newData = new Uint8Array(newLength); + let sourceIndex = 0; + + for (i = 0; i < newLength; sourceIndex++, i++) { + if (sourceIndex === EPBPositions[0]) { + // Skip this byte + sourceIndex++; + // Remove this position index + EPBPositions.shift(); + } + newData[i] = data[sourceIndex]; + } + return newData; +} + export function parseEmsg(data: Uint8Array): IEmsgParsingData { const version = data[0]; let schemeIdUri: string = '';