Skip to content

Commit

Permalink
Add support for CMAF SEI Captions
Browse files Browse the repository at this point in the history
Resolves #2623 (Add CMAF CC support)
Resolves #4242 (Fix parsing multiple user data unregistered SEI)
Closes #4421 (Does hls.js support extracting SEI frame from videos?)
Fixes #4317 (Progressive mode is broken with fmp4 playback)
  • Loading branch information
robwalch committed Dec 15, 2021
1 parent 3e59a8a commit 864b55d
Show file tree
Hide file tree
Showing 14 changed files with 668 additions and 388 deletions.
12 changes: 11 additions & 1 deletion api-extractor/report/hls.js.api.md
Expand Up @@ -2143,9 +2143,19 @@ export type TSDemuxerConfig = {
// @public (undocumented)
export interface UserdataSample {
// (undocumented)
bytes: Uint8Array;
bytes?: Uint8Array;
// (undocumented)
payloadType?: number;
// (undocumented)
pts: number;
// (undocumented)
type?: number;
// (undocumented)
userData?: string;
// (undocumented)
userDataBytes?: Uint8Array;
// (undocumented)
uuid?: string;
}

// Warnings were encountered during analysis:
Expand Down
16 changes: 9 additions & 7 deletions src/controller/timeline-controller.ts
Expand Up @@ -664,23 +664,25 @@ export class TimelineController implements ComponentAPI {
}

private extractCea608Data(byteArray: Uint8Array): number[][] {
const count = byteArray[0] & 31;
let position = 2;
const actualCCBytes: number[][] = [[], []];
const count = byteArray[0] & 0x1f;
let position = 2;

for (let j = 0; j < count; j++) {
const tmpByte = byteArray[position++];
const ccbyte1 = 0x7f & byteArray[position++];
const ccbyte2 = 0x7f & byteArray[position++];
const ccValid = (4 & tmpByte) !== 0;
const ccType = 3 & tmpByte;

if (ccbyte1 === 0 && ccbyte2 === 0) {
continue;
}

const ccValid = (0x04 & tmpByte) !== 0; // Support all four channels
if (ccValid) {
if (ccType === 0 || ccType === 1) {
const ccType = 0x03 & tmpByte;
if (
0x00 /* CEA608 field1*/ === ccType ||
0x01 /* CEA608 field2*/ === ccType
) {
// Exclude CEA708 CC data.
actualCCBytes[ccType].push(ccbyte1);
actualCCBytes[ccType].push(ccbyte2);
}
Expand Down
11 changes: 8 additions & 3 deletions src/demux/aacdemuxer.ts
Expand Up @@ -19,8 +19,13 @@ class AACDemuxer extends BaseAudioDemuxer {
this.config = config;
}

resetInitSegment(audioCodec, videoCodec, duration) {
super.resetInitSegment(audioCodec, videoCodec, duration);
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration);
this._audioTrack = {
container: 'audio/adts',
type: 'audio',
Expand All @@ -30,7 +35,7 @@ class AACDemuxer extends BaseAudioDemuxer {
isAAC: true,
samples: [],
manifestCodec: audioCodec,
duration: duration,
duration: trackDuration,
inputTimeScale: 90000,
dropped: 0,
};
Expand Down
13 changes: 9 additions & 4 deletions src/demux/base-audio-demuxer.ts
Expand Up @@ -5,7 +5,7 @@ import type {
DemuxedAudioTrack,
AudioFrame,
DemuxedMetadataTrack,
DemuxedAvcTrack,
DemuxedVideoTrack,
DemuxedUserdataTrack,
KeyData,
} from '../types/demuxer';
Expand All @@ -20,7 +20,12 @@ class BaseAudioDemuxer implements Demuxer {
protected cachedData: Uint8Array | null = null;
protected initPTS: number | null = null;

resetInitSegment(audioCodec: string, videoCodec: string, duration: number) {
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
this._id3Track = {
type: 'id3',
id: 3,
Expand Down Expand Up @@ -109,7 +114,7 @@ class BaseAudioDemuxer implements Demuxer {

return {
audioTrack: track,
avcTrack: dummyTrack() as DemuxedAvcTrack,
videoTrack: dummyTrack() as DemuxedVideoTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
Expand Down Expand Up @@ -137,7 +142,7 @@ class BaseAudioDemuxer implements Demuxer {

return {
audioTrack: this._audioTrack,
avcTrack: dummyTrack() as DemuxedAvcTrack,
videoTrack: dummyTrack() as DemuxedVideoTrack,
id3Track: this._id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
Expand Down
6 changes: 3 additions & 3 deletions src/demux/dummy-demuxed-track.ts
@@ -1,11 +1,11 @@
import type { DemuxedTrack } from '../types/demuxer';

export function dummyTrack(): DemuxedTrack {
export function dummyTrack(type = '', inputTimeScale = 90000): DemuxedTrack {
return {
type: '',
type,
id: -1,
pid: -1,
inputTimeScale: 90000,
inputTimeScale,
sequenceNumber: -1,
samples: [],
dropped: 0,
Expand Down
11 changes: 8 additions & 3 deletions src/demux/mp3demuxer.ts
Expand Up @@ -9,8 +9,13 @@ import * as MpegAudio from './mpegaudio';
class MP3Demuxer extends BaseAudioDemuxer {
static readonly minProbeByteLength: number = 4;

resetInitSegment(audioCodec, videoCodec, duration) {
super.resetInitSegment(audioCodec, videoCodec, duration);
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration);
this._audioTrack = {
container: 'audio/mpeg',
type: 'audio',
Expand All @@ -20,7 +25,7 @@ class MP3Demuxer extends BaseAudioDemuxer {
isAAC: false,
samples: [],
manifestCodec: audioCodec,
duration: duration,
duration: trackDuration,
inputTimeScale: 90000,
dropped: 0,
};
Expand Down
153 changes: 108 additions & 45 deletions src/demux/mp4demuxer.ts
Expand Up @@ -4,7 +4,7 @@
import {
Demuxer,
DemuxerResult,
PassthroughVideoTrack,
PassthroughTrack,
DemuxedAudioTrack,
DemuxedUserdataTrack,
DemuxedMetadataTrack,
Expand All @@ -15,6 +15,9 @@ import {
segmentValidRange,
appendUint8Array,
parseEmsg,
parseSamples,
parseInitSegment,
RemuxerTrackIdConfig,
} from '../utils/mp4-tools';
import { dummyTrack } from './dummy-demuxed-track';
import type { HlsEventEmitter } from '../events';
Expand All @@ -25,87 +28,147 @@ const emsgSchemePattern = /\/emsg[-/]ID3/i;
class MP4Demuxer implements Demuxer {
static readonly minProbeByteLength = 1024;
private remainderData: Uint8Array | null = null;
private timeOffset: number = 0;
private config: HlsConfig;
private videoTrack?: PassthroughTrack;
private audioTrack?: DemuxedAudioTrack;
private id3Track?: DemuxedMetadataTrack;
private txtTrack?: DemuxedUserdataTrack;

constructor(observer: HlsEventEmitter, config: HlsConfig) {
this.config = config;
}

resetTimeStamp() {}
public resetTimeStamp() {}

resetInitSegment() {}
public resetInitSegment(
initSegment: Uint8Array,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
const initData = parseInitSegment(initSegment);
const videoTrack = (this.videoTrack = dummyTrack(
'video',
1
) as PassthroughTrack);
const audioTrack = (this.audioTrack = dummyTrack(
'audio',
1
) as DemuxedAudioTrack);
const captionTrack = (this.txtTrack = dummyTrack(
'text',
1
) as DemuxedUserdataTrack);

resetContiguity(): void {}
this.id3Track = dummyTrack('id3', 1) as DemuxedMetadataTrack;
this.timeOffset = 0;

static probe(data) {
if (initData.video) {
const { id, timescale, codec } = initData.video;
videoTrack.id = id;
videoTrack.timescale = captionTrack.timescale = timescale;
videoTrack.codec = codec;
}

if (initData.audio) {
const { id, timescale, codec } = initData.audio;
audioTrack.id = id;
audioTrack.timescale = timescale;
audioTrack.codec = codec;
}

captionTrack.id = RemuxerTrackIdConfig.text;
videoTrack.sampleDuration = 0;
videoTrack.duration = audioTrack.duration = trackDuration;
}

public resetContiguity(): void {}

static probe(data: Uint8Array) {
// ensure we find a moof box in the first 16 kB
return (
findBox({ data: data, start: 0, end: Math.min(data.length, 16384) }, [
'moof',
]).length > 0
);
data = data.length > 16384 ? data.subarray(0, 16384) : data;
return findBox(data, ['moof']).length > 0;
}

demux(data: Uint8Array, timeOffset: number): DemuxerResult {
public demux(data: Uint8Array, timeOffset: number): DemuxerResult {
this.timeOffset = timeOffset;
// Load all data into the video track. The CMAF remuxer will look for the data in the samples object; the rest of the fields do not matter
let avcSamples = data;
const avcTrack = dummyTrack() as PassthroughVideoTrack;
let videoSamples = data;
const videoTrack = this.videoTrack as PassthroughTrack;
const textTrack = this.txtTrack as DemuxedUserdataTrack;
if (this.config.progressive) {
// Split the bytestream into two ranges: one encompassing all data up until the start of the last moof, and everything else.
// This is done to guarantee that we're sending valid data to MSE - when demuxing progressively, we have no guarantee
// that the fetch loader gives us flush moof+mdat pairs. If we push jagged data to MSE, it will throw an exception.
if (this.remainderData) {
avcSamples = appendUint8Array(this.remainderData, data);
videoSamples = appendUint8Array(this.remainderData, data);
}
const segmentedData = segmentValidRange(avcSamples);
const segmentedData = segmentValidRange(videoSamples);
this.remainderData = segmentedData.remainder;
avcTrack.samples = segmentedData.valid || new Uint8Array();
videoTrack.samples = segmentedData.valid || new Uint8Array();
} else {
avcTrack.samples = avcSamples;
videoTrack.samples = videoSamples;
}

const id3Track = dummyTrack() as DemuxedMetadataTrack;
const emsgs = findBox(avcTrack.samples, ['emsg']);
if (emsgs) {
id3Track.inputTimeScale = 1;
emsgs.forEach(({ data, start, end }) => {
const emsgInfo = parseEmsg(data.subarray(start, end));
if (emsgSchemePattern.test(emsgInfo.schemeIdUri)) {
const pts = Number.isFinite(emsgInfo.presentationTime)
? emsgInfo.presentationTime! / emsgInfo.timeScale
: timeOffset + emsgInfo.presentationTimeDelta! / emsgInfo.timeScale;
const payload = emsgInfo.payload;
id3Track.samples.push({
data: payload,
len: payload.byteLength,
dts: pts,
pts: pts,
});
}
});
}
const id3Track = this.extractID3Track(videoTrack, timeOffset);
textTrack.samples = parseSamples(timeOffset, videoTrack);

return {
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
videoTrack,
audioTrack: this.audioTrack as DemuxedAudioTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
textTrack: this.txtTrack as DemuxedUserdataTrack,
};
}

flush() {
const avcTrack = dummyTrack() as PassthroughVideoTrack;
avcTrack.samples = this.remainderData || new Uint8Array();
public flush() {
const timeOffset = this.timeOffset;
const videoTrack = this.videoTrack as PassthroughTrack;
const textTrack = this.txtTrack as DemuxedUserdataTrack;
videoTrack.samples = this.remainderData || new Uint8Array();
this.remainderData = null;

const id3Track = this.extractID3Track(videoTrack, this.timeOffset);
textTrack.samples = parseSamples(timeOffset, videoTrack);

return {
videoTrack,
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
id3Track: dummyTrack() as DemuxedMetadataTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

/**
 * Collects ID3 metadata carried in `emsg` boxes found in the track's samples.
 *
 * Every `emsg` box whose scheme URI matches `emsgSchemePattern` is appended to
 * the demuxer's `id3Track` as one sample, with `dts` and `pts` set to the same
 * value.
 *
 * @param videoTrack - Passthrough track whose `samples` are searched for `emsg` boxes.
 * @param timeOffset - Base time added to `presentationTimeDelta / timeScale` when the
 *   box has no finite `presentationTime`.
 * @returns The demuxer's `id3Track`, with any newly found samples appended.
 */
private extractID3Track(
  videoTrack: PassthroughTrack,
  timeOffset: number
): DemuxedMetadataTrack {
  const metadataTrack = this.id3Track as DemuxedMetadataTrack;

  // Nothing to scan: hand back the track untouched.
  if (!videoTrack.samples.length) {
    return metadataTrack;
  }

  const emsgBoxes = findBox(videoTrack.samples, ['emsg']);
  if (!emsgBoxes) {
    return metadataTrack;
  }

  emsgBoxes.forEach((box: Uint8Array) => {
    const info = parseEmsg(box);
    // Only event messages that advertise an ID3 scheme are of interest.
    if (!emsgSchemePattern.test(info.schemeIdUri)) {
      return;
    }

    // Prefer the absolute presentation time; otherwise fall back to the
    // delta relative to the supplied time offset.
    let pts: number;
    if (Number.isFinite(info.presentationTime)) {
      pts = info.presentationTime! / info.timeScale;
    } else {
      pts = timeOffset + info.presentationTimeDelta! / info.timeScale;
    }

    const payload = info.payload;
    metadataTrack.samples.push({
      data: payload,
      len: payload.byteLength,
      dts: pts,
      pts: pts,
    });
  });

  return metadataTrack;
}

demuxSampleAes(
data: Uint8Array,
keyData: KeyData,
Expand Down

0 comments on commit 864b55d

Please sign in to comment.