Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for CMAF SEI Captions #4459

Merged
merged 1 commit into from
Feb 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Expand Up @@ -113,7 +113,6 @@ The following tags are added to their respective fragment's attribute list but a

For a complete list of issues, see ["Top priorities" in the Release Planning and Backlog project tab](https://github.com/video-dev/hls.js/projects/6). Codec support is dependent on the runtime environment (for example, not all browsers on the same OS support HEVC).

- CMAF CC support [#2623](https://github.com/video-dev/hls.js/issues/2623)
- `#EXT-X-DATERANGE` in "metadata" TextTracks [#2218](https://github.com/video-dev/hls.js/issues/2218)
- `#EXT-X-GAP` filling [#2940](https://github.com/video-dev/hls.js/issues/2940)
- `#EXT-X-I-FRAME-STREAM-INF` I-frame Media Playlist files
Expand Down
12 changes: 11 additions & 1 deletion api-extractor/report/hls.js.api.md
Expand Up @@ -2143,9 +2143,19 @@ export type TSDemuxerConfig = {
// @public (undocumented)
export interface UserdataSample {
// (undocumented)
bytes: Uint8Array;
bytes?: Uint8Array;
// (undocumented)
payloadType?: number;
// (undocumented)
pts: number;
// (undocumented)
type?: number;
// (undocumented)
userData?: string;
// (undocumented)
userDataBytes?: Uint8Array;
// (undocumented)
uuid?: string;
}

// Warnings were encountered during analysis:
Expand Down
16 changes: 9 additions & 7 deletions src/controller/timeline-controller.ts
Expand Up @@ -664,23 +664,25 @@ export class TimelineController implements ComponentAPI {
}

private extractCea608Data(byteArray: Uint8Array): number[][] {
const count = byteArray[0] & 31;
let position = 2;
const actualCCBytes: number[][] = [[], []];
const count = byteArray[0] & 0x1f;
let position = 2;

for (let j = 0; j < count; j++) {
const tmpByte = byteArray[position++];
const ccbyte1 = 0x7f & byteArray[position++];
const ccbyte2 = 0x7f & byteArray[position++];
const ccValid = (4 & tmpByte) !== 0;
const ccType = 3 & tmpByte;

if (ccbyte1 === 0 && ccbyte2 === 0) {
continue;
}

const ccValid = (0x04 & tmpByte) !== 0; // Support all four channels
if (ccValid) {
if (ccType === 0 || ccType === 1) {
const ccType = 0x03 & tmpByte;
if (
0x00 /* CEA608 field1*/ === ccType ||
0x01 /* CEA608 field2*/ === ccType
) {
// Exclude CEA708 CC data.
actualCCBytes[ccType].push(ccbyte1);
actualCCBytes[ccType].push(ccbyte2);
}
Expand Down
11 changes: 8 additions & 3 deletions src/demux/aacdemuxer.ts
Expand Up @@ -19,8 +19,13 @@ class AACDemuxer extends BaseAudioDemuxer {
this.config = config;
}

resetInitSegment(audioCodec, videoCodec, duration) {
super.resetInitSegment(audioCodec, videoCodec, duration);
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration);
this._audioTrack = {
container: 'audio/adts',
type: 'audio',
Expand All @@ -30,7 +35,7 @@ class AACDemuxer extends BaseAudioDemuxer {
isAAC: true,
samples: [],
manifestCodec: audioCodec,
duration: duration,
duration: trackDuration,
inputTimeScale: 90000,
dropped: 0,
};
Expand Down
13 changes: 9 additions & 4 deletions src/demux/base-audio-demuxer.ts
Expand Up @@ -5,7 +5,7 @@ import type {
DemuxedAudioTrack,
AudioFrame,
DemuxedMetadataTrack,
DemuxedAvcTrack,
DemuxedVideoTrack,
DemuxedUserdataTrack,
KeyData,
} from '../types/demuxer';
Expand All @@ -20,7 +20,12 @@ class BaseAudioDemuxer implements Demuxer {
protected cachedData: Uint8Array | null = null;
protected initPTS: number | null = null;

resetInitSegment(audioCodec: string, videoCodec: string, duration: number) {
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
this._id3Track = {
type: 'id3',
id: 3,
Expand Down Expand Up @@ -109,7 +114,7 @@ class BaseAudioDemuxer implements Demuxer {

return {
audioTrack: track,
avcTrack: dummyTrack() as DemuxedAvcTrack,
videoTrack: dummyTrack() as DemuxedVideoTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
Expand Down Expand Up @@ -137,7 +142,7 @@ class BaseAudioDemuxer implements Demuxer {

return {
audioTrack: this._audioTrack,
avcTrack: dummyTrack() as DemuxedAvcTrack,
videoTrack: dummyTrack() as DemuxedVideoTrack,
id3Track: this._id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
Expand Down
6 changes: 3 additions & 3 deletions src/demux/dummy-demuxed-track.ts
@@ -1,11 +1,11 @@
import type { DemuxedTrack } from '../types/demuxer';

export function dummyTrack(): DemuxedTrack {
export function dummyTrack(type = '', inputTimeScale = 90000): DemuxedTrack {
return {
type: '',
type,
id: -1,
pid: -1,
inputTimeScale: 90000,
inputTimeScale,
sequenceNumber: -1,
samples: [],
dropped: 0,
Expand Down
11 changes: 8 additions & 3 deletions src/demux/mp3demuxer.ts
Expand Up @@ -9,8 +9,13 @@ import * as MpegAudio from './mpegaudio';
class MP3Demuxer extends BaseAudioDemuxer {
static readonly minProbeByteLength: number = 4;

resetInitSegment(audioCodec, videoCodec, duration) {
super.resetInitSegment(audioCodec, videoCodec, duration);
resetInitSegment(
initSegment: Uint8Array | undefined,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
super.resetInitSegment(initSegment, audioCodec, videoCodec, trackDuration);
this._audioTrack = {
container: 'audio/mpeg',
type: 'audio',
Expand All @@ -20,7 +25,7 @@ class MP3Demuxer extends BaseAudioDemuxer {
isAAC: false,
samples: [],
manifestCodec: audioCodec,
duration: duration,
duration: trackDuration,
inputTimeScale: 90000,
dropped: 0,
};
Expand Down
153 changes: 108 additions & 45 deletions src/demux/mp4demuxer.ts
Expand Up @@ -4,7 +4,7 @@
import {
Demuxer,
DemuxerResult,
PassthroughVideoTrack,
PassthroughTrack,
DemuxedAudioTrack,
DemuxedUserdataTrack,
DemuxedMetadataTrack,
Expand All @@ -15,6 +15,9 @@ import {
segmentValidRange,
appendUint8Array,
parseEmsg,
parseSamples,
parseInitSegment,
RemuxerTrackIdConfig,
} from '../utils/mp4-tools';
import { dummyTrack } from './dummy-demuxed-track';
import type { HlsEventEmitter } from '../events';
Expand All @@ -25,87 +28,147 @@ const emsgSchemePattern = /\/emsg[-/]ID3/i;
class MP4Demuxer implements Demuxer {
static readonly minProbeByteLength = 1024;
private remainderData: Uint8Array | null = null;
private timeOffset: number = 0;
private config: HlsConfig;
private videoTrack?: PassthroughTrack;
private audioTrack?: DemuxedAudioTrack;
private id3Track?: DemuxedMetadataTrack;
private txtTrack?: DemuxedUserdataTrack;

constructor(observer: HlsEventEmitter, config: HlsConfig) {
this.config = config;
}

resetTimeStamp() {}
public resetTimeStamp() {}

resetInitSegment() {}
public resetInitSegment(
initSegment: Uint8Array,
audioCodec: string | undefined,
videoCodec: string | undefined,
trackDuration: number
) {
const initData = parseInitSegment(initSegment);
const videoTrack = (this.videoTrack = dummyTrack(
'video',
1
) as PassthroughTrack);
const audioTrack = (this.audioTrack = dummyTrack(
'audio',
1
) as DemuxedAudioTrack);
const captionTrack = (this.txtTrack = dummyTrack(
'text',
1
) as DemuxedUserdataTrack);

resetContiguity(): void {}
this.id3Track = dummyTrack('id3', 1) as DemuxedMetadataTrack;
this.timeOffset = 0;

static probe(data) {
if (initData.video) {
const { id, timescale, codec } = initData.video;
videoTrack.id = id;
videoTrack.timescale = captionTrack.timescale = timescale;
videoTrack.codec = codec;
}

if (initData.audio) {
const { id, timescale, codec } = initData.audio;
audioTrack.id = id;
audioTrack.timescale = timescale;
audioTrack.codec = codec;
}

captionTrack.id = RemuxerTrackIdConfig.text;
videoTrack.sampleDuration = 0;
videoTrack.duration = audioTrack.duration = trackDuration;
}

public resetContiguity(): void {}

static probe(data: Uint8Array) {
// ensure we find a moof box in the first 16 kB
return (
findBox({ data: data, start: 0, end: Math.min(data.length, 16384) }, [
'moof',
]).length > 0
);
data = data.length > 16384 ? data.subarray(0, 16384) : data;
return findBox(data, ['moof']).length > 0;
}

demux(data: Uint8Array, timeOffset: number): DemuxerResult {
public demux(data: Uint8Array, timeOffset: number): DemuxerResult {
this.timeOffset = timeOffset;
// Load all data into the video track. The CMAF remuxer will look for the data in the samples object; the rest of the fields do not matter
let avcSamples = data;
const avcTrack = dummyTrack() as PassthroughVideoTrack;
let videoSamples = data;
const videoTrack = this.videoTrack as PassthroughTrack;
const textTrack = this.txtTrack as DemuxedUserdataTrack;
if (this.config.progressive) {
// Split the bytestream into two ranges: one encompassing all data up until the start of the last moof, and everything else.
// This is done to guarantee that we're sending valid data to MSE - when demuxing progressively, we have no guarantee
// that the fetch loader gives us flush moof+mdat pairs. If we push jagged data to MSE, it will throw an exception.
if (this.remainderData) {
avcSamples = appendUint8Array(this.remainderData, data);
videoSamples = appendUint8Array(this.remainderData, data);
}
const segmentedData = segmentValidRange(avcSamples);
const segmentedData = segmentValidRange(videoSamples);
this.remainderData = segmentedData.remainder;
avcTrack.samples = segmentedData.valid || new Uint8Array();
videoTrack.samples = segmentedData.valid || new Uint8Array();
} else {
avcTrack.samples = avcSamples;
videoTrack.samples = videoSamples;
}

const id3Track = dummyTrack() as DemuxedMetadataTrack;
const emsgs = findBox(avcTrack.samples, ['emsg']);
if (emsgs) {
id3Track.inputTimeScale = 1;
emsgs.forEach(({ data, start, end }) => {
const emsgInfo = parseEmsg(data.subarray(start, end));
if (emsgSchemePattern.test(emsgInfo.schemeIdUri)) {
const pts = Number.isFinite(emsgInfo.presentationTime)
? emsgInfo.presentationTime! / emsgInfo.timeScale
: timeOffset + emsgInfo.presentationTimeDelta! / emsgInfo.timeScale;
const payload = emsgInfo.payload;
id3Track.samples.push({
data: payload,
len: payload.byteLength,
dts: pts,
pts: pts,
});
}
});
}
const id3Track = this.extractID3Track(videoTrack, timeOffset);
textTrack.samples = parseSamples(timeOffset, videoTrack);

return {
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
videoTrack,
audioTrack: this.audioTrack as DemuxedAudioTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
textTrack: this.txtTrack as DemuxedUserdataTrack,
};
}

flush() {
const avcTrack = dummyTrack() as PassthroughVideoTrack;
avcTrack.samples = this.remainderData || new Uint8Array();
public flush() {
const timeOffset = this.timeOffset;
const videoTrack = this.videoTrack as PassthroughTrack;
const textTrack = this.txtTrack as DemuxedUserdataTrack;
videoTrack.samples = this.remainderData || new Uint8Array();
this.remainderData = null;

const id3Track = this.extractID3Track(videoTrack, this.timeOffset);
textTrack.samples = parseSamples(timeOffset, videoTrack);

return {
videoTrack,
audioTrack: dummyTrack() as DemuxedAudioTrack,
avcTrack,
id3Track: dummyTrack() as DemuxedMetadataTrack,
id3Track,
textTrack: dummyTrack() as DemuxedUserdataTrack,
};
}

/**
 * Scans the passthrough sample bytes of `videoTrack` for `emsg` boxes and
 * appends the payload of every box whose scheme URI matches
 * `emsgSchemePattern` to the demuxer's ID3 track.
 *
 * @param videoTrack - Track whose `samples` bytes are searched for `emsg` boxes.
 * @param timeOffset - Base time used when a box has no finite
 *   `presentationTime`; `presentationTimeDelta / timeScale` is added to it.
 * @returns The demuxer's own ID3 track (`this.id3Track`), with any matching
 *   samples pushed onto its `samples` list.
 */
private extractID3Track(
  videoTrack: PassthroughTrack,
  timeOffset: number
): DemuxedMetadataTrack {
  const metadataTrack = this.id3Track as DemuxedMetadataTrack;

  // Nothing to scan when the track carries no bytes.
  if (!videoTrack.samples.length) {
    return metadataTrack;
  }

  const emsgBoxes = findBox(videoTrack.samples, ['emsg']);
  if (!emsgBoxes) {
    return metadataTrack;
  }

  emsgBoxes.forEach((box: Uint8Array) => {
    const info = parseEmsg(box);

    // Ignore event messages that do not use the ID3 scheme.
    if (!emsgSchemePattern.test(info.schemeIdUri)) {
      return;
    }

    // Prefer the absolute presentation time; otherwise fall back to the
    // delta relative to the supplied time offset.
    let pts: number;
    if (Number.isFinite(info.presentationTime)) {
      pts = info.presentationTime! / info.timeScale;
    } else {
      pts = timeOffset + info.presentationTimeDelta! / info.timeScale;
    }

    const payload = info.payload;
    metadataTrack.samples.push({
      data: payload,
      len: payload.byteLength,
      dts: pts,
      pts: pts,
    });
  });

  return metadataTrack;
}

demuxSampleAes(
data: Uint8Array,
keyData: KeyData,
Expand Down