video-dev · robwalch · May 22, 2021 · May 22, 2021
diff --git a/src/demux/aacdemuxer.ts b/src/demux/aacdemuxer.ts
@@ -70,13 +70,16 @@ class AACDemuxer extends BaseAudioDemuxer {
       offset,
       track.manifestCodec
     );
-    return ADTS.appendFrame(
+    const frame = ADTS.appendFrame(
       track,
       data,
       offset,
       this.initPTS as number,
       this.frameIndex
     );
+    if (frame && frame.missing === 0) {
+      return frame;
+    }
   }
 }
 

diff --git a/src/demux/adts.ts b/src/demux/adts.ts
@@ -6,7 +6,11 @@ import { logger } from '../utils/logger';
 import { ErrorTypes, ErrorDetails } from '../errors';
 import type { HlsEventEmitter } from '../events';
 import { Events } from '../events';
-import type { DemuxedAudioTrack, AppendedAudioFrame } from '../types/demuxer';
+import type {
+  DemuxedAudioTrack,
+  AudioFrame,
+  AudioSample,
+} from '../types/demuxer';
 
 type AudioConfig = {
   config: number[];
@@ -256,15 +260,13 @@ export function parseFrameHeader(
   frameIndex: number,
   frameDuration: number
 ): FrameHeader | void {
-  const length = data.length;
-
   // The protection skip bit tells us if we have 2 bytes of CRC data at the end of the ADTS header
   const headerLength = getHeaderLength(data, offset);
   // retrieve frame size
   let frameLength = getFullFrameLength(data, offset);
   frameLength -= headerLength;
 
-  if (frameLength > 0 && offset + headerLength + frameLength <= length) {
+  if (frameLength > 0) {
     const stamp = pts + frameIndex * frameDuration;
     // logger.log(`AAC frame, offset/length/total/pts:${offset+headerLength}/${frameLength}/${data.byteLength}/${(stamp/90).toFixed(0)}`);
     return { headerLength, frameLength, stamp };
@@ -277,25 +279,30 @@ export function appendFrame(
   offset: number,
   pts: number,
   frameIndex: number
-): AppendedAudioFrame | void {
+): AudioFrame | void {
   const frameDuration = getFrameDuration(track.samplerate as number);
   const header = parseFrameHeader(data, offset, pts, frameIndex, frameDuration);
   if (header) {
-    const stamp = header.stamp;
-    const headerLength = header.headerLength;
-    const frameLength = header.frameLength;
+    const { frameLength, headerLength, stamp } = header;
+    const length = headerLength + frameLength;
+    const missing = Math.max(0, offset + length - data.length);
+    // logger.log(`AAC frame ${frameIndex}, pts:${stamp} length@offset/total: ${frameLength}@${offset+headerLength}/${data.byteLength} missing: ${missing}`);
+    let unit: Uint8Array;
+    if (missing) {
+      unit = new Uint8Array(length - headerLength);
+      unit.set(data.subarray(offset + headerLength, data.length), 0);
+    } else {
+      unit = data.subarray(offset + headerLength, offset + length);
+    }
 
-    // logger.log(`AAC frame, offset/length/total/pts:${offset+headerLength}/${frameLength}/${data.byteLength}/${(stamp/90).toFixed(0)}`);
-    const aacSample = {
-      unit: data.subarray(
-        offset + headerLength,
-        offset + headerLength + frameLength
-      ),
+    const sample: AudioSample = {
+      unit,
       pts: stamp,
-      dts: stamp,
     };
+    if (!missing) {
+      track.samples.push(sample as AudioSample);
+    }
 
-    track.samples.push(aacSample);
-    return { sample: aacSample, length: frameLength + headerLength };
+    return { sample, length, missing };
   }
 }
diff --git a/src/demux/base-audio-demuxer.ts b/src/demux/base-audio-demuxer.ts
@@ -3,7 +3,7 @@ import type {
   DemuxerResult,
   Demuxer,
   DemuxedAudioTrack,
-  AppendedAudioFrame,
+  AudioFrame,
   DemuxedMetadataTrack,
   DemuxedAvcTrack,
   DemuxedUserdataTrack,
@@ -44,7 +44,7 @@ class BaseAudioDemuxer implements Demuxer {
     track: DemuxedAudioTrack,
     data: Uint8Array,
     offset: number
-  ): AppendedAudioFrame | void {}
+  ): AudioFrame | void {}
 
   // feed incoming data to the front of the parsing pipeline
   demux(data: Uint8Array, timeOffset: number): DemuxerResult {

diff --git a/src/demux/mpegaudio.ts b/src/demux/mpegaudio.ts
@@ -82,7 +82,7 @@ export function appendFrame(
     track.samplerate = header.sampleRate;
     track.samples.push(sample);
 
-    return { sample, length: header.frameLength };
+    return { sample, length: header.frameLength, missing: 0 };
   }
 }
 

diff --git a/src/demux/tsdemuxer.ts b/src/demux/tsdemuxer.ts
@@ -32,6 +32,7 @@ import type {
   ElementaryStreamData,
   KeyData,
 } from '../types/demuxer';
+import type { AudioFrame } from '../types/demuxer';
 
 // We are using fixed track IDs for driving the MP4 remuxer
 // instead of following the TS PIDs.
@@ -87,7 +88,7 @@ class TSDemuxer implements Demuxer {
   private _audioTrack!: DemuxedAudioTrack;
   private _id3Track!: DemuxedMetadataTrack;
   private _txtTrack!: DemuxedUserdataTrack;
-  private aacOverFlow: Uint8Array | null = null;
+  private aacOverFlow: AudioFrame | null = null;
   private avcSample: ParsedAvcSample | null = null;
   private remainderData: Uint8Array | null = null;
 
@@ -934,17 +935,23 @@ class TSDemuxer implements Demuxer {
   }
 
   private parseAACPES(pes: PES) {
-    const startOffset = 0;
+    let startOffset = 0;
     const track = this._audioTrack;
-    const aacLastPTS = this.aacLastPTS;
     const aacOverFlow = this.aacOverFlow;
-    let data = pes.data;
+    const data = pes.data;
     if (aacOverFlow) {
-      const tmp = new Uint8Array(aacOverFlow.byteLength + data.byteLength);
-      tmp.set(aacOverFlow, 0);
-      tmp.set(data, aacOverFlow.byteLength);
-      // logger.log(`AAC: append overflowing ${aacOverFlow.byteLength} bytes to beginning of new PES`);
-      data = tmp;
+      this.aacOverFlow = null;
+      const frameMissingBytes = aacOverFlow.missing;
+      const frameOverflowBytes =
+        aacOverFlow.sample.unit.byteLength - frameMissingBytes;
+      aacOverFlow.sample.unit.set(
+        data.subarray(0, frameMissingBytes),
+        frameOverflowBytes
+      );
+      track.samples.push(aacOverFlow.sample);
+
+      // logger.log(`AAC: append overflowing ${frameOverflowBytes} bytes to beginning of new PES`);
+      startOffset = frameMissingBytes;
     }
     // look for ADTS header (0xFFFx)
     let offset: number;
@@ -955,7 +962,7 @@ class TSDemuxer implements Demuxer {
       }
     }
     // if ADTS header does not start straight from the beginning of the PES payload, raise an error
-    if (offset) {
+    if (offset !== startOffset) {
       let reason;
       let fatal;
       if (offset < len - 1) {
@@ -979,43 +986,33 @@ class TSDemuxer implements Demuxer {
 
     ADTS.initTrackConfig(track, this.observer, data, offset, this.audioCodec);
 
-    let frameIndex = 0;
-    const frameDuration = ADTS.getFrameDuration(track.samplerate as number);
-
-    // if last AAC frame is overflowing, we should ensure timestamps are contiguous:
-    // first sample PTS should be equal to last sample PTS + frameDuration
     let pts: number;
     if (pes.pts !== undefined) {
       pts = pes.pts;
-    } else if (aacLastPTS !== null) {
-      pts = aacLastPTS;
+    } else if (aacOverFlow) {
+      // if last AAC frame is overflowing, we should ensure timestamps are contiguous:
+      // first sample PTS should be equal to last sample PTS + frameDuration
+      const frameDuration = ADTS.getFrameDuration(track.samplerate as number);
+      pts = aacOverFlow.sample.pts + frameDuration;
     } else {
       logger.warn('[tsdemuxer]: AAC PES unknown PTS');
       return;
     }
-    if (aacOverFlow && aacLastPTS !== null) {
-      const newPTS = aacLastPTS + frameDuration;
-      if (Math.abs(newPTS - pts) > 1) {
-        logger.log(
-          `[tsdemuxer]: AAC: align PTS for overlapping frames by ${Math.round(
-            (newPTS - pts) / 90
-          )}`
-        );
-        pts = newPTS;
-      }
-    }
 
     // scan for aac samples
-    let stamp: number | null = null;
+    let frameIndex = 0;
     while (offset < len) {
       if (ADTS.isHeader(data, offset)) {
         if (offset + 5 < len) {
           const frame = ADTS.appendFrame(track, data, offset, pts, frameIndex);
           if (frame) {
-            offset += frame.length;
-            stamp = frame.sample.pts;
-            frameIndex++;
-            continue;
+            if (frame.missing) {
+              this.aacOverFlow = frame;
+            } else {
+              offset += frame.length;
+              frameIndex++;
+              continue;
+            }
           }
         }
         // We are at an ADTS header, but do not have enough data for a frame
@@ -1026,9 +1023,6 @@ class TSDemuxer implements Demuxer {
         offset++;
       }
     }
-
-    this.aacOverFlow = offset < len ? data.subarray(offset, len) : null;
-    this.aacLastPTS = stamp;
   }
 
   private parseMPEGPES(pes: PES) {

diff --git a/src/remux/mp4-remuxer.ts b/src/remux/mp4-remuxer.ts
@@ -711,10 +711,7 @@ export default class MP4Remuxer implements Remuxer {
 
     // compute normalized PTS
     inputSamples.forEach(function (sample) {
-      sample.pts = sample.dts = normalizePts(
-        sample.pts - initPTS,
-        timeOffsetMpegTS
-      );
+      sample.pts = normalizePts(sample.pts - initPTS, timeOffsetMpegTS);
     });
 
     if (!contiguous || nextAudioPts < 0) {
@@ -817,13 +814,12 @@ export default class MP4Remuxer implements Remuxer {
             inputSamples.splice(i, 0, {
               unit: fillFrame,
               pts: newStamp,
-              dts: newStamp,
             });
             nextPts += inputSampleDuration;
             i++;
           }
         }
-        sample.pts = sample.dts = nextPts;
+        sample.pts = nextPts;
         nextPts += inputSampleDuration;
       }
     }

diff --git a/src/types/demuxer.ts b/src/types/demuxer.ts
@@ -114,12 +114,12 @@ export interface AvcSampleUnit {
 export type AudioSample = {
   unit: Uint8Array;
   pts: number;
-  dts: number;
 };
 
-export type AppendedAudioFrame = {
+export type AudioFrame = {
   sample: AudioSample;
   length: number;
+  missing: number; // bytes of the frame that lie beyond the end of the parsed data; 0 when the frame is complete
 };
 
 export interface ElementaryStreamData {

diff --git a/tests/unit/demuxer/adts.js b/tests/unit/demuxer/adts.js
@@ -381,20 +381,11 @@ describe('parseFrameHeader', function () {
     });
   });
 
-  it('should return undefined if there is only the header part', function () {
-    const data = new Uint8Array(new ArrayBuffer(9));
-    data[0] = 0xff;
-    data[1] = 0xf0; // protection_absent = 0
-    data[4] = 0x01;
-    data[5] = 0x40; // frame_length is 9
-    expect(parseFrameHeader(data, 0, 0, 0, 0)).to.be.undefined;
-  });
-
-  it('should return undefined if data does not contain the entire frame', function () {
+  it('should return undefined if frame length is 0', function () {
     const data = new Uint8Array(new ArrayBuffer(12));
     data[0] = 0xff;
     data[1] = 0xf0; // protection_absent = 0
-    data[4] = 0x02; // frame_length is 16
+    data[4] = 0x00; // frame_length is 0, so nothing positive remains once the header length is subtracted
     expect(parseFrameHeader(data, 0, 0, 0, 0)).to.be.undefined;
   });
 });
@@ -411,29 +402,41 @@ describe('appendFrame', function () {
     data[1] = 0xf0; // protection_absent = 0
     data[4] = 0x02; // frame_length is 16
 
-    expect(appendFrame(track, data, 0, 0, 0)).to.deep.equal({
+    const frame = appendFrame(track, data, 0, 0, 0);
+    expect(frame, JSON.stringify(frame)).to.deep.equal({
       sample: {
         unit: data.subarray(9, 16),
         pts: 0,
-        dts: 0,
       },
       length: 16,
+      missing: 0,
     });
     expect(track.samples.length).to.equal(1);
   });
 
-  it('should not append sample if `parseFrameHeader` fails', function () {
+  it('should not append sample when incomplete (aac overflow or progressive streaming)', function () {
     const track = {
       samplerate: 64000,
       samples: [],
       len: 0,
     };
-    const data = new Uint8Array(new ArrayBuffer(12));
+    const data = new Uint8Array(new ArrayBuffer(20)); // 4 bytes short of the 24-byte frame declared below
     data[0] = 0xff;
     data[1] = 0xf0; // protection_absent = 0
-    data[4] = 0x02; // frame_length is 16
+    data[4] = 0x03; // frame_length is 24
 
-    expect(appendFrame(track, data, 0, 0, 0)).to.be.undefined;
+    const frame = appendFrame(track, data, 0, 0, 0);
+    const unit = new Uint8Array(15); // full payload size: 24-byte frame minus the 9 header bytes
+    unit.set(data.subarray(9, 20), 0); // only the 11 payload bytes present in data are copied; the tail stays zero
+
+    expect(frame, JSON.stringify(frame)).to.deep.equal({
+      sample: {
+        unit,
+        pts: 0,
+      },
+      length: 24,
+      missing: 4, // 24 declared - 20 available
+    });
     expect(track.samples.length).to.equal(0);
   });
 });