Merge pull request #3901 from video-dev/bugfix/dai-overlapping-audio-frames-in-tracks

Fix audio drift in DAI streams with overlapping AAC samples
video-dev · May 17, 2021 · aa9bac1 · aa9bac1
2 parents 837314a + 74ac100
commit aa9bac1
Show file tree

Hide file tree

Showing 6 changed files with 57 additions and 101 deletions.
diff --git a/demo/chart/timeline-chart.ts b/demo/chart/timeline-chart.ts
@@ -107,7 +107,7 @@ export class TimelineChart {
       if (pos.x > chartArea.left - 11) {
         const scale = this.chartScales[X_AXIS_SECONDS];
         if (event.deltaY) {
-          const direction = event.deltaY / Math.abs(event.deltaY);
+          const direction = -event.deltaY / Math.abs(event.deltaY);
           const normal = Math.min(333, Math.abs(event.deltaY)) / 1000;
           const ease = 1 - (1 - normal) * (1 - normal);
           this.zoom(scale, pos, ease * direction);

diff --git a/src/demux/transmuxer-interface.ts b/src/demux/transmuxer-interface.ts
@@ -14,6 +14,7 @@ import type { ChunkMetadata, TransmuxerResult } from '../types/transmuxer';
 import type Hls from '../hls';
 import type { HlsEventEmitter } from '../events';
 import type { PlaylistLevelType } from '../types/loader';
+import type { TypeSupported } from './tsdemuxer';
 
 const MediaSource = getMediaSource() || { isTypeSupported: () => false };
 
@@ -54,7 +55,7 @@ export default class TransmuxerInterface {
     this.observer.on(Events.FRAG_DECRYPTED, forwardMessage);
     this.observer.on(Events.ERROR, forwardMessage);
 
-    const typeSupported = {
+    const typeSupported: TypeSupported = {
       mp4: MediaSource.isTypeSupported('video/mp4'),
       mpeg: MediaSource.isTypeSupported('audio/mpeg'),
       mp3: MediaSource.isTypeSupported('audio/mp4; codecs="mp3"'),
@@ -102,7 +103,8 @@ export default class TransmuxerInterface {
           this.observer,
           typeSupported,
           config,
-          vendor
+          vendor,
+          id
         );
         this.worker = null;
       }
@@ -111,7 +113,8 @@ export default class TransmuxerInterface {
         this.observer,
         typeSupported,
         config,
-        vendor
+        vendor,
+        id
       );
     }
   }

diff --git a/src/demux/transmuxer-worker.ts b/src/demux/transmuxer-worker.ts
@@ -24,7 +24,8 @@ export default function TransmuxerWorker(self) {
           observer,
           data.typeSupported,
           config,
-          data.vendor
+          data.vendor,
+          data.id
         );
         enableLogs(config.debug);
         forwardMessage('init', null);

diff --git a/src/demux/transmuxer.ts b/src/demux/transmuxer.ts
@@ -4,19 +4,19 @@ import { ErrorTypes, ErrorDetails } from '../errors';
 import Decrypter from '../crypt/decrypter';
 import AACDemuxer from '../demux/aacdemuxer';
 import MP4Demuxer from '../demux/mp4demuxer';
-import TSDemuxer from '../demux/tsdemuxer';
+import TSDemuxer, { TypeSupported } from '../demux/tsdemuxer';
 import MP3Demuxer from '../demux/mp3demuxer';
 import MP4Remuxer from '../remux/mp4-remuxer';
 import PassThroughRemuxer from '../remux/passthrough-remuxer';
-import type { Demuxer, KeyData } from '../types/demuxer';
-import type { Remuxer } from '../types/remuxer';
-import type { TransmuxerResult, ChunkMetadata } from '../types/transmuxer';
 import ChunkCache from './chunk-cache';
 import { appendUint8Array } from '../utils/mp4-tools';
-
 import { logger } from '../utils/logger';
+import type { Demuxer, KeyData } from '../types/demuxer';
+import type { Remuxer } from '../types/remuxer';
+import type { TransmuxerResult, ChunkMetadata } from '../types/transmuxer';
 import type { HlsConfig } from '../config';
-import { LevelKey } from '../loader/level-key';
+import type { LevelKey } from '../loader/level-key';
+import type { PlaylistLevelType } from '../types/loader';
 
 let now;
 // performance.now() not available on WebWorker, at least on Safari Desktop
@@ -47,9 +47,10 @@ muxConfig.forEach(({ demux }) => {
 
 export default class Transmuxer {
   private observer: HlsEventEmitter;
-  private typeSupported: any;
+  private typeSupported: TypeSupported;
   private config: HlsConfig;
-  private vendor: any;
+  private vendor: string;
+  private id: PlaylistLevelType;
   private demuxer?: Demuxer;
   private remuxer?: Remuxer;
   private decrypter?: Decrypter;
@@ -61,14 +62,16 @@ export default class Transmuxer {
 
   constructor(
     observer: HlsEventEmitter,
-    typeSupported,
+    typeSupported: TypeSupported,
     config: HlsConfig,
-    vendor
+    vendor: string,
+    id: PlaylistLevelType
   ) {
     this.observer = observer;
     this.typeSupported = typeSupported;
     this.config = config;
     this.vendor = vendor;
+    this.id = id;
   }
 
   configure(transmuxConfig: TransmuxConfig) {
@@ -258,7 +261,8 @@ export default class Transmuxer {
       textTrack,
       timeOffset,
       accurateTimeOffset,
-      true
+      true,
+      this.id
     );
     transmuxResults.push({
       remuxResult,
@@ -354,7 +358,8 @@ export default class Transmuxer {
       textTrack,
       timeOffset,
       accurateTimeOffset,
-      false
+      false,
+      this.id
     );
     return {
       remuxResult,
@@ -379,7 +384,8 @@ export default class Transmuxer {
           demuxResult.textTrack,
           timeOffset,
           accurateTimeOffset,
-          false
+          false,
+          this.id
         );
         return {
           remuxResult,

diff --git a/src/remux/mp4-remuxer.ts b/src/remux/mp4-remuxer.ts
@@ -12,6 +12,8 @@ import {
   RemuxedTrack,
   RemuxedUserdata,
 } from '../types/remuxer';
+import { PlaylistLevelType } from '../types/loader';
+import { toMsFromMpegTsClock } from '../utils/timescale-conversion';
 import type {
   AudioSample,
   AvcSample,
@@ -24,7 +26,6 @@ import type { TrackSet } from '../types/track';
 import type { SourceBufferName } from '../types/buffer';
 import type { Fragment } from '../loader/fragment';
 import type { HlsConfig } from '../config';
-import { toMsFromMpegTsClock } from '../utils/timescale-conversion';
 
 const MAX_SILENT_FRAME_DURATION = 10 * 1000; // 10 seconds
 const AAC_SAMPLES_PER_FRAME = 1024;
@@ -116,7 +117,8 @@ export default class MP4Remuxer implements Remuxer {
     textTrack: DemuxedUserdataTrack,
     timeOffset: number,
     accurateTimeOffset: boolean,
-    flush: boolean
+    flush: boolean,
+    playlistType: PlaylistLevelType
   ): RemuxerResult {
     let video: RemuxedTrack | undefined;
     let audio: RemuxedTrack | undefined;
@@ -202,7 +204,11 @@ export default class MP4Remuxer implements Remuxer {
             audioTimeOffset,
             this.isAudioContiguous,
             accurateTimeOffset,
-            enoughVideoSamples ? videoTimeOffset : undefined
+            hasVideo ||
+              enoughVideoSamples ||
+              playlistType === PlaylistLevelType.AUDIO
+              ? videoTimeOffset
+              : undefined
           );
           if (enoughVideoSamples) {
             const audioTrackLength = audio ? audio.endPTS - audio.startPTS : 0;
@@ -741,8 +747,9 @@ export default class MP4Remuxer implements Remuxer {
     // frame.
 
     if (track.isAAC) {
+      const alignedWithVideo = videoTimeOffset !== undefined;
       const maxAudioFramesDrift = this.config.maxAudioFramesDrift;
-      for (let i = 0, nextPts = nextAudioPts; i < inputSamples.length; ) {
+      for (let i = 0, nextPts = nextAudioPts; i < inputSamples.length; i++) {
         // First, let's see how far off this frame is from where we expect it to be
         const sample = inputSamples[i];
         const pts = sample.pts;
@@ -752,29 +759,19 @@ export default class MP4Remuxer implements Remuxer {
         // When remuxing with video, if we're overlapping by more than a duration, drop this sample to stay in sync
         if (
           delta <= -maxAudioFramesDrift * inputSampleDuration &&
-          videoTimeOffset !== undefined
+          alignedWithVideo
         ) {
-          if (contiguous || i > 0) {
-            logger.warn(
-              `[mp4-remuxer]: Dropping 1 audio frame @ ${(
-                nextPts / inputTimeScale
-              ).toFixed(3)}s due to ${Math.round(duration)} ms overlap.`
-            );
-            inputSamples.splice(i, 1);
-            // Don't touch nextPtsNorm or i
-          } else {
-            // When changing qualities we can't trust that audio has been appended up to nextAudioPts
-            // Warn about the overlap but do not drop samples as that can introduce buffer gaps
+          if (i === 0) {
             logger.warn(
               `Audio frame @ ${(pts / inputTimeScale).toFixed(
                 3
               )}s overlaps nextAudioPts by ${Math.round(
                 (1000 * delta) / inputTimeScale
               )} ms.`
             );
-            nextPts = pts + inputSampleDuration;
-            i++;
+            this.nextAudioPts = nextAudioPts = pts;
           }
+          nextPts = pts;
         } // eslint-disable-line brace-style
 
         // Insert missing frames if:
@@ -785,12 +782,19 @@ export default class MP4Remuxer implements Remuxer {
         else if (
           delta >= maxAudioFramesDrift * inputSampleDuration &&
           duration < MAX_SILENT_FRAME_DURATION &&
-          videoTimeOffset !== undefined
+          alignedWithVideo
         ) {
-          const missing = Math.floor(delta / inputSampleDuration);
+          let missing = Math.round(delta / inputSampleDuration);
           // Adjust nextPts so that silent samples are aligned with media pts. This will prevent media samples from
           // later being shifted if nextPts is based on timeOffset and delta is not a multiple of inputSampleDuration.
           nextPts = pts - missing * inputSampleDuration;
+          if (nextPts < 0) {
+            missing--;
+            nextPts += inputSampleDuration;
+          }
+          if (i === 0) {
+            this.nextAudioPts = nextAudioPts = nextPts;
+          }
           logger.warn(
             `[mp4-remuxer]: Injecting ${missing} audio frame @ ${(
               nextPts / inputTimeScale
@@ -818,17 +822,9 @@ export default class MP4Remuxer implements Remuxer {
             nextPts += inputSampleDuration;
             i++;
           }
-
-          // Adjust sample to next expected pts
-          sample.pts = sample.dts = nextPts;
-          nextPts += inputSampleDuration;
-          i++;
-        } else {
-          // Otherwise, just adjust pts
-          sample.pts = sample.dts = nextPts;
-          nextPts += inputSampleDuration;
-          i++;
         }
+        sample.pts = sample.dts = nextPts;
+        nextPts += inputSampleDuration;
       }
     }
     let firstPTS: number | null = null;
@@ -849,42 +845,7 @@ export default class MP4Remuxer implements Remuxer {
         const prevSample = outputSamples[j - 1];
         prevSample.duration = Math.round((pts - lastPTS) / scaleFactor);
       } else {
-        const delta = Math.round(
-          (1000 * (pts - nextAudioPts)) / inputTimeScale
-        );
-        let numMissingFrames = 0;
-        // if fragment are contiguous, detect hole/overlapping between fragments
-        // contiguous fragments are consecutive fragments from same quality level (same level, new SN = old SN + 1)
         if (contiguous && track.isAAC) {
-          if (delta > 0 && delta < MAX_SILENT_FRAME_DURATION) {
-            numMissingFrames = Math.round(
-              (pts - nextAudioPts) / inputSampleDuration
-            );
-            logger.log(
-              `[mp4-remuxer]: ${delta} ms hole between AAC samples detected,filling it`
-            );
-            if (numMissingFrames > 0) {
-              fillFrame = AAC.getSilentFrame(
-                track.manifestCodec || track.codec,
-                track.channelCount
-              );
-              if (!fillFrame) {
-                fillFrame = unit.subarray();
-              }
-
-              mdatSize += numMissingFrames * fillFrame.length;
-            }
-            // if we have frame overlap, overlapping for more than half a frame duraion
-          } else if (delta < -12) {
-            // drop overlapping audio frames... browser will deal with it
-            logger.log(
-              `[mp4-remuxer]: drop overlapping AAC sample, expected/parsed/delta:${(
-                nextAudioPts / inputTimeScale
-              ).toFixed(3)}s/${(pts / inputTimeScale).toFixed(3)}s/${-delta}ms`
-            );
-            mdatSize -= unit.byteLength;
-            continue;
-          }
           // set PTS/DTS to expected PTS/DTS
           pts = nextAudioPts;
         }
@@ -915,23 +876,6 @@ export default class MP4Remuxer implements Remuxer {
           // no audio samples
           return;
         }
-        for (let i = 0; i < numMissingFrames; i++) {
-          fillFrame = AAC.getSilentFrame(
-            track.manifestCodec || track.codec,
-            track.channelCount
-          );
-          if (!fillFrame) {
-            logger.log(
-              '[mp4-remuxer]: Unable to get silent frame for given audio codec; duplicating the current frame instead'
-            );
-            fillFrame = unit.subarray();
-          }
-          mdat.set(fillFrame, offset);
-          offset += fillFrame.byteLength;
-          outputSamples.push(
-            new Mp4Sample(true, AAC_SAMPLES_PER_FRAME, fillFrame.byteLength, 0)
-          );
-        }
       }
       mdat.set(unit, offset);
       const unitLen = unit.byteLength;

diff --git a/src/types/remuxer.ts b/src/types/remuxer.ts
@@ -8,6 +8,7 @@ import {
   UserdataSample,
 } from './demuxer';
 import type { SourceBufferName } from './buffer';
+import type { PlaylistLevelType } from './loader';
 
 export interface Remuxer {
   remux(
@@ -17,7 +18,8 @@ export interface Remuxer {
     textTrack: DemuxedUserdataTrack,
     timeOffset: number,
     accurateTimeOffset: boolean,
-    flush: boolean
+    flush: boolean,
+    playlistType: PlaylistLevelType
   ): RemuxerResult;
   resetInitSegment(
     initSegment: Uint8Array | undefined,