/**
 * mux.js
 *
 * Copyright (c) Brightcove
 * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
 *
 * A stream-based mp2t to mp4 converter. This utility can be used to
 * deliver mp4s to a SourceBuffer on platforms that support native
 * Media Source Extensions.
 */
'use strict';

var Stream = require('../utils/stream.js');
var mp4 = require('./mp4-generator.js');
var frameUtils = require('./frame-utils');
var audioFrameUtils = require('./audio-frame-utils');
var trackDecodeInfo = require('./track-decode-info');
var m2ts = require('../m2ts/m2ts.js');
var clock = require('../utils/clock');
var AdtsStream = require('../codecs/adts.js');
var H264Stream = require('../codecs/h264').H264Stream;
var AacStream = require('../aac');
var isLikelyAacData = require('../aac/utils').isLikelyAacData;
var ONE_SECOND_IN_TS = require('../utils/clock').ONE_SECOND_IN_TS;
var AUDIO_PROPERTIES = require('../constants/audio-properties.js');
var VIDEO_PROPERTIES = require('../constants/video-properties.js');

// object types
var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream;

var retriggerForStream = function(key, event) {
  event.stream = key;
  this.trigger('log', event);
};

var addPipelineLogRetriggers = function(transmuxer, pipeline) {
  var keys = Object.keys(pipeline);

  for (var i = 0; i < keys.length; i++) {
    var key = keys[i];

    // skip non-stream keys and headOfPipeline
    // which is just a duplicate
    if (key === 'headOfPipeline' || !pipeline[key].on) {
      continue;
    }

    pipeline[key].on('log', retriggerForStream.bind(transmuxer, key));
  }
};

/**
 * Compare two arrays (even typed) for same-ness
 */
var arrayEquals = function(a, b) {
  var i;

  if (a.length !== b.length) {
    return false;
  }

  // compare the value of each element in the array
  for (i = 0; i < a.length; i++) {
    if (a[i] !== b[i]) {
      return false;
    }
  }

  return true;
};

var generateSegmentTimingInfo = function(
  baseMediaDecodeTime,
  startDts,
  startPts,
  endDts,
  endPts,
  prependedContentDuration
) {
  var
    ptsOffsetFromDts = startPts - startDts,
    decodeDuration = endDts - startDts,
    presentationDuration = endPts - startPts;

  // The PTS and DTS values are based on the actual stream times from the segment,
  // however, the player time values will reflect a start from the baseMediaDecodeTime.
  // In order to provide relevant values for the player times, base timing info on the
  // baseMediaDecodeTime and the DTS and PTS durations of the segment.
  return {
    start: {
      dts: baseMediaDecodeTime,
      pts: baseMediaDecodeTime + ptsOffsetFromDts
    },
    end: {
      dts: baseMediaDecodeTime + decodeDuration,
      pts: baseMediaDecodeTime + presentationDuration
    },
    prependedContentDuration: prependedContentDuration,
    baseMediaDecodeTime: baseMediaDecodeTime
  };
};
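
// A worked example of the remapping above (illustrative numbers, not from any
// real stream): for a segment whose first frame has dts=90000 and pts=93000,
// whose last frame ends at dts=270000/pts=273000, and which will be appended
// at baseMediaDecodeTime=0:
//
//   generateSegmentTimingInfo(0, 90000, 93000, 270000, 273000, 0);
//   // => {
//   //   start: { dts: 0, pts: 3000 },      // pts keeps its +3000 offset from dts
//   //   end: { dts: 180000, pts: 180000 }, // decode/presentation durations preserved
//   //   prependedContentDuration: 0,
//   //   baseMediaDecodeTime: 0
//   // }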

/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at 0.
 */
AudioSegmentStream = function(track, options) {
  var
    adtsFrames = [],
    sequenceNumber,
    earliestAllowedDts = 0,
    audioAppendStartTs = 0,
    videoBaseMediaDecodeTime = Infinity;

  options = options || {};

  sequenceNumber = options.firstSequenceNumber || 0;

  AudioSegmentStream.prototype.init.call(this);

  this.push = function(data) {
    trackDecodeInfo.collectDtsInfo(track, data);

    if (track) {
      AUDIO_PROPERTIES.forEach(function(prop) {
        track[prop] = data[prop];
      });
    }

    // buffer audio data until end() is called
    adtsFrames.push(data);
  };

  this.setEarliestDts = function(earliestDts) {
    earliestAllowedDts = earliestDts;
  };

  this.setVideoBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    videoBaseMediaDecodeTime = baseMediaDecodeTime;
  };

  this.setAudioAppendStart = function(timestamp) {
    audioAppendStartTs = timestamp;
  };

  this.flush = function() {
    var
      frames,
      moof,
      mdat,
      boxes,
      frameDuration,
      segmentDuration,
      videoClockCyclesOfSilencePrefixed;

    // return early if no audio data has been observed
    if (adtsFrames.length === 0) {
      this.trigger('done', 'AudioSegmentStream');
      return;
    }

    frames = audioFrameUtils.trimAdtsFramesByEarliestDts(
      adtsFrames, track, earliestAllowedDts);
    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(
      track, options.keepOriginalTimestamps);

    // amount of audio prefixed as silence, measured in video clock cycles
    // rather than audio clock cycles
    videoClockCyclesOfSilencePrefixed = audioFrameUtils.prefixWithSilence(
      track, frames, audioAppendStartTs, videoBaseMediaDecodeTime);

    // we have to build the index from byte locations to
    // samples (that is, adts frames) in the audio data
    track.samples = audioFrameUtils.generateSampleTable(frames);

    // concatenate the audio data to construct the mdat
    mdat = mp4.mdat(audioFrameUtils.concatenateFrameData(frames));

    adtsFrames = [];

    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    trackDecodeInfo.clearDtsInfo(track);

    frameDuration = Math.ceil(ONE_SECOND_IN_TS * 1024 / track.samplerate);

    // TODO this check was added to maintain backwards compatibility (particularly with
    // tests) on adding the timingInfo event. However, it seems unlikely that there's a
    // valid use-case where an init segment/data should be triggered without associated
    // frames. Leaving for now, but should be looked into.
    if (frames.length) {
      segmentDuration = frames.length * frameDuration;

      this.trigger(
        'segmentTimingInfo',
        generateSegmentTimingInfo(
          // The audio track's baseMediaDecodeTime is in audio clock cycles, but the
          // frame info is in video clock cycles. Convert to match expectation of
          // listeners (that all timestamps will be based on video clock cycles).
          clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate),
          // frame times are already in video clock, as is segment duration
          frames[0].dts,
          frames[0].pts,
          frames[0].dts + segmentDuration,
          frames[0].pts + segmentDuration,
          videoClockCyclesOfSilencePrefixed || 0
        )
      );

      this.trigger('timingInfo', {
        start: frames[0].pts,
        end: frames[0].pts + segmentDuration
      });
    }

    this.trigger('data', {track: track, boxes: boxes});
    this.trigger('done', 'AudioSegmentStream');
  };

  this.reset = function() {
    trackDecodeInfo.clearDtsInfo(track);
    adtsFrames = [];
    this.trigger('reset');
  };
};

AudioSegmentStream.prototype = new Stream();
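
// A minimal usage sketch for the AudioSegmentStream above. The track metadata,
// `adtsFrames` array, and `handleSegment` callback are hypothetical; in
// practice the frames are produced upstream by AdtsStream.
//
//   var audioSegmentStream = new AudioSegmentStream(
//     {type: 'audio', codec: 'adts', timelineStartInfo: {baseMediaDecodeTime: 0}},
//     {keepOriginalTimestamps: false}
//   );
//
//   audioSegmentStream.on('data', function(event) {
//     // event.boxes is a Uint8Array containing a moof+mdat pair
//     handleSegment(event.track, event.boxes);
//   });
//
//   adtsFrames.forEach(function(frame) {
//     audioSegmentStream.push(frame);
//   });
//   audioSegmentStream.flush();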

/**
 * Constructs a single-track, ISO BMFF media segment from H264 data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.alignGopsAtEnd {boolean} If true, start from the end of the
 *        gopsToAlignWith list when attempting to align gop pts
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at 0.
 */
VideoSegmentStream = function(track, options) {
  var
    sequenceNumber,
    nalUnits = [],
    gopsToAlignWith = [],
    config,
    pps;

  options = options || {};

  sequenceNumber = options.firstSequenceNumber || 0;

  VideoSegmentStream.prototype.init.call(this);

  delete track.minPTS;

  this.gopCache_ = [];

  /**
   * Constructs an ISO BMFF segment given H264 nalUnits
   * @param {Object} nalUnit A data event representing a nalUnit
   * @param {String} nalUnit.nalUnitType
   * @param {Object} nalUnit.config Properties for a mp4 track
   * @param {Uint8Array} nalUnit.data The nalUnit bytes
   * @see lib/codecs/h264.js
   **/
  this.push = function(nalUnit) {
    trackDecodeInfo.collectDtsInfo(track, nalUnit);

    // record the track config
    if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) {
      config = nalUnit.config;
      track.sps = [nalUnit.data];

      VIDEO_PROPERTIES.forEach(function(prop) {
        track[prop] = config[prop];
      }, this);
    }

    if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' && !pps) {
      pps = nalUnit.data;
      track.pps = [nalUnit.data];
    }

    // buffer video until flush() is called
    nalUnits.push(nalUnit);
  };

  /**
   * Pass constructed ISO BMFF track and boxes on to the
   * next stream in the pipeline
   **/
  this.flush = function() {
    var
      frames,
      gopForFusion,
      gops,
      moof,
      mdat,
      boxes,
      prependedContentDuration = 0,
      firstGop,
      lastGop;

    // Throw away nalUnits at the start of the byte stream until
    // we find the first AUD
    while (nalUnits.length) {
      if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
        break;
      }
      nalUnits.shift();
    }

    // Return early if no video data has been observed
    if (nalUnits.length === 0) {
      this.resetStream_();
      this.trigger('done', 'VideoSegmentStream');
      return;
    }

    // Organize the raw nal-units into arrays that represent
    // higher-level constructs such as frames and gops
    // (group-of-pictures)
    frames = frameUtils.groupNalsIntoFrames(nalUnits);
    gops = frameUtils.groupFramesIntoGops(frames);

    // If the first frame of this fragment is not a keyframe we have
    // a problem since MSE (on Chrome) requires a leading keyframe.
    //
    // We have two approaches to repairing this situation:
    // 1) GOP-FUSION:
    //    This is where we keep track of the GOPS (group-of-pictures)
    //    from previous fragments and attempt to find one that we can
    //    prepend to the current fragment in order to create a valid
    //    fragment.
    // 2) KEYFRAME-PULLING:
    //    Here we search for the first keyframe in the fragment and
    //    throw away all the frames between the start of the fragment
    //    and that keyframe. We then extend the duration and pull the
    //    PTS of the keyframe forward so that it covers the time range
    //    of the frames that were disposed of.
    //
    // #1 is far preferable over #2 which can cause "stuttering" but
    // requires more things to be just right.
    if (!gops[0][0].keyFrame) {
      // Search for a gop for fusion from our gopCache
      gopForFusion = this.getGopForFusion_(nalUnits[0], track);

      if (gopForFusion) {
        // in order to provide more accurate timing information about the segment, save
        // the number of seconds prepended to the original segment due to GOP fusion
        prependedContentDuration = gopForFusion.duration;

        gops.unshift(gopForFusion);
        // Adjust Gops' metadata to account for the inclusion of the
        // new gop at the beginning
        gops.byteLength += gopForFusion.byteLength;
        gops.nalCount += gopForFusion.nalCount;
        gops.pts = gopForFusion.pts;
        gops.dts = gopForFusion.dts;
        gops.duration += gopForFusion.duration;
      } else {
        // If we didn't find a candidate gop fall back to keyframe-pulling
        gops = frameUtils.extendFirstKeyFrame(gops);
      }
    }

    // Trim gops to align with gopsToAlignWith
    if (gopsToAlignWith.length) {
      var alignedGops;

      if (options.alignGopsAtEnd) {
        alignedGops = this.alignGopsAtEnd_(gops);
      } else {
        alignedGops = this.alignGopsAtStart_(gops);
      }

      if (!alignedGops) {
        // save all the nals in the last GOP into the gop cache
        this.gopCache_.unshift({
          gop: gops.pop(),
          pps: track.pps,
          sps: track.sps
        });

        // Keep a maximum of 6 GOPs in the cache
        this.gopCache_.length = Math.min(6, this.gopCache_.length);

        // Clear nalUnits
        nalUnits = [];

        // return early; no gops can be aligned with desired gopsToAlignWith
        this.resetStream_();
        this.trigger('done', 'VideoSegmentStream');
        return;
      }

      // Some gops were trimmed; clear dts info so minSegmentDts and pts are correct
      // when recalculated before sending off to CoalesceStream
      trackDecodeInfo.clearDtsInfo(track);

      gops = alignedGops;
    }

    trackDecodeInfo.collectDtsInfo(track, gops);

    // First, we have to build the index from byte locations to
    // samples (that is, frames) in the video data
    track.samples = frameUtils.generateSampleTable(gops);

    // Concatenate the video data and construct the mdat
    mdat = mp4.mdat(frameUtils.concatenateNalData(gops));

    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(
      track, options.keepOriginalTimestamps);

    this.trigger('processedGopsInfo', gops.map(function(gop) {
      return {
        pts: gop.pts,
        dts: gop.dts,
        byteLength: gop.byteLength
      };
    }));

    firstGop = gops[0];
    lastGop = gops[gops.length - 1];

    this.trigger(
      'segmentTimingInfo',
      generateSegmentTimingInfo(
        track.baseMediaDecodeTime,
        firstGop.dts,
        firstGop.pts,
        lastGop.dts + lastGop.duration,
        lastGop.pts + lastGop.duration,
        prependedContentDuration));

    this.trigger('timingInfo', {
      start: gops[0].pts,
      end: gops[gops.length - 1].pts + gops[gops.length - 1].duration
    });

    // save all the nals in the last GOP into the gop cache
    this.gopCache_.unshift({
      gop: gops.pop(),
      pps: track.pps,
      sps: track.sps
    });

    // Keep a maximum of 6 GOPs in the cache
    this.gopCache_.length = Math.min(6, this.gopCache_.length);

    // Clear nalUnits
    nalUnits = [];

    this.trigger('baseMediaDecodeTime', track.baseMediaDecodeTime);
    this.trigger('timelineStartInfo', track.timelineStartInfo);

    moof = mp4.moof(sequenceNumber, [track]);

    // it would be great to allocate this array up front instead of
    // throwing away hundreds of media segment fragments
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // Bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', {track: track, boxes: boxes});

    this.resetStream_();

    // Continue with the flush process now
    this.trigger('done', 'VideoSegmentStream');
  };

  this.reset = function() {
    this.resetStream_();
    nalUnits = [];
    this.gopCache_.length = 0;
    gopsToAlignWith.length = 0;
    this.trigger('reset');
  };

  this.resetStream_ = function() {
    trackDecodeInfo.clearDtsInfo(track);

    // reset config and pps because they may differ across segments
    // for instance, when we are rendition switching
    config = undefined;
    pps = undefined;
  };

  // Search for a candidate Gop for gop-fusion from the gop cache and
  // return it or return null if no good candidate was found
  this.getGopForFusion_ = function(nalUnit) {
    var
      halfSecond = 45000, // Half-a-second in a 90kHz clock
      allowableOverlap = 10000, // About 3 frames @ 30fps
      nearestDistance = Infinity,
      dtsDistance,
      nearestGopObj,
      currentGop,
      currentGopObj,
      i;

    // Search for the GOP nearest to the beginning of this nal unit
    for (i = 0; i < this.gopCache_.length; i++) {
      currentGopObj = this.gopCache_[i];
      currentGop = currentGopObj.gop;

      // Reject Gops with different SPS or PPS
      if (!(track.pps && arrayEquals(track.pps[0], currentGopObj.pps[0])) ||
          !(track.sps && arrayEquals(track.sps[0], currentGopObj.sps[0]))) {
        continue;
      }

      // Reject Gops that would require a negative baseMediaDecodeTime
      if (currentGop.dts < track.timelineStartInfo.dts) {
        continue;
      }

      // The distance between the end of the gop and the start of the nalUnit
      dtsDistance = (nalUnit.dts - currentGop.dts) - currentGop.duration;

      // Only consider GOPS that start before the nal unit and end within
      // a half-second of the nal unit
      if (dtsDistance >= -allowableOverlap &&
          dtsDistance <= halfSecond) {

        // Always use the closest GOP we found if there is more than
        // one candidate
        if (!nearestGopObj ||
            nearestDistance > dtsDistance) {
          nearestGopObj = currentGopObj;
          nearestDistance = dtsDistance;
        }
      }
    }

    if (nearestGopObj) {
      return nearestGopObj.gop;
    }
    return null;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the START of the list
  this.alignGopsAtStart_ = function(gops) {
    var alignIndex, gopIndex, align, gop, byteLength, nalCount, duration, alignedGops;

    byteLength = gops.byteLength;
    nalCount = gops.nalCount;
    duration = gops.duration;
    alignIndex = gopIndex = 0;

    while (alignIndex < gopsToAlignWith.length && gopIndex < gops.length) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        break;
      }

      if (gop.pts > align.pts) {
        // this current gop starts after the current gop we want to align on, so increment
        // align index
        alignIndex++;
        continue;
      }

      // current gop starts before the current gop we want to align on, so increment gop
      // index
      gopIndex++;
      byteLength -= gop.byteLength;
      nalCount -= gop.nalCount;
      duration -= gop.duration;
    }

    if (gopIndex === 0) {
      // no gops to trim
      return gops;
    }

    if (gopIndex === gops.length) {
      // all gops trimmed, skip appending all gops
      return null;
    }

    alignedGops = gops.slice(gopIndex);
    alignedGops.byteLength = byteLength;
    alignedGops.duration = duration;
    alignedGops.nalCount = nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the END of the list
  this.alignGopsAtEnd_ = function(gops) {
    var alignIndex, gopIndex, align, gop, alignEndIndex, matchFound;

    alignIndex = gopsToAlignWith.length - 1;
    gopIndex = gops.length - 1;
    alignEndIndex = null;
    matchFound = false;

    while (alignIndex >= 0 && gopIndex >= 0) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        matchFound = true;
        break;
      }

      if (align.pts > gop.pts) {
        alignIndex--;
        continue;
      }

      if (alignIndex === gopsToAlignWith.length - 1) {
        // gop.pts is greater than the last alignment candidate. If no match is found
        // by the end of this loop, we still want to append gops that come after this
        // point
        alignEndIndex = gopIndex;
      }

      gopIndex--;
    }

    if (!matchFound && alignEndIndex === null) {
      return null;
    }

    var trimIndex;

    if (matchFound) {
      trimIndex = gopIndex;
    } else {
      trimIndex = alignEndIndex;
    }

    if (trimIndex === 0) {
      return gops;
    }

    var alignedGops = gops.slice(trimIndex);
    var metadata = alignedGops.reduce(function(total, gop) {
      total.byteLength += gop.byteLength;
      total.duration += gop.duration;
      total.nalCount += gop.nalCount;
      return total;
    }, { byteLength: 0, duration: 0, nalCount: 0 });

    alignedGops.byteLength = metadata.byteLength;
    alignedGops.duration = metadata.duration;
    alignedGops.nalCount = metadata.nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  this.alignGopsWith = function(newGopsToAlignWith) {
    gopsToAlignWith = newGopsToAlignWith;
  };
};

VideoSegmentStream.prototype = new Stream();
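
// A minimal usage sketch for the VideoSegmentStream above, showing GOP
// alignment. The `savedGopsInfo` variable is hypothetical; the gop
// descriptors it holds come from a prior segment's 'processedGopsInfo'
// event (emitted in flush() above).
//
//   var videoSegmentStream = new VideoSegmentStream(videoTrack, {
//     alignGopsAtEnd: true
//   });
//
//   videoSegmentStream.on('processedGopsInfo', function(gopsInfo) {
//     savedGopsInfo = gopsInfo; // e.g. [{pts: 126000, dts: 126000, byteLength: 4096}]
//   });
//
//   // before transmuxing an overlapping segment (e.g. after a rendition switch):
//   videoSegmentStream.alignGopsWith(savedGopsInfo);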

/**
 * A Stream that can combine multiple streams (i.e. audio & video)
 * into a single output segment for MSE. Also supports audio-only
 * and video-only streams.
 * @param options {object} transmuxer options object
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at media timeline start.
 */
CoalesceStream = function(options, metadataStream) {
  // Number of Tracks per output segment
  // If greater than 1, we combine multiple
  // tracks into a single segment
  this.numberOfTracks = 0;
  this.metadataStream = metadataStream;

  options = options || {};

  if (typeof options.remux !== 'undefined') {
    this.remuxTracks = !!options.remux;
  } else {
    this.remuxTracks = true;
  }

  if (typeof options.keepOriginalTimestamps === 'boolean') {
    this.keepOriginalTimestamps = options.keepOriginalTimestamps;
  } else {
    this.keepOriginalTimestamps = false;
  }

  this.pendingTracks = [];
  this.videoTrack = null;
  this.pendingBoxes = [];
  this.pendingCaptions = [];
  this.pendingMetadata = [];
  this.pendingBytes = 0;
  this.emittedTracks = 0;

  CoalesceStream.prototype.init.call(this);

  // Take output from multiple streams
  this.push = function(output) {
    // buffer incoming captions until the associated video segment
    // finishes
    if (output.text) {
      return this.pendingCaptions.push(output);
    }
    // buffer incoming id3 tags until the final flush
    if (output.frames) {
      return this.pendingMetadata.push(output);
    }

    // Add this track to the list of pending tracks and store
    // important information required for the construction of
    // the final segment
    this.pendingTracks.push(output.track);
    this.pendingBytes += output.boxes.byteLength;

    // TODO: is there an issue for this against chrome?
    // We unshift audio and push video because
    // as of Chrome 75 when switching from
    // one init segment to another if the video
    // mdat does not appear after the audio mdat
    // only audio will play for the duration of our transmux.
    if (output.track.type === 'video') {
      this.videoTrack = output.track;
      this.pendingBoxes.push(output.boxes);
    }
    if (output.track.type === 'audio') {
      this.audioTrack = output.track;
      this.pendingBoxes.unshift(output.boxes);
    }
  };
};

CoalesceStream.prototype = new Stream();
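
// Illustration of the buffering order above (hypothetical pushes): after
//
//   coalesceStream.push({track: {type: 'video'}, boxes: videoBoxes});
//   coalesceStream.push({track: {type: 'audio'}, boxes: audioBoxes});
//
// pendingBoxes is [audioBoxes, videoBoxes], so the audio moof+mdat always
// precedes the video moof+mdat in the combined segment regardless of which
// track arrived first (see the Chrome 75 note in push() above).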

CoalesceStream.prototype.flush = function(flushSource) {
  var
    offset = 0,
    event = {
      captions: [],
      captionStreams: {},
      metadata: [],
      info: {}
    },
    caption,
    id3,
    initSegment,
    timelineStartPts = 0,
    i;

  if (this.pendingTracks.length < this.numberOfTracks) {
    if (flushSource !== 'VideoSegmentStream' &&
        flushSource !== 'AudioSegmentStream') {
      // Return because we haven't received a flush from a data-generating
      // portion of the segment (meaning that we have only received meta-data
      // or captions.)
      return;
    } else if (this.remuxTracks) {
      // Return until we have enough tracks from the pipeline to remux (if we
      // are remuxing audio and video into a single MP4)
      return;
    } else if (this.pendingTracks.length === 0) {
      // In the case where we receive a flush without any data having been
      // received we consider it an emitted track for the purposes of coalescing
      // `done` events.
      // We do this for the case where there is an audio and video track in the
      // segment but no audio data. (seen in several playlists with alternate
      // audio tracks and no audio present in the main TS segments.)
      this.emittedTracks++;

      if (this.emittedTracks >= this.numberOfTracks) {
        this.trigger('done');
        this.emittedTracks = 0;
      }
      return;
    }
  }

  if (this.videoTrack) {
    timelineStartPts = this.videoTrack.timelineStartInfo.pts;
    VIDEO_PROPERTIES.forEach(function(prop) {
      event.info[prop] = this.videoTrack[prop];
    }, this);
  } else if (this.audioTrack) {
    timelineStartPts = this.audioTrack.timelineStartInfo.pts;
    AUDIO_PROPERTIES.forEach(function(prop) {
      event.info[prop] = this.audioTrack[prop];
    }, this);
  }

  if (this.videoTrack || this.audioTrack) {
    if (this.pendingTracks.length === 1) {
      event.type = this.pendingTracks[0].type;
    } else {
      event.type = 'combined';
    }

    this.emittedTracks += this.pendingTracks.length;

    initSegment = mp4.initSegment(this.pendingTracks);

    // Create a new typed array to hold the init segment
    event.initSegment = new Uint8Array(initSegment.byteLength);

    // Create an init segment containing a moov
    // and track definitions
    event.initSegment.set(initSegment);

    // Create a new typed array to hold the moof+mdats
    event.data = new Uint8Array(this.pendingBytes);

    // Append each moof+mdat (one per track) together
    for (i = 0; i < this.pendingBoxes.length; i++) {
      event.data.set(this.pendingBoxes[i], offset);
      offset += this.pendingBoxes[i].byteLength;
    }

    // Translate caption PTS times into second offsets to match the
    // video timeline for the segment, and add track info
    for (i = 0; i < this.pendingCaptions.length; i++) {
      caption = this.pendingCaptions[i];
      caption.startTime = clock.metadataTsToSeconds(
        caption.startPts, timelineStartPts, this.keepOriginalTimestamps);
      caption.endTime = clock.metadataTsToSeconds(
        caption.endPts, timelineStartPts, this.keepOriginalTimestamps);

      event.captionStreams[caption.stream] = true;
      event.captions.push(caption);
    }

    // Translate ID3 frame PTS times into second offsets to match the
    // video timeline for the segment
    for (i = 0; i < this.pendingMetadata.length; i++) {
      id3 = this.pendingMetadata[i];
      id3.cueTime = clock.metadataTsToSeconds(
        id3.pts, timelineStartPts, this.keepOriginalTimestamps);

      event.metadata.push(id3);
    }

    // We add this to every single emitted segment even though we only need
    // it for the first
    event.metadata.dispatchType = this.metadataStream.dispatchType;

    // Reset stream state
    this.pendingTracks.length = 0;
    this.videoTrack = null;
    this.pendingBoxes.length = 0;
    this.pendingCaptions.length = 0;
    this.pendingBytes = 0;
    this.pendingMetadata.length = 0;

    // Emit the built segment
    // We include captions and ID3 tags for backwards compatibility,
    // ideally we should send only video and audio in the data event
    this.trigger('data', event);

    // Emit each caption to the outside world
    // Ideally, this would happen immediately on parsing captions,
    // but we need to ensure that video data is sent back first
    // so that caption timing can be adjusted to match video timing
    for (i = 0; i < event.captions.length; i++) {
      caption = event.captions[i];
      this.trigger('caption', caption);
    }

    // Emit each id3 tag to the outside world
    // Ideally, this would happen immediately on parsing the tag,
    // but we need to ensure that video data is sent back first
    // so that ID3 frame timing can be adjusted to match video timing
    for (i = 0; i < event.metadata.length; i++) {
      id3 = event.metadata[i];
      this.trigger('id3Frame', id3);
    }
  }

  // Only emit `done` if all tracks have been flushed and emitted
  if (this.emittedTracks >= this.numberOfTracks) {
    this.trigger('done');
    this.emittedTracks = 0;
  }
};

CoalesceStream.prototype.setRemux = function(val) {
  this.remuxTracks = val;
};

/**
 * A Stream that expects MP2T binary data as input and produces
 * corresponding media segments, suitable for use with Media Source
 * Extension (MSE) implementations that support the ISO BMFF byte
 * stream format, like Chrome.
 */
Transmuxer = function(options) {
  var
    self = this,
    hasFlushed = true,
    videoTrack,
    audioTrack;

  Transmuxer.prototype.init.call(this);

  options = options || {};
  this.baseMediaDecodeTime = options.baseMediaDecodeTime || 0;
  this.transmuxPipeline_ = {};

  this.setupAacPipeline = function() {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'aac';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.aacStream = new AacStream();
    pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
    pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
    pipeline.adtsStream = new AdtsStream();
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.aacStream;

    pipeline.aacStream
      .pipe(pipeline.audioTimestampRolloverStream)
      .pipe(pipeline.adtsStream);
    pipeline.aacStream
      .pipe(pipeline.timedMetadataTimestampRolloverStream)
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    pipeline.metadataStream.on('timestamp', function(frame) {
      pipeline.aacStream.setTimestamp(frame.timeStamp);
    });

    pipeline.aacStream.on('data', function(data) {
      if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
        return;
      }

      audioTrack = audioTrack || {
        timelineStartInfo: {
          baseMediaDecodeTime: self.baseMediaDecodeTime
        },
        codec: 'adts',
        type: 'audio'
      };

      // hook up the audio segment stream to the first track with aac data
      pipeline.coalesceStream.numberOfTracks++;
      pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);

      pipeline.audioSegmentStream.on('log', self.getLogTrigger_('audioSegmentStream'));
      pipeline.audioSegmentStream.on('timingInfo',
        self.trigger.bind(self, 'audioTimingInfo'));

      // Set up the final part of the audio pipeline
      pipeline.adtsStream
        .pipe(pipeline.audioSegmentStream)
        .pipe(pipeline.coalesceStream);

      // emit pmt info
      self.trigger('trackinfo', {
        hasAudio: !!audioTrack,
        hasVideo: !!videoTrack
      });
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));

    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));

    addPipelineLogRetriggers(this, pipeline);
  };

  this.setupTsPipeline = function() {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'ts';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.packetStream = new m2ts.TransportPacketStream();
    pipeline.parseStream = new m2ts.TransportParseStream();
    pipeline.elementaryStream = new m2ts.ElementaryStream();
    pipeline.timestampRolloverStream = new m2ts.TimestampRolloverStream();
    pipeline.adtsStream = new AdtsStream();
    pipeline.h264Stream = new H264Stream();
    pipeline.captionStream = new m2ts.CaptionStream(options);
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.packetStream;

    // disassemble MPEG2-TS packets into elementary streams
    pipeline.packetStream
      .pipe(pipeline.parseStream)
      .pipe(pipeline.elementaryStream)
      .pipe(pipeline.timestampRolloverStream);

    // !!THIS ORDER IS IMPORTANT!!
    // demux the streams
    pipeline.timestampRolloverStream
      .pipe(pipeline.h264Stream);

    pipeline.timestampRolloverStream
      .pipe(pipeline.adtsStream);

    pipeline.timestampRolloverStream
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    // Hook up CEA-608/708 caption stream
    pipeline.h264Stream.pipe(pipeline.captionStream)
      .pipe(pipeline.coalesceStream);

    pipeline.elementaryStream.on('data', function(data) {
      var i;

      if (data.type === 'metadata') {
        i = data.tracks.length;

        // scan the tracks listed in the metadata
        while (i--) {
          if (!videoTrack && data.tracks[i].type === 'video') {
            videoTrack = data.tracks[i];
            videoTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          } else if (!audioTrack && data.tracks[i].type === 'audio') {
            audioTrack = data.tracks[i];
            audioTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          }
        }

        // hook up the video segment stream to the first track with h264 data
        if (videoTrack && !pipeline.videoSegmentStream) {
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.videoSegmentStream = new VideoSegmentStream(videoTrack, options);

          pipeline.videoSegmentStream.on('log', self.getLogTrigger_('videoSegmentStream'));

          pipeline.videoSegmentStream.on('timelineStartInfo', function(timelineStartInfo) {
            // When video emits timelineStartInfo data after a flush, we forward that
            // info to the AudioSegmentStream, if it exists, because video timeline
            // data takes precedence. Do not do this if keepOriginalTimestamps is set,
            // because this is a particularly subtle form of timestamp alteration.
            if (audioTrack && !options.keepOriginalTimestamps) {
              audioTrack.timelineStartInfo = timelineStartInfo;
              // On the first segment we trim AAC frames that exist before the
              // very earliest DTS we have seen in video because Chrome will
              // interpret any video track with a baseMediaDecodeTime that is
              // non-zero as a gap.
              pipeline.audioSegmentStream.setEarliestDts(timelineStartInfo.dts - self.baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('processedGopsInfo',
            self.trigger.bind(self, 'gopInfo'));
          pipeline.videoSegmentStream.on('segmentTimingInfo',
            self.trigger.bind(self, 'videoSegmentTimingInfo'));

          pipeline.videoSegmentStream.on('baseMediaDecodeTime', function(baseMediaDecodeTime) {
            if (audioTrack) {
              pipeline.audioSegmentStream.setVideoBaseMediaDecodeTime(baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('timingInfo',
            self.trigger.bind(self, 'videoTimingInfo'));

          // Set up the final part of the video pipeline
          pipeline.h264Stream
            .pipe(pipeline.videoSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        if (audioTrack && !pipeline.audioSegmentStream) {
          // hook up the audio segment stream to the first track with aac data
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack, options);

          pipeline.audioSegmentStream.on('log', self.getLogTrigger_('audioSegmentStream'));
          pipeline.audioSegmentStream.on('timingInfo',
            self.trigger.bind(self, 'audioTimingInfo'));
          pipeline.audioSegmentStream.on('segmentTimingInfo',
            self.trigger.bind(self, 'audioSegmentTimingInfo'));

          // Set up the final part of the audio pipeline
          pipeline.adtsStream
            .pipe(pipeline.audioSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        // emit pmt info
        self.trigger('trackinfo', {
          hasAudio: !!audioTrack,
          hasVideo: !!videoTrack
        });
      }
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));

    pipeline.coalesceStream.on('id3Frame', function(id3Frame) {
      id3Frame.dispatchType = pipeline.metadataStream.dispatchType;
      self.trigger('id3Frame', id3Frame);
    });

    pipeline.coalesceStream.on('caption', this.trigger.bind(this, 'caption'));

    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));

    addPipelineLogRetriggers(this, pipeline);
  };

  // hook up the segment streams once track metadata is delivered
  this.setBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    var pipeline = this.transmuxPipeline_;

    if (!options.keepOriginalTimestamps) {
      this.baseMediaDecodeTime = baseMediaDecodeTime;
    }

    if (audioTrack) {
      audioTrack.timelineStartInfo.dts = undefined;
      audioTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(audioTrack);
      if (pipeline.audioTimestampRolloverStream) {
        pipeline.audioTimestampRolloverStream.discontinuity();
      }
    }

    if (videoTrack) {
      if (pipeline.videoSegmentStream) {
        pipeline.videoSegmentStream.gopCache_ = [];
      }
      videoTrack.timelineStartInfo.dts = undefined;
      videoTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(videoTrack);
      pipeline.captionStream.reset();
    }

    if (pipeline.timestampRolloverStream) {
      pipeline.timestampRolloverStream.discontinuity();
    }
  };

  this.setAudioAppendStart = function(timestamp) {
    if (audioTrack) {
      this.transmuxPipeline_.audioSegmentStream.setAudioAppendStart(timestamp);
    }
  };

  this.setRemux = function(val) {
    var pipeline = this.transmuxPipeline_;

    options.remux = val;

    if (pipeline && pipeline.coalesceStream) {
      pipeline.coalesceStream.setRemux(val);
    }
  };

  this.alignGopsWith = function(gopsToAlignWith) {
    if (videoTrack && this.transmuxPipeline_.videoSegmentStream) {
      this.transmuxPipeline_.videoSegmentStream.alignGopsWith(gopsToAlignWith);
    }
  };

  this.getLogTrigger_ = function(key) {
    var self = this;

    return function(event) {
      event.stream = key;
      self.trigger('log', event);
    };
  };

  // feed incoming data to the front of the parsing pipeline
  this.push = function(data) {
    if (hasFlushed) {
      var isAac = isLikelyAacData(data);

      if (isAac && this.transmuxPipeline_.type !== 'aac') {
        this.setupAacPipeline();
      } else if (!isAac && this.transmuxPipeline_.type !== 'ts') {
        this.setupTsPipeline();
      }
      hasFlushed = false;
    }
    this.transmuxPipeline_.headOfPipeline.push(data);
  };

  // flush any buffered data
  this.flush = function() {
    hasFlushed = true;
    // Start at the top of the pipeline and flush all pending work
    this.transmuxPipeline_.headOfPipeline.flush();
  };

  this.endTimeline = function() {
    this.transmuxPipeline_.headOfPipeline.endTimeline();
  };

  this.reset = function() {
    if (this.transmuxPipeline_.headOfPipeline) {
      this.transmuxPipeline_.headOfPipeline.reset();
    }
  };

  // Caption data has to be reset when seeking outside buffered range
  this.resetCaptions = function() {
    if (this.transmuxPipeline_.captionStream) {
      this.transmuxPipeline_.captionStream.reset();
    }
  };
};

Transmuxer.prototype = new Stream();
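
// A minimal usage sketch for the Transmuxer above, feeding the resulting
// fMP4 segments to an MSE SourceBuffer. The `sourceBuffer` and `tsBytes`
// variables are assumed to be supplied by the surrounding application.
//
//   var transmuxer = new Transmuxer({remux: true});
//
//   transmuxer.on('data', function(segment) {
//     // combine the init segment and the moof+mdat data into one buffer
//     var bytes = new Uint8Array(segment.initSegment.byteLength + segment.data.byteLength);
//     bytes.set(segment.initSegment, 0);
//     bytes.set(segment.data, segment.initSegment.byteLength);
//     sourceBuffer.appendBuffer(bytes);
//   });
//
//   transmuxer.push(tsBytes); // a Uint8Array of MP2T (or ADTS/AAC) bytes
//   transmuxer.flush();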

module.exports = {
  Transmuxer: Transmuxer,
  VideoSegmentStream: VideoSegmentStream,
  AudioSegmentStream: AudioSegmentStream,
  AUDIO_PROPERTIES: AUDIO_PROPERTIES,
  VIDEO_PROPERTIES: VIDEO_PROPERTIES,
  // exported for testing
  generateSegmentTimingInfo: generateSegmentTimingInfo
};