// transmuxer.js
/**
 * mux.js
 *
 * Copyright (c) Brightcove
 * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
 *
 * A stream-based mp2t to mp4 converter. This utility can be used to
 * deliver mp4s to a SourceBuffer on platforms that support native
 * Media Source Extensions.
 */
'use strict';

var Stream = require('../utils/stream.js');
var mp4 = require('./mp4-generator.js');
var frameUtils = require('./frame-utils');
var audioFrameUtils = require('./audio-frame-utils');
var trackDecodeInfo = require('./track-decode-info');
var m2ts = require('../m2ts/m2ts.js');
var clock = require('../utils/clock');
var AdtsStream = require('../codecs/adts.js');
var H264Stream = require('../codecs/h264').H264Stream;
var AacStream = require('../aac');
var isLikelyAacData = require('../aac/utils').isLikelyAacData;
var ONE_SECOND_IN_TS = require('../utils/clock').ONE_SECOND_IN_TS;
var AUDIO_PROPERTIES = require('../constants/audio-properties.js');
var VIDEO_PROPERTIES = require('../constants/video-properties.js');

// object types
var _VideoSegmentStream, _AudioSegmentStream, _Transmuxer, _CoalesceStream;

var retriggerForStream = function retriggerForStream(key, event) {
  event.stream = key;
  this.trigger('log', event);
};

var addPipelineLogRetriggers = function addPipelineLogRetriggers(transmuxer, pipeline) {
  var keys = Object.keys(pipeline);

  for (var i = 0; i < keys.length; i++) {
    var key = keys[i];

    // skip non-stream keys and headOfPipeline,
    // which is just a duplicate
    if (key === 'headOfPipeline' || !pipeline[key].on) {
      continue;
    }

    pipeline[key].on('log', retriggerForStream.bind(transmuxer, key));
  }
};

/**
 * Compare two arrays (even typed) for same-ness
 */
var arrayEquals = function arrayEquals(a, b) {
  var i;

  if (a.length !== b.length) {
    return false;
  }

  // compare the value of each element in the array
  for (i = 0; i < a.length; i++) {
    if (a[i] !== b[i]) {
      return false;
    }
  }

  return true;
};

var generateSegmentTimingInfo = function generateSegmentTimingInfo(baseMediaDecodeTime, startDts, startPts, endDts, endPts, prependedContentDuration) {
  var ptsOffsetFromDts = startPts - startDts,
      decodeDuration = endDts - startDts,
      presentationDuration = endPts - startPts;

  // The PTS and DTS values are based on the actual stream times from the segment,
  // however, the player time values will reflect a start from the baseMediaDecodeTime.
  // In order to provide relevant values for the player times, base timing info on the
  // baseMediaDecodeTime and the DTS and PTS durations of the segment.
  return {
    start: {
      dts: baseMediaDecodeTime,
      pts: baseMediaDecodeTime + ptsOffsetFromDts
    },
    end: {
      dts: baseMediaDecodeTime + decodeDuration,
      pts: baseMediaDecodeTime + presentationDuration
    },
    prependedContentDuration: prependedContentDuration,
    baseMediaDecodeTime: baseMediaDecodeTime
  };
};
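
// As a worked example of the mapping above (illustrative numbers only, not
// taken from any real stream): with baseMediaDecodeTime = 9000,
// startDts = 90000, startPts = 93000, endDts = 180000, endPts = 186000 and
// prependedContentDuration = 0 (all in 90kHz clock ticks), ptsOffsetFromDts
// is 3000, decodeDuration is 90000 and presentationDuration is 93000, so the
// returned timing info is:
//
//   {
//     start: { dts: 9000, pts: 12000 },
//     end: { dts: 99000, pts: 102000 },
//     prependedContentDuration: 0,
//     baseMediaDecodeTime: 9000
//   }
//
// i.e. the segment is reported as starting 0.1 seconds into the player
// timeline while preserving the stream's pts/dts offset and durations.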
/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at 0.
 */
_AudioSegmentStream = function AudioSegmentStream(track, options) {
  var adtsFrames = [],
      sequenceNumber,
      earliestAllowedDts = 0,
      audioAppendStartTs = 0,
      videoBaseMediaDecodeTime = Infinity;
  options = options || {};
  sequenceNumber = options.firstSequenceNumber || 0;

  _AudioSegmentStream.prototype.init.call(this);

  this.push = function (data) {
    trackDecodeInfo.collectDtsInfo(track, data);

    if (track) {
      AUDIO_PROPERTIES.forEach(function (prop) {
        track[prop] = data[prop];
      });
    }

    // buffer audio data until end() is called
    adtsFrames.push(data);
  };

  this.setEarliestDts = function (earliestDts) {
    earliestAllowedDts = earliestDts;
  };

  this.setVideoBaseMediaDecodeTime = function (baseMediaDecodeTime) {
    videoBaseMediaDecodeTime = baseMediaDecodeTime;
  };

  this.setAudioAppendStart = function (timestamp) {
    audioAppendStartTs = timestamp;
  };

  this.flush = function () {
    var frames, moof, mdat, boxes, frameDuration, segmentDuration, videoClockCyclesOfSilencePrefixed;

    // return early if no audio data has been observed
    if (adtsFrames.length === 0) {
      this.trigger('done', 'AudioSegmentStream');
      return;
    }

    frames = audioFrameUtils.trimAdtsFramesByEarliestDts(adtsFrames, track, earliestAllowedDts);
    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(track, options.keepOriginalTimestamps);

    // amount of audio filled, but the value is in video clock units rather than audio clock units
    videoClockCyclesOfSilencePrefixed = audioFrameUtils.prefixWithSilence(track, frames, audioAppendStartTs, videoBaseMediaDecodeTime);

    // we have to build the index from byte locations to
    // samples (that is, adts frames) in the audio data
    track.samples = audioFrameUtils.generateSampleTable(frames);

    // concatenate the audio data to construct the mdat
    mdat = mp4.mdat(audioFrameUtils.concatenateFrameData(frames));
    adtsFrames = [];
    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;
    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);
    trackDecodeInfo.clearDtsInfo(track);
    frameDuration = Math.ceil(ONE_SECOND_IN_TS * 1024 / track.samplerate);

    // TODO this check was added to maintain backwards compatibility (particularly with
    // tests) on adding the timingInfo event. However, it seems unlikely that there's a
    // valid use-case where an init segment/data should be triggered without associated
    // frames. Leaving for now, but should be looked into.
    if (frames.length) {
      segmentDuration = frames.length * frameDuration;

      this.trigger(
        'segmentTimingInfo',
        generateSegmentTimingInfo(
          // The audio track's baseMediaDecodeTime is in audio clock cycles, but the
          // frame info is in video clock cycles. Convert to match the expectation of
          // listeners (that all timestamps will be based on video clock cycles).
          clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate),
          // frame times are already in video clock, as is segment duration
          frames[0].dts,
          frames[0].pts,
          frames[0].dts + segmentDuration,
          frames[0].pts + segmentDuration,
          videoClockCyclesOfSilencePrefixed || 0
        )
      );

      this.trigger('timingInfo', {
        start: frames[0].pts,
        end: frames[0].pts + segmentDuration
      });
    }

    this.trigger('data', {
      track: track,
      boxes: boxes
    });
    this.trigger('done', 'AudioSegmentStream');
  };

  this.reset = function () {
    trackDecodeInfo.clearDtsInfo(track);
    adtsFrames = [];
    this.trigger('reset');
  };
};

_AudioSegmentStream.prototype = new Stream();
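
// A minimal usage sketch (illustrative only; the track fields shown here are
// assumptions, since in practice the transmuxer builds the track object from
// stream metadata): an AudioSegmentStream is normally fed by an AdtsStream,
// whose output frames carry the AUDIO_PROPERTIES that push() copies onto the
// track.
//
//   var adtsStream = new AdtsStream();
//   var audioSegmentStream = new _AudioSegmentStream({
//     id: 257,
//     type: 'audio',
//     codec: 'adts',
//     timelineStartInfo: { baseMediaDecodeTime: 0 }
//   }, { keepOriginalTimestamps: false });
//
//   adtsStream.pipe(audioSegmentStream);
//   audioSegmentStream.on('data', function (segment) {
//     // segment.boxes is a Uint8Array holding one moof+mdat pair
//   });
//   // ...push PES data into adtsStream, then call audioSegmentStream.flush()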
/**
 * Constructs a single-track, ISO BMFF media segment from H264 data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.alignGopsAtEnd {boolean} If true, start from the end of the
 *        gopsToAlignWith list when attempting to align gop pts
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at 0.
 */
_VideoSegmentStream = function VideoSegmentStream(track, options) {
  var sequenceNumber,
      nalUnits = [],
      gopsToAlignWith = [],
      config,
      pps;
  options = options || {};
  sequenceNumber = options.firstSequenceNumber || 0;

  _VideoSegmentStream.prototype.init.call(this);

  delete track.minPTS;
  this.gopCache_ = [];

  /**
   * Constructs an ISO BMFF segment given H264 nalUnits
   * @param {Object} nalUnit A data event representing a nalUnit
   * @param {String} nalUnit.nalUnitType
   * @param {Object} nalUnit.config Properties for a mp4 track
   * @param {Uint8Array} nalUnit.data The nalUnit bytes
   * @see lib/codecs/h264.js
   **/
  this.push = function (nalUnit) {
    trackDecodeInfo.collectDtsInfo(track, nalUnit);

    // record the track config
    if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) {
      config = nalUnit.config;
      track.sps = [nalUnit.data];

      VIDEO_PROPERTIES.forEach(function (prop) {
        track[prop] = config[prop];
      }, this);
    }

    if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' && !pps) {
      pps = nalUnit.data;
      track.pps = [nalUnit.data];
    }

    // buffer video until flush() is called
    nalUnits.push(nalUnit);
  };
  /**
   * Pass constructed ISO BMFF track and boxes on to the
   * next stream in the pipeline
   **/
  this.flush = function () {
    var frames,
        gopForFusion,
        gops,
        moof,
        mdat,
        boxes,
        prependedContentDuration = 0,
        firstGop,
        lastGop;

    // Throw away nalUnits at the start of the byte stream until
    // we find the first AUD
    while (nalUnits.length) {
      if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
        break;
      }
      nalUnits.shift();
    }

    // Return early if no video data has been observed
    if (nalUnits.length === 0) {
      this.resetStream_();
      this.trigger('done', 'VideoSegmentStream');
      return;
    }

    // Organize the raw nal-units into arrays that represent
    // higher-level constructs such as frames and gops
    // (group-of-pictures)
    frames = frameUtils.groupNalsIntoFrames(nalUnits);
    gops = frameUtils.groupFramesIntoGops(frames);

    // If the first frame of this fragment is not a keyframe we have
    // a problem since MSE (on Chrome) requires a leading keyframe.
    //
    // We have two approaches to repairing this situation:
    // 1) GOP-FUSION:
    //    This is where we keep track of the GOPS (group-of-pictures)
    //    from previous fragments and attempt to find one that we can
    //    prepend to the current fragment in order to create a valid
    //    fragment.
    // 2) KEYFRAME-PULLING:
    //    Here we search for the first keyframe in the fragment and
    //    throw away all the frames between the start of the fragment
    //    and that keyframe. We then extend the duration and pull the
    //    PTS of the keyframe forward so that it covers the time range
    //    of the frames that were disposed of.
    //
    // #1 is far preferable to #2, which can cause "stuttering", but
    // requires more things to be just right.
    if (!gops[0][0].keyFrame) {
      // Search for a gop for fusion from our gopCache
      gopForFusion = this.getGopForFusion_(nalUnits[0], track);

      if (gopForFusion) {
        // in order to provide more accurate timing information about the segment, save
        // the number of seconds prepended to the original segment due to GOP fusion
        prependedContentDuration = gopForFusion.duration;
        gops.unshift(gopForFusion);

        // Adjust Gops' metadata to account for the inclusion of the
        // new gop at the beginning
        gops.byteLength += gopForFusion.byteLength;
        gops.nalCount += gopForFusion.nalCount;
        gops.pts = gopForFusion.pts;
        gops.dts = gopForFusion.dts;
        gops.duration += gopForFusion.duration;
      } else {
        // If we didn't find a candidate gop fall back to keyframe-pulling
        gops = frameUtils.extendFirstKeyFrame(gops);
      }
    }

    // Trim gops to align with gopsToAlignWith
    if (gopsToAlignWith.length) {
      var alignedGops;

      if (options.alignGopsAtEnd) {
        alignedGops = this.alignGopsAtEnd_(gops);
      } else {
        alignedGops = this.alignGopsAtStart_(gops);
      }

      if (!alignedGops) {
        // save all the nals in the last GOP into the gop cache
        this.gopCache_.unshift({
          gop: gops.pop(),
          pps: track.pps,
          sps: track.sps
        });

        // Keep a maximum of 6 GOPs in the cache
        this.gopCache_.length = Math.min(6, this.gopCache_.length);

        // Clear nalUnits
        nalUnits = [];

        // return early; no gops can be aligned with the desired gopsToAlignWith
        this.resetStream_();
        this.trigger('done', 'VideoSegmentStream');
        return;
      }

      // Some gops were trimmed. clear dts info so minSegmentDts and pts are correct
      // when recalculated before sending off to CoalesceStream
      trackDecodeInfo.clearDtsInfo(track);
      gops = alignedGops;
    }

    trackDecodeInfo.collectDtsInfo(track, gops);

    // First, we have to build the index from byte locations to
    // samples (that is, frames) in the video data
    track.samples = frameUtils.generateSampleTable(gops);

    // Concatenate the video data and construct the mdat
    mdat = mp4.mdat(frameUtils.concatenateNalData(gops));
    track.baseMediaDecodeTime = trackDecodeInfo.calculateTrackBaseMediaDecodeTime(track, options.keepOriginalTimestamps);

    this.trigger('processedGopsInfo', gops.map(function (gop) {
      return {
        pts: gop.pts,
        dts: gop.dts,
        byteLength: gop.byteLength
      };
    }));

    firstGop = gops[0];
    lastGop = gops[gops.length - 1];

    this.trigger(
      'segmentTimingInfo',
      generateSegmentTimingInfo(
        track.baseMediaDecodeTime,
        firstGop.dts,
        firstGop.pts,
        lastGop.dts + lastGop.duration,
        lastGop.pts + lastGop.duration,
        prependedContentDuration
      )
    );

    this.trigger('timingInfo', {
      start: gops[0].pts,
      end: gops[gops.length - 1].pts + gops[gops.length - 1].duration
    });

    // save all the nals in the last GOP into the gop cache
    this.gopCache_.unshift({
      gop: gops.pop(),
      pps: track.pps,
      sps: track.sps
    });

    // Keep a maximum of 6 GOPs in the cache
    this.gopCache_.length = Math.min(6, this.gopCache_.length);

    // Clear nalUnits
    nalUnits = [];

    this.trigger('baseMediaDecodeTime', track.baseMediaDecodeTime);
    this.trigger('timelineStartInfo', track.timelineStartInfo);

    moof = mp4.moof(sequenceNumber, [track]);

    // it would be great to allocate this array up front instead of
    // throwing away hundreds of media segment fragments
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // Bump the sequence number for next time
    sequenceNumber++;
    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', {
      track: track,
      boxes: boxes
    });

    this.resetStream_();

    // Continue with the flush process now
    this.trigger('done', 'VideoSegmentStream');
  };
  this.reset = function () {
    this.resetStream_();
    nalUnits = [];
    this.gopCache_.length = 0;
    gopsToAlignWith.length = 0;
    this.trigger('reset');
  };

  this.resetStream_ = function () {
    trackDecodeInfo.clearDtsInfo(track);

    // reset config and pps because they may differ across segments
    // for instance, when we are rendition switching
    config = undefined;
    pps = undefined;
  };

  // Search for a candidate Gop for gop-fusion from the gop cache and
  // return it or return null if no good candidate was found
  this.getGopForFusion_ = function (nalUnit) {
    var halfSecond = 45000, // Half-a-second in a 90kHz clock
        allowableOverlap = 10000, // About 3 frames @ 30fps
        nearestDistance = Infinity,
        dtsDistance,
        nearestGopObj,
        currentGop,
        currentGopObj,
        i;

    // Search for the GOP nearest to the beginning of this nal unit
    for (i = 0; i < this.gopCache_.length; i++) {
      currentGopObj = this.gopCache_[i];
      currentGop = currentGopObj.gop;

      // Reject Gops with different SPS or PPS
      if (!(track.pps && arrayEquals(track.pps[0], currentGopObj.pps[0])) ||
          !(track.sps && arrayEquals(track.sps[0], currentGopObj.sps[0]))) {
        continue;
      }

      // Reject Gops that would require a negative baseMediaDecodeTime
      if (currentGop.dts < track.timelineStartInfo.dts) {
        continue;
      }

      // The distance between the end of the gop and the start of the nalUnit
      dtsDistance = nalUnit.dts - currentGop.dts - currentGop.duration;

      // Only consider GOPS that start before the nal unit and end within
      // a half-second of the nal unit
      if (dtsDistance >= -allowableOverlap && dtsDistance <= halfSecond) {
        // Always use the closest GOP we found if there is more than
        // one candidate
        if (!nearestGopObj || nearestDistance > dtsDistance) {
          nearestGopObj = currentGopObj;
          nearestDistance = dtsDistance;
        }
      }
    }

    if (nearestGopObj) {
      return nearestGopObj.gop;
    }

    return null;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the START of the list
  this.alignGopsAtStart_ = function (gops) {
    var alignIndex, gopIndex, align, gop, byteLength, nalCount, duration, alignedGops;

    byteLength = gops.byteLength;
    nalCount = gops.nalCount;
    duration = gops.duration;
    alignIndex = gopIndex = 0;

    while (alignIndex < gopsToAlignWith.length && gopIndex < gops.length) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        break;
      }

      if (gop.pts > align.pts) {
        // this current gop starts after the current gop we want to align on, so increment
        // align index
        alignIndex++;
        continue;
      }

      // current gop starts before the current gop we want to align on. so increment gop
      // index
      gopIndex++;
      byteLength -= gop.byteLength;
      nalCount -= gop.nalCount;
      duration -= gop.duration;
    }

    if (gopIndex === 0) {
      // no gops to trim
      return gops;
    }

    if (gopIndex === gops.length) {
      // all gops trimmed, skip appending all gops
      return null;
    }

    alignedGops = gops.slice(gopIndex);
    alignedGops.byteLength = byteLength;
    alignedGops.duration = duration;
    alignedGops.nalCount = nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;
    return alignedGops;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the END of the list
  this.alignGopsAtEnd_ = function (gops) {
    var alignIndex, gopIndex, align, gop, alignEndIndex, matchFound;

    alignIndex = gopsToAlignWith.length - 1;
    gopIndex = gops.length - 1;
    alignEndIndex = null;
    matchFound = false;

    while (alignIndex >= 0 && gopIndex >= 0) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        matchFound = true;
        break;
      }

      if (align.pts > gop.pts) {
        alignIndex--;
        continue;
      }

      if (alignIndex === gopsToAlignWith.length - 1) {
        // gop.pts is greater than the last alignment candidate. If no match is found
        // by the end of this loop, we still want to append gops that come after this
        // point
        alignEndIndex = gopIndex;
      }

      gopIndex--;
    }

    if (!matchFound && alignEndIndex === null) {
      return null;
    }

    var trimIndex;

    if (matchFound) {
      trimIndex = gopIndex;
    } else {
      trimIndex = alignEndIndex;
    }

    if (trimIndex === 0) {
      return gops;
    }

    var alignedGops = gops.slice(trimIndex);
    var metadata = alignedGops.reduce(function (total, gop) {
      total.byteLength += gop.byteLength;
      total.duration += gop.duration;
      total.nalCount += gop.nalCount;
      return total;
    }, {
      byteLength: 0,
      duration: 0,
      nalCount: 0
    });

    alignedGops.byteLength = metadata.byteLength;
    alignedGops.duration = metadata.duration;
    alignedGops.nalCount = metadata.nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;
    return alignedGops;
  };
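
  // A small worked example of the start-alignment above (pts values
  // illustrative): with gopsToAlignWith pts of [200, 300] and incoming gops
  // with pts of [100, 200, 300], the gop at pts 100 is trimmed and its
  // byteLength/nalCount/duration subtracted, so alignGopsAtStart_ returns
  // the gops at [200, 300] with their aggregate metadata recomputed. If no
  // pts matches at all, null is returned and flush() gives up on the
  // segment, caching the last gop for later fusion. The end-alignment
  // variant walks both lists backwards instead, which favors keeping the
  // trailing gops when only a suffix can be matched.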
  this.alignGopsWith = function (newGopsToAlignWith) {
    gopsToAlignWith = newGopsToAlignWith;
  };
};

_VideoSegmentStream.prototype = new Stream();
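
// A corresponding usage sketch for video (again illustrative; the track
// object is an assumption): a VideoSegmentStream is normally fed by an
// H264Stream, which emits the nalUnit events that push() expects, including
// the seq_parameter_set_rbsp units carrying the track config.
//
//   var h264Stream = new H264Stream();
//   var videoSegmentStream = new _VideoSegmentStream({
//     id: 256,
//     type: 'video',
//     timelineStartInfo: { baseMediaDecodeTime: 0 }
//   }, { keepOriginalTimestamps: false });
//
//   h264Stream.pipe(videoSegmentStream);
//   videoSegmentStream.on('data', function (segment) {
//     // segment.boxes is a Uint8Array holding one moof+mdat pair
//   });
//   // ...push PES data into h264Stream, then call videoSegmentStream.flush()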
/**
 * A Stream that can combine multiple streams (i.e. audio & video)
 * into a single output segment for MSE. Also supports audio-only
 * and video-only streams.
 * @param options {object} transmuxer options object
 * @param options.keepOriginalTimestamps {boolean} If true, keep the timestamps
 *        in the source; false to adjust the first segment to start at media timeline start.
 */
_CoalesceStream = function CoalesceStream(options, metadataStream) {
  // Number of Tracks per output segment
  // If greater than 1, we combine multiple
  // tracks into a single segment
  this.numberOfTracks = 0;
  this.metadataStream = metadataStream;
  options = options || {};

  if (typeof options.remux !== 'undefined') {
    this.remuxTracks = !!options.remux;
  } else {
    this.remuxTracks = true;
  }

  if (typeof options.keepOriginalTimestamps === 'boolean') {
    this.keepOriginalTimestamps = options.keepOriginalTimestamps;
  } else {
    this.keepOriginalTimestamps = false;
  }

  this.pendingTracks = [];
  this.videoTrack = null;
  this.pendingBoxes = [];
  this.pendingCaptions = [];
  this.pendingMetadata = [];
  this.pendingBytes = 0;
  this.emittedTracks = 0;

  _CoalesceStream.prototype.init.call(this);

  // Take output from multiple streams
  this.push = function (output) {
    // buffer incoming captions until the associated video segment
    // finishes
    if (output.text) {
      return this.pendingCaptions.push(output);
    }

    // buffer incoming id3 tags until the final flush
    if (output.frames) {
      return this.pendingMetadata.push(output);
    }

    // Add this track to the list of pending tracks and store
    // important information required for the construction of
    // the final segment
    this.pendingTracks.push(output.track);
    this.pendingBytes += output.boxes.byteLength;

    // TODO: is there an issue for this against chrome?
    // We unshift audio and push video because
    // as of Chrome 75 when switching from
    // one init segment to another if the video
    // mdat does not appear after the audio mdat
    // only audio will play for the duration of our transmux.
    if (output.track.type === 'video') {
      this.videoTrack = output.track;
      this.pendingBoxes.push(output.boxes);
    }

    if (output.track.type === 'audio') {
      this.audioTrack = output.track;
      this.pendingBoxes.unshift(output.boxes);
    }
  };
};

_CoalesceStream.prototype = new Stream();
_CoalesceStream.prototype.flush = function (flushSource) {
  var offset = 0,
      event = {
        captions: [],
        captionStreams: {},
        metadata: [],
        info: {}
      },
      caption,
      id3,
      initSegment,
      timelineStartPts = 0,
      i;

  if (this.pendingTracks.length < this.numberOfTracks) {
    if (flushSource !== 'VideoSegmentStream' && flushSource !== 'AudioSegmentStream') {
      // Return because we haven't received a flush from a data-generating
      // portion of the segment (meaning that we have only received metadata
      // or captions.)
      return;
    } else if (this.remuxTracks) {
      // Return until we have enough tracks from the pipeline to remux (if we
      // are remuxing audio and video into a single MP4)
      return;
    } else if (this.pendingTracks.length === 0) {
      // In the case where we receive a flush without any data having been
      // received we consider it an emitted track for the purposes of coalescing
      // `done` events.
      // We do this for the case where there is an audio and video track in the
      // segment but no audio data. (seen in several playlists with alternate
      // audio tracks and no audio present in the main TS segments.)
      this.emittedTracks++;

      if (this.emittedTracks >= this.numberOfTracks) {
        this.trigger('done');
        this.emittedTracks = 0;
      }
      return;
    }
  }

  if (this.videoTrack) {
    timelineStartPts = this.videoTrack.timelineStartInfo.pts;
    VIDEO_PROPERTIES.forEach(function (prop) {
      event.info[prop] = this.videoTrack[prop];
    }, this);
  } else if (this.audioTrack) {
    timelineStartPts = this.audioTrack.timelineStartInfo.pts;
    AUDIO_PROPERTIES.forEach(function (prop) {
      event.info[prop] = this.audioTrack[prop];
    }, this);
  }

  if (this.videoTrack || this.audioTrack) {
    if (this.pendingTracks.length === 1) {
      event.type = this.pendingTracks[0].type;
    } else {
      event.type = 'combined';
    }

    this.emittedTracks += this.pendingTracks.length;
    initSegment = mp4.initSegment(this.pendingTracks);

    // Create a new typed array to hold the init segment
    event.initSegment = new Uint8Array(initSegment.byteLength);

    // Create an init segment containing a moov
    // and track definitions
    event.initSegment.set(initSegment);

    // Create a new typed array to hold the moof+mdats
    event.data = new Uint8Array(this.pendingBytes);

    // Append each moof+mdat (one per track) together
    for (i = 0; i < this.pendingBoxes.length; i++) {
      event.data.set(this.pendingBoxes[i], offset);
      offset += this.pendingBoxes[i].byteLength;
    }

    // Translate caption PTS times into second offsets to match the
    // video timeline for the segment, and add track info
    for (i = 0; i < this.pendingCaptions.length; i++) {
      caption = this.pendingCaptions[i];
      caption.startTime = clock.metadataTsToSeconds(caption.startPts, timelineStartPts, this.keepOriginalTimestamps);
      caption.endTime = clock.metadataTsToSeconds(caption.endPts, timelineStartPts, this.keepOriginalTimestamps);
      event.captionStreams[caption.stream] = true;
      event.captions.push(caption);
    }

    // Translate ID3 frame PTS times into second offsets to match the
    // video timeline for the segment
    for (i = 0; i < this.pendingMetadata.length; i++) {
      id3 = this.pendingMetadata[i];
      id3.cueTime = clock.metadataTsToSeconds(id3.pts, timelineStartPts, this.keepOriginalTimestamps);
      event.metadata.push(id3);
    }

    // We add this to every single emitted segment even though we only need
    // it for the first
    event.metadata.dispatchType = this.metadataStream.dispatchType;

    // Reset stream state
    this.pendingTracks.length = 0;
    this.videoTrack = null;
    this.pendingBoxes.length = 0;
    this.pendingCaptions.length = 0;
    this.pendingBytes = 0;
    this.pendingMetadata.length = 0;

    // Emit the built segment
    // We include captions and ID3 tags for backwards compatibility,
    // ideally we should send only video and audio in the data event
    this.trigger('data', event);

    // Emit each caption to the outside world
    // Ideally, this would happen immediately on parsing captions,
    // but we need to ensure that video data is sent back first
    // so that caption timing can be adjusted to match video timing
    for (i = 0; i < event.captions.length; i++) {
      caption = event.captions[i];
      this.trigger('caption', caption);
    }

    // Emit each id3 tag to the outside world
    // Ideally, this would happen immediately on parsing the tag,
    // but we need to ensure that video data is sent back first
    // so that ID3 frame timing can be adjusted to match video timing
    for (i = 0; i < event.metadata.length; i++) {
      id3 = event.metadata[i];
      this.trigger('id3Frame', id3);
    }
  }

  // Only emit `done` if all tracks have been flushed and emitted
  if (this.emittedTracks >= this.numberOfTracks) {
    this.trigger('done');
    this.emittedTracks = 0;
  }
};
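
// For reference, the 'data' event built by the flush above has roughly this
// shape (field values illustrative):
//
//   {
//     type: 'combined',          // or 'video'/'audio' when only one track is pending
//     initSegment: Uint8Array,   // init segment for the pending tracks
//     data: Uint8Array,          // the moof+mdat pairs, audio first, then video
//     captions: [...],           // with startTime/endTime converted to seconds
//     captionStreams: { CC1: true },
//     metadata: [...],           // id3 frames with cueTime converted to seconds
//     info: {...}                // VIDEO_PROPERTIES or AUDIO_PROPERTIES values
//   }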
_CoalesceStream.prototype.setRemux = function (val) {
  this.remuxTracks = val;
};
/**
 * A Stream that expects MP2T binary data as input and produces
 * corresponding media segments, suitable for use with Media Source
 * Extension (MSE) implementations that support the ISO BMFF byte
 * stream format, like Chrome.
 */
_Transmuxer = function Transmuxer(options) {
  var self = this,
      hasFlushed = true,
      videoTrack,
      audioTrack;

  _Transmuxer.prototype.init.call(this);

  options = options || {};
  this.baseMediaDecodeTime = options.baseMediaDecodeTime || 0;
  this.transmuxPipeline_ = {};

  this.setupAacPipeline = function () {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;
    pipeline.type = 'aac';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.aacStream = new AacStream();
    pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
    pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
    pipeline.adtsStream = new AdtsStream();
    pipeline.coalesceStream = new _CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.aacStream;

    pipeline.aacStream
      .pipe(pipeline.audioTimestampRolloverStream)
      .pipe(pipeline.adtsStream);
    pipeline.aacStream
      .pipe(pipeline.timedMetadataTimestampRolloverStream)
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    pipeline.metadataStream.on('timestamp', function (frame) {
      pipeline.aacStream.setTimestamp(frame.timeStamp);
    });

    pipeline.aacStream.on('data', function (data) {
      if ((data.type !== 'timed-metadata' && data.type !== 'audio') || pipeline.audioSegmentStream) {
        return;
      }

      audioTrack = audioTrack || {
        timelineStartInfo: {
          baseMediaDecodeTime: self.baseMediaDecodeTime
        },
        codec: 'adts',
        type: 'audio'
      };

      // hook up the audio segment stream to the first track with aac data
      pipeline.coalesceStream.numberOfTracks++;
      pipeline.audioSegmentStream = new _AudioSegmentStream(audioTrack, options);
      pipeline.audioSegmentStream.on('log', self.getLogTrigger_('audioSegmentStream'));
      pipeline.audioSegmentStream.on('timingInfo', self.trigger.bind(self, 'audioTimingInfo'));

      // Set up the final part of the audio pipeline
      pipeline.adtsStream
        .pipe(pipeline.audioSegmentStream)
        .pipe(pipeline.coalesceStream);

      // emit pmt info
      self.trigger('trackinfo', {
        hasAudio: !!audioTrack,
        hasVideo: !!videoTrack
      });
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));

    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
    addPipelineLogRetriggers(this, pipeline);
  };

  this.setupTsPipeline = function () {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;
    pipeline.type = 'ts';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.packetStream = new m2ts.TransportPacketStream();
    pipeline.parseStream = new m2ts.TransportParseStream();
    pipeline.elementaryStream = new m2ts.ElementaryStream();
    pipeline.timestampRolloverStream = new m2ts.TimestampRolloverStream();
    pipeline.adtsStream = new AdtsStream();
    pipeline.h264Stream = new H264Stream();
    pipeline.captionStream = new m2ts.CaptionStream(options);
    pipeline.coalesceStream = new _CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.packetStream;

    // disassemble MPEG2-TS packets into elementary streams
    pipeline.packetStream
      .pipe(pipeline.parseStream)
      .pipe(pipeline.elementaryStream)
      .pipe(pipeline.timestampRolloverStream);

    // !!THIS ORDER IS IMPORTANT!!
    // demux the streams
    pipeline.timestampRolloverStream.pipe(pipeline.h264Stream);
    pipeline.timestampRolloverStream.pipe(pipeline.adtsStream);
    pipeline.timestampRolloverStream
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    // Hook up CEA-608/708 caption stream
    pipeline.h264Stream
      .pipe(pipeline.captionStream)
      .pipe(pipeline.coalesceStream);

    // hook up the segment streams once track metadata is delivered
    pipeline.elementaryStream.on('data', function (data) {
      var i;

      if (data.type === 'metadata') {
        i = data.tracks.length;

        // scan the tracks listed in the metadata
        while (i--) {
          if (!videoTrack && data.tracks[i].type === 'video') {
            videoTrack = data.tracks[i];
            videoTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          } else if (!audioTrack && data.tracks[i].type === 'audio') {
            audioTrack = data.tracks[i];
            audioTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          }
        }

        // hook up the video segment stream to the first track with h264 data
        if (videoTrack && !pipeline.videoSegmentStream) {
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.videoSegmentStream = new _VideoSegmentStream(videoTrack, options);
          pipeline.videoSegmentStream.on('log', self.getLogTrigger_('videoSegmentStream'));

          pipeline.videoSegmentStream.on('timelineStartInfo', function (timelineStartInfo) {
            // When video emits timelineStartInfo data after a flush, we forward that
            // info to the AudioSegmentStream, if it exists, because video timeline
            // data takes precedence. Do not do this if keepOriginalTimestamps is set,
            // because this is a particularly subtle form of timestamp alteration.
            if (audioTrack && !options.keepOriginalTimestamps) {
              audioTrack.timelineStartInfo = timelineStartInfo;

              // On the first segment we trim AAC frames that exist before the
              // very earliest DTS we have seen in video because Chrome will
              // interpret any video track with a baseMediaDecodeTime that is
              // non-zero as a gap.
              pipeline.audioSegmentStream.setEarliestDts(timelineStartInfo.dts - self.baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('processedGopsInfo', self.trigger.bind(self, 'gopInfo'));
          pipeline.videoSegmentStream.on('segmentTimingInfo', self.trigger.bind(self, 'videoSegmentTimingInfo'));

          pipeline.videoSegmentStream.on('baseMediaDecodeTime', function (baseMediaDecodeTime) {
            if (audioTrack) {
              pipeline.audioSegmentStream.setVideoBaseMediaDecodeTime(baseMediaDecodeTime);
            }
          });

          pipeline.videoSegmentStream.on('timingInfo', self.trigger.bind(self, 'videoTimingInfo'));

          // Set up the final part of the video pipeline
          pipeline.h264Stream
            .pipe(pipeline.videoSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        if (audioTrack && !pipeline.audioSegmentStream) {
          // hook up the audio segment stream to the first track with aac data
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.audioSegmentStream = new _AudioSegmentStream(audioTrack, options);
          pipeline.audioSegmentStream.on('log', self.getLogTrigger_('audioSegmentStream'));
          pipeline.audioSegmentStream.on('timingInfo', self.trigger.bind(self, 'audioTimingInfo'));
          pipeline.audioSegmentStream.on('segmentTimingInfo', self.trigger.bind(self, 'audioSegmentTimingInfo'));

          // Set up the final part of the audio pipeline
          pipeline.adtsStream
            .pipe(pipeline.audioSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        // emit pmt info
        self.trigger('trackinfo', {
          hasAudio: !!audioTrack,
          hasVideo: !!videoTrack
        });
      }
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));

    pipeline.coalesceStream.on('id3Frame', function (id3Frame) {
      id3Frame.dispatchType = pipeline.metadataStream.dispatchType;
      self.trigger('id3Frame', id3Frame);
    });

    pipeline.coalesceStream.on('caption', this.trigger.bind(this, 'caption'));

    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
    addPipelineLogRetriggers(this, pipeline);
  };
  this.setBaseMediaDecodeTime = function (baseMediaDecodeTime) {
    var pipeline = this.transmuxPipeline_;

    if (!options.keepOriginalTimestamps) {
      this.baseMediaDecodeTime = baseMediaDecodeTime;
    }

    if (audioTrack) {
      audioTrack.timelineStartInfo.dts = undefined;
      audioTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(audioTrack);

      if (pipeline.audioTimestampRolloverStream) {
        pipeline.audioTimestampRolloverStream.discontinuity();
      }
    }

    if (videoTrack) {
      if (pipeline.videoSegmentStream) {
        pipeline.videoSegmentStream.gopCache_ = [];
      }

      videoTrack.timelineStartInfo.dts = undefined;
      videoTrack.timelineStartInfo.pts = undefined;
      trackDecodeInfo.clearDtsInfo(videoTrack);
      pipeline.captionStream.reset();
    }

    if (pipeline.timestampRolloverStream) {
      pipeline.timestampRolloverStream.discontinuity();
    }
  };

  this.setAudioAppendStart = function (timestamp) {
    if (audioTrack) {
      this.transmuxPipeline_.audioSegmentStream.setAudioAppendStart(timestamp);
    }
  };

  this.setRemux = function (val) {
    var pipeline = this.transmuxPipeline_;
    options.remux = val;

    if (pipeline && pipeline.coalesceStream) {
      pipeline.coalesceStream.setRemux(val);
    }
  };

  this.alignGopsWith = function (gopsToAlignWith) {
    if (videoTrack && this.transmuxPipeline_.videoSegmentStream) {
      this.transmuxPipeline_.videoSegmentStream.alignGopsWith(gopsToAlignWith);
    }
  };

  this.getLogTrigger_ = function (key) {
    var self = this;
    return function (event) {
      event.stream = key;
      self.trigger('log', event);
    };
  };

  // feed incoming data to the front of the parsing pipeline
  this.push = function (data) {
    if (hasFlushed) {
      var isAac = isLikelyAacData(data);

      if (isAac && this.transmuxPipeline_.type !== 'aac') {
        this.setupAacPipeline();
      } else if (!isAac && this.transmuxPipeline_.type !== 'ts') {
        this.setupTsPipeline();
      }
      hasFlushed = false;
    }
    this.transmuxPipeline_.headOfPipeline.push(data);
  };

  // flush any buffered data
  this.flush = function () {
    hasFlushed = true;

    // Start at the top of the pipeline and flush all pending work
    this.transmuxPipeline_.headOfPipeline.flush();
  };

  this.endTimeline = function () {
    this.transmuxPipeline_.headOfPipeline.endTimeline();
  };

  this.reset = function () {
    if (this.transmuxPipeline_.headOfPipeline) {
      this.transmuxPipeline_.headOfPipeline.reset();
    }
  };

  // Caption data has to be reset when seeking outside buffered range
  this.resetCaptions = function () {
    if (this.transmuxPipeline_.captionStream) {
      this.transmuxPipeline_.captionStream.reset();
    }
  };
};
_Transmuxer.prototype = new Stream();

module.exports = {
  Transmuxer: _Transmuxer,
  VideoSegmentStream: _VideoSegmentStream,
  AudioSegmentStream: _AudioSegmentStream,
  AUDIO_PROPERTIES: AUDIO_PROPERTIES,
  VIDEO_PROPERTIES: VIDEO_PROPERTIES,
  // exported for testing
  generateSegmentTimingInfo: generateSegmentTimingInfo
};
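
// A minimal end-to-end usage sketch (illustrative only; `mediaSource`,
// `mimeCodec` and `tsBytes` are assumed to be supplied by the caller):
//
//   var Transmuxer = require('./transmuxer').Transmuxer;
//   var transmuxer = new Transmuxer();
//   var sourceBuffer = mediaSource.addSourceBuffer(mimeCodec);
//
//   transmuxer.on('data', function (segment) {
//     // concatenate the init segment and media data before appending to MSE
//     var bytes = new Uint8Array(segment.initSegment.byteLength + segment.data.byteLength);
//     bytes.set(segment.initSegment, 0);
//     bytes.set(segment.data, segment.initSegment.byteLength);
//     sourceBuffer.appendBuffer(bytes);
//   });
//
//   transmuxer.push(tsBytes); // raw MPEG2-TS (or ADTS AAC) bytes
//   transmuxer.flush();       // emits 'data' and then 'done'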