probe.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Utilities to detect basic properties and metadata about MP4s.
  8. */
  9. 'use strict';
  10. var toUnsigned = require('../utils/bin').toUnsigned;
  11. var toHexString = require('../utils/bin').toHexString;
  12. var findBox = require('../mp4/find-box.js');
  13. var parseType = require('../mp4/parse-type.js');
  14. var emsg = require('../mp4/emsg.js');
  15. var parseTfhd = require('../tools/parse-tfhd.js');
  16. var parseTrun = require('../tools/parse-trun.js');
  17. var parseTfdt = require('../tools/parse-tfdt.js');
  18. var getUint64 = require('../utils/numbers.js').getUint64;
  19. var timescale, startTime, compositionStartTime, getVideoTrackIds, getTracks, getTimescaleFromMediaHeader, getEmsgID3;
  20. var window = require('global/window');
  21. var parseId3Frames = require('../tools/parse-id3.js').parseId3Frames;
  /**
   * Parses an MP4 initialization segment and extracts the timescale
   * values for any declared tracks. Timescale values indicate the
   * number of clock ticks per second to assume for time-based values
   * elsewhere in the MP4.
   *
   * To determine the start time of an MP4, you need two pieces of
   * information: the timescale unit and the earliest base media decode
   * time. Multiple timescales can be specified within an MP4 but the
   * base media decode time is always expressed in the timescale from
   * the media header box for the track:
   * ```
   * moov > trak > mdia > mdhd.timescale
   * ```
   * @param {Uint8Array} init the bytes of the init segment
   * @return {Object|null} a hash of track ids to timescale values, or null
   * if any trak is missing its tkhd or mdhd box (malformed init segment).
   */
  timescale = function timescale(init) {
    var traks = findBox(init, ['moov', 'trak']);

    // Reads the unsigned 32-bit big-endian field this function needs from a
    // tkhd (track id) or mdhd (timescale) box. Version 0 boxes keep it at
    // byte 12, every other version at byte 20.
    var readVersionedUint32 = function(box) {
      var index = box[0] === 0 ? 12 : 20;

      return toUnsigned(
        box[index] << 24 |
        box[index + 1] << 16 |
        box[index + 2] << 8 |
        box[index + 3]
      );
    };

    return traks.reduce(function(result, trak) {
      // An earlier trak was malformed. Keep reporting null instead of
      // writing a property onto it, which would throw a TypeError.
      if (result === null) {
        return null;
      }

      var tkhd = findBox(trak, ['tkhd'])[0];

      if (!tkhd) {
        return null;
      }

      var id = readVersionedUint32(tkhd);
      var mdhd = findBox(trak, ['mdia', 'mdhd'])[0];

      if (!mdhd) {
        return null;
      }

      result[id] = readVersionedUint32(mdhd);
      return result;
    }, {});
  };
  /**
   * Determine the base media decode start time, in seconds, for an MP4
   * fragment. If multiple fragments are specified, the earliest time is
   * returned.
   *
   * The base media decode time can be parsed from track fragment
   * metadata:
   * ```
   * moof > traf > tfdt.baseMediaDecodeTime
   * ```
   * It requires the timescale value from the mdhd to interpret.
   *
   * @param {Object} timescale a hash of track ids to timescale values. A
   * missing (null/undefined) hash is treated as empty, so every track uses
   * the default 90kHz clock.
   * @param {Uint8Array} fragment the bytes of a media segment
   * @return {number|bigint} the earliest base media decode start time for the
   * fragment, in seconds, or 0 when no traf carries a usable tfdt. A bigint
   * is only returned when the value is not below Number.MAX_SAFE_INTEGER.
   */
  startTime = function startTime(timescale, fragment) {
    var DEFAULT_CLOCK = 90e3;
    // timescale() reports null for a malformed init segment; fall back to
    // the default clock for every track in that case instead of throwing
    var timescales = timescale || {};
    // we need info from two children of each track fragment box
    var trafs = findBox(fragment, ['moof', 'traf']);

    // determine the start times for each track
    var lowestTime = trafs.reduce(function(acc, traf) {
      var tfhd = findBox(traf, ['tfhd'])[0];
      var tfdt = findBox(traf, ['tfdt'])[0];

      // without a tfdt this traf has no decode time to contribute
      if (!tfdt) {
        return acc;
      }

      // assume a 90kHz clock if the track id or its timescale is unknown
      var scale = DEFAULT_CLOCK;

      if (tfhd) {
        // get the track id from the tfhd
        var id = toUnsigned(tfhd[4] << 24 | tfhd[5] << 16 | tfhd[6] << 8 | tfhd[7]);

        scale = timescales[id] || DEFAULT_CLOCK;
      }

      // get the base media decode time from the tfdt
      var baseTime;

      if (tfdt[0] === 1) {
        // version 1 is 64 bit
        baseTime = getUint64(tfdt.subarray(4, 12));
      } else {
        baseTime = new DataView(tfdt.buffer, tfdt.byteOffset, tfdt.byteLength).getUint32(4);
      }

      // convert base time to seconds if it is a valid number.
      var seconds;

      if (typeof baseTime === 'bigint') {
        seconds = baseTime / window.BigInt(scale);
      } else if (typeof baseTime === 'number' && !isNaN(baseTime)) {
        seconds = baseTime / scale;
      }

      // prefer a plain number whenever the value fits in one
      if (seconds < Number.MAX_SAFE_INTEGER) {
        seconds = Number(seconds);
      }

      // an undefined `seconds` never compares as smaller, so it is ignored
      return seconds < acc ? seconds : acc;
    }, Infinity);

    return typeof lowestTime === 'bigint' || isFinite(lowestTime) ? lowestTime : 0;
  };
  /**
   * Determine the composition start, in seconds, for an MP4 fragment.
   *
   * The composition start time of a fragment is derived from the base media
   * decode time, the composition time offset of the first sample, and the
   * track's timescale:
   *
   * compositionStartTime = (baseMediaDecodeTime + compositionTimeOffset) / timescale
   *
   * Everything except the timescale lives in the media fragment's `traf` box.
   * The timescale comes from the initialization segment, so the track id
   * (also stored in the `traf`) is used to look it up.
   *
   * @param {Object} timescales - a hash of track ids to timescale values.
   * @param {Uint8Array} fragment - the bytes of a media segment
   * @return {number|bigint} the composition start time for the fragment, in
   * seconds. A bigint is only returned when the decode time was a bigint and
   * the result is not below Number.MAX_SAFE_INTEGER.
   **/
  compositionStartTime = function compositionStartTime(timescales, fragment) {
    var trafs = findBox(fragment, ['moof', 'traf']);
    var decodeTime = 0;
    var ctsOffset = 0;
    var trackId;

    if (trafs && trafs.length) {
      // The spec states that track run samples contained within a `traf` box
      // are contiguous, but it does not explicitly state whether the `traf`
      // boxes themselves are contiguous. We assume that they are, so only the
      // first one is needed to calculate the start time.
      var firstTraf = trafs[0];
      var tfhdBox = findBox(firstTraf, ['tfhd'])[0];
      var trunBox = findBox(firstTraf, ['trun'])[0];
      var tfdtBox = findBox(firstTraf, ['tfdt'])[0];

      if (tfhdBox) {
        trackId = parseTfhd(tfhdBox).trackId;
      }

      if (tfdtBox) {
        decodeTime = parseTfdt(tfdtBox).baseMediaDecodeTime;
      }

      if (trunBox) {
        var run = parseTrun(trunBox);

        if (run.samples && run.samples.length) {
          ctsOffset = run.samples[0].compositionTimeOffset || 0;
        }
      }
    }

    // Timescale for this specific track; assume a 90kHz clock if none was
    // specified.
    var clock = timescales[trackId] || 90e3;

    // bigint arithmetic cannot be mixed with numbers, so promote the other
    // operands when the decode time is a bigint
    if (typeof decodeTime === 'bigint') {
      ctsOffset = window.BigInt(ctsOffset);
      clock = window.BigInt(clock);
    }

    // the composition start time, in seconds
    var startSeconds = (decodeTime + ctsOffset) / clock;

    if (typeof startSeconds === 'bigint' && startSeconds < Number.MAX_SAFE_INTEGER) {
      return Number(startSeconds);
    }

    return startSeconds;
  };
  /**
   * Find the trackIds of the video tracks in this source.
   * Found by parsing the Handler Reference and Track Header Boxes:
   * moov > trak > mdia > hdlr
   * moov > trak > tkhd
   *
   * A trak whose handler type is `vide` but which has no matching tkhd box
   * is skipped, because its track id cannot be determined.
   *
   * @param {Uint8Array} init - The bytes of the init segment for this source
   * @return {Number[]} A list of trackIds
   *
   * @see ISO-BMFF-12/2015, Section 8.4.3
   **/
  getVideoTrackIds = function getVideoTrackIds(init) {
    var traks = findBox(init, ['moov', 'trak']);
    var videoTrackIds = [];

    traks.forEach(function(trak) {
      var hdlrs = findBox(trak, ['mdia', 'hdlr']);
      var tkhds = findBox(trak, ['tkhd']);

      hdlrs.forEach(function(hdlr, index) {
        var handlerType = parseType(hdlr.subarray(8, 12));

        if (handlerType !== 'vide') {
          return;
        }

        // the hdlr and tkhd of a trak are paired up by position
        var tkhd = tkhds[index];

        // malformed trak: no track header to read the id from
        if (!tkhd) {
          return;
        }

        var view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
        var version = view.getUint8(0);

        // the track id sits at byte 12 in version 0 boxes, byte 20 otherwise
        videoTrackIds.push(version === 0 ? view.getUint32(12) : view.getUint32(20));
      });
    });

    return videoTrackIds;
  };
  /**
   * Reads the timescale out of a media header (mdhd) box.
   *
   * @param {Uint8Array} mdhd the bytes of the mdhd box, starting at its
   * version byte
   * @return {number} the timescale as an unsigned 32-bit integer
   */
  getTimescaleFromMediaHeader = function getTimescaleFromMediaHeader(mdhd) {
    // mdhd is a FullBox, so its first byte is the box version. The timescale
    // is read from byte 12 for version 0 and from byte 20 otherwise.
    var offset = mdhd[0] === 0 ? 12 : 20;
    var word = 0;

    // fold the four big-endian bytes into a single 32-bit word
    for (var i = 0; i < 4; i++) {
      word = word << 8 | mdhd[offset + i];
    }

    return toUnsigned(word);
  };
  /**
   * Get all the video, audio, and hint tracks from a non fragmented
   * mp4 segment
   *
   * @param {Uint8Array} init the bytes containing the moov box
   * @return {Object[]} one object per trak box. Each property is only set
   * when the box it is read from is present:
   *   - id {number} track id from the tkhd
   *   - type {string} 'video', 'audio', or the raw handler type from the hdlr
   *   - codec {string} codec string derived from the stsd
   *   - timescale {number} timescale from the mdhd
   */
  getTracks = function getTracks(init) {
    var traks = findBox(init, ['moov', 'trak']);
    var tracks = [];

    traks.forEach(function(trak) {
      var track = {};

      // id
      var tkhd = findBox(trak, ['tkhd'])[0];

      if (tkhd) {
        var view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
        var tkhdVersion = view.getUint8(0);

        track.id = tkhdVersion === 0 ? view.getUint32(12) : view.getUint32(20);
      }

      // type
      var hdlr = findBox(trak, ['mdia', 'hdlr'])[0];

      if (hdlr) {
        var type = parseType(hdlr.subarray(8, 12));

        if (type === 'vide') {
          track.type = 'video';
        } else if (type === 'soun') {
          track.type = 'audio';
        } else {
          track.type = type;
        }
      }

      // codec
      var stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];

      if (stsd) {
        var sampleDescriptions = stsd.subarray(8);

        // gives the codec type string
        track.codec = parseType(sampleDescriptions.subarray(4, 8));

        var codecBox = findBox(sampleDescriptions, [track.codec])[0];
        var codecConfig;
        var codecConfigType;

        if (codecBox) {
          // https://tools.ietf.org/html/rfc6381#section-3.3
          if ((/^[asm]vc[1-9]$/i).test(track.codec)) {
            // we don't need anything but the "config" parameter of the
            // avc1 codecBox
            codecConfig = codecBox.subarray(78);
            codecConfigType = parseType(codecConfig.subarray(4, 8));

            if (codecConfigType === 'avcC' && codecConfig.length > 11) {
              track.codec += '.';

              // left padded with zeroes for single digit hex
              // profile idc
              track.codec += toHexString(codecConfig[9]);
              // the byte containing the constraint_set flags
              track.codec += toHexString(codecConfig[10]);
              // level idc
              track.codec += toHexString(codecConfig[11]);
            } else {
              // TODO: show a warning that we couldn't parse the codec
              // and are using the default
              track.codec = 'avc1.4d400d';
            }
          } else if ((/^mp4[av]$/i).test(track.codec)) {
            // we do not need anything but the streamDescriptor of the mp4a codecBox
            codecConfig = codecBox.subarray(28);
            codecConfigType = parseType(codecConfig.subarray(4, 8));

            if (codecConfigType === 'esds' && codecConfig.length > 20 && codecConfig[19] !== 0) {
              track.codec += '.' + toHexString(codecConfig[19]);
              // this value is only a single digit
              track.codec += '.' + toHexString(codecConfig[20] >>> 2 & 0x3f).replace(/^0/, '');
            } else {
              // TODO: show a warning that we couldn't parse the codec
              // and are using the default
              // NOTE(review): this branch is also taken for mp4v, yet the
              // fallback is an audio codec string - confirm that is intended
              track.codec = 'mp4a.40.2';
            }
          } else {
            // flac, opus, etc
            track.codec = track.codec.toLowerCase();
          }
        }
      }

      var mdhd = findBox(trak, ['mdia', 'mdhd'])[0];

      if (mdhd) {
        track.timescale = getTimescaleFromMediaHeader(mdhd);
      }

      tracks.push(track);
    });

    return tracks;
  };
  /**
   * Returns an array of emsg ID3 data from the provided segmentData.
   * An offset can also be provided as the Latest Arrival Time to calculate
   * the Event Start Time of v0 EMSG boxes.
   * See: https://dashif-documents.azurewebsites.net/Events/master/event.html#Inband-event-timing
   *
   * @param {Uint8Array} segmentData the segment byte array.
   * @param {number} offset the segment start time or Latest Arrival Time;
   * treated as 0 when omitted.
   * @return {Object[]} one `{cueTime, duration, frames}` object per emsg box,
   * where `frames` holds the ID3 frames parsed from the box's message data
   */
  getEmsgID3 = function getEmsgID3(segmentData, offset) {
    if (typeof offset === 'undefined') {
      offset = 0;
    }

    // turns the bytes of a single emsg box into a cue description
    var toCue = function(boxBytes) {
      var box = emsg.parseEmsgBox(new Uint8Array(boxBytes));
      var id3Frames = parseId3Frames(box.message_data);
      var cueTime = emsg.scaleTime(
        box.presentation_time,
        box.timescale,
        box.presentation_time_delta,
        offset
      );
      var duration = emsg.scaleTime(box.event_duration, box.timescale);

      return {
        cueTime: cueTime,
        duration: duration,
        frames: id3Frames
      };
    };

    return findBox(segmentData, ['emsg']).map(function(boxBytes) {
      return toCue(boxBytes);
    });
  };
  308. module.exports = {
  309. // export mp4 inspector's findBox and parseType for backwards compatibility
  310. findBox: findBox,
  311. parseType: parseType,
  312. timescale: timescale,
  313. startTime: startTime,
  314. compositionStartTime: compositionStartTime,
  315. videoTrackIds: getVideoTrackIds,
  316. tracks: getTracks,
  317. getTimescaleFromMediaHeader: getTimescaleFromMediaHeader,
  318. getEmsgID3: getEmsgID3
  319. };