probe.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Utilities to detect basic properties and metadata about MP4s.
  8. */
  9. 'use strict';
  10. var toUnsigned = require('../utils/bin').toUnsigned;
  11. var toHexString = require('../utils/bin').toHexString;
  12. var findBox = require('../mp4/find-box.js');
  13. var parseType = require('../mp4/parse-type.js');
  14. var emsg = require('../mp4/emsg.js');
  15. var parseTfhd = require('../tools/parse-tfhd.js');
  16. var parseTrun = require('../tools/parse-trun.js');
  17. var parseTfdt = require('../tools/parse-tfdt.js');
  18. var getUint64 = require('../utils/numbers.js').getUint64;
  19. var timescale, startTime, compositionStartTime, getVideoTrackIds, getTracks,
  20. getTimescaleFromMediaHeader, getEmsgID3;
  21. var window = require('global/window');
  22. var parseId3Frames = require('../tools/parse-id3.js').parseId3Frames;
  /**
   * Parses an MP4 initialization segment and extracts the timescale
   * values for any declared tracks. Timescale values indicate the
   * number of clock ticks per second to assume for time-based values
   * elsewhere in the MP4.
   *
   * To determine the start time of an MP4, you need two pieces of
   * information: the timescale unit and the earliest base media decode
   * time. Multiple timescales can be specified within an MP4 but the
   * base media decode time is always expressed in the timescale from
   * the media header box for the track:
   * ```
   * moov > trak > mdia > mdhd.timescale
   * ```
   * @param init {Uint8Array} the bytes of the init segment
   * @return {object} a hash of track ids to timescale values or null if
   * the init segment is malformed (any trak lacks a tkhd or an mdhd box).
   */
  timescale = function(init) {
    var traks = findBox(init, ['moov', 'trak']);

    return traks.reduce(function(result, trak) {
      var tkhd, mdhd, index, id;

      // An earlier trak was malformed. Keep reporting null rather than
      // writing a property onto the null accumulator, which would throw.
      if (result === null) {
        return null;
      }

      tkhd = findBox(trak, ['tkhd'])[0];
      if (!tkhd) {
        return null;
      }

      // the track id sits after the version/flags word and the creation and
      // modification times, which are 32 bit in version 0 and 64 bit otherwise
      index = tkhd[0] === 0 ? 12 : 20;
      id = toUnsigned(tkhd[index] << 24 |
                      tkhd[index + 1] << 16 |
                      tkhd[index + 2] << 8 |
                      tkhd[index + 3]);

      mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
      if (!mdhd) {
        return null;
      }

      // mdhd timescale, located with the same version-dependent offset
      index = mdhd[0] === 0 ? 12 : 20;
      result[id] = toUnsigned(mdhd[index] << 24 |
                              mdhd[index + 1] << 16 |
                              mdhd[index + 2] << 8 |
                              mdhd[index + 3]);
      return result;
    }, {});
  };
  /**
   * Determine the base media decode start time, in seconds, for an MP4
   * fragment. If multiple fragments are specified, the earliest time is
   * returned.
   *
   * The base media decode time can be parsed from track fragment
   * metadata:
   * ```
   * moof > traf > tfdt.baseMediaDecodeTime
   * ```
   * It requires the timescale value from the mdhd to interpret.
   *
   * @param timescale {object} a hash of track ids to timescale values. A
   * missing hash or a missing entry falls back to a 90kHz clock.
   * @param fragment {Uint8Array} the bytes of a media segment
   * @return {number|bigint} the earliest base media decode start time for
   * the fragment, in seconds, or 0 when no track fragment yields a time
   */
  startTime = function(timescale, fragment) {
    var trafs = findBox(fragment, ['moof', 'traf']);

    // determine the start time for each track and keep the lowest one
    var lowestTime = trafs.reduce(function(acc, traf) {
      // we need info from two children of each track fragment box
      var tfhd = findBox(traf, ['tfhd'])[0];
      var tfdt = findBox(traf, ['tfdt'])[0];
      var id, scale, baseTime, seconds;

      // without both boxes there is no decode time to read for this track,
      // so leave the running minimum untouched instead of throwing
      if (!tfhd || !tfdt) {
        return acc;
      }

      // get the track id from the tfhd
      id = toUnsigned(tfhd[4] << 24 |
                      tfhd[5] << 16 |
                      tfhd[6] << 8 |
                      tfhd[7]);

      // assume a 90kHz clock if no timescale was specified
      scale = (timescale && timescale[id]) || 90e3;

      // get the base media decode time from the tfdt; version 1 is 64 bit
      if (tfdt[0] === 1) {
        baseTime = getUint64(tfdt.subarray(4, 12));
      } else {
        baseTime = new DataView(tfdt.buffer, tfdt.byteOffset, tfdt.byteLength).getUint32(4);
      }

      // convert base time to seconds if it is a valid number
      if (typeof baseTime === 'bigint') {
        seconds = baseTime / window.BigInt(scale);
      } else if (typeof baseTime === 'number' && !isNaN(baseTime)) {
        seconds = baseTime / scale;
      }

      // hand back a plain number whenever the value is small enough
      if (seconds < Number.MAX_SAFE_INTEGER) {
        seconds = Number(seconds);
      }

      return seconds < acc ? seconds : acc;
    }, Infinity);

    return typeof lowestTime === 'bigint' || isFinite(lowestTime) ? lowestTime : 0;
  };
  /**
   * Determine the composition start, in seconds, for an MP4
   * fragment.
   *
   * The composition start time of a fragment can be calculated using the base
   * media decode time, composition time offset, and timescale, as follows:
   *
   * compositionStartTime = (baseMediaDecodeTime + compositionTimeOffset) / timescale
   *
   * All of the aforementioned information is contained within a media fragment's
   * `traf` box, except for timescale info, which comes from the initialization
   * segment, so a track id (also contained within a `traf`) is also necessary to
   * associate it with a timescale
   *
   * @param timescales {object} - a hash of track ids to timescale values. A
   * missing hash or a missing entry falls back to a 90kHz clock.
   * @param fragment {Uint8Array} - the bytes of a media segment
   * @return {number|bigint} the composition start time for the fragment, in seconds
   **/
  compositionStartTime = function(timescales, fragment) {
    var trafBoxes = findBox(fragment, ['moof', 'traf']);
    var baseMediaDecodeTime = 0;
    var compositionTimeOffset = 0;
    var trackId, firstTraf, tfhd, trun, tfdt, samples, scale, result;

    if (trafBoxes && trafBoxes.length) {
      // The spec states that track run samples contained within a `traf` box are contiguous, but
      // it does not explicitly state whether the `traf` boxes themselves are contiguous.
      // We will assume that they are, so we only need the first to calculate start time.
      firstTraf = trafBoxes[0];
      tfhd = findBox(firstTraf, ['tfhd'])[0];
      trun = findBox(firstTraf, ['trun'])[0];
      tfdt = findBox(firstTraf, ['tfdt'])[0];

      if (tfhd) {
        trackId = parseTfhd(tfhd).trackId;
      }

      if (tfdt) {
        baseMediaDecodeTime = parseTfdt(tfdt).baseMediaDecodeTime;
      }

      if (trun) {
        samples = parseTrun(trun).samples;
        if (samples && samples.length) {
          compositionTimeOffset = samples[0].compositionTimeOffset || 0;
        }
      }
    }

    // Get timescale for this specific track. Assume a 90kHz clock if no timescale was
    // specified, including when no timescale hash was supplied at all.
    scale = (timescales && timescales[trackId]) || 90e3;

    // bigint and number operands cannot be mixed, so promote the others when
    // the decode time was parsed as a bigint
    if (typeof baseMediaDecodeTime === 'bigint') {
      compositionTimeOffset = window.BigInt(compositionTimeOffset);
      scale = window.BigInt(scale);
    }

    // the composition start time, in seconds
    result = (baseMediaDecodeTime + compositionTimeOffset) / scale;

    if (typeof result === 'bigint' && result < Number.MAX_SAFE_INTEGER) {
      result = Number(result);
    }

    return result;
  };
  /**
   * Find the trackIds of the video tracks in this source.
   * Found by parsing the Handler Reference and Track Header Boxes:
   * moov > trak > mdia > hdlr
   * moov > trak > tkhd
   *
   * A trak whose handler type is `vide` but which has no matching tkhd box
   * is skipped, since there is no track id to read for it.
   *
   * @param {Uint8Array} init - The bytes of the init segment for this source
   * @return {Number[]} A list of trackIds
   *
   * @see ISO-BMFF-12/2015, Section 8.4.3
   **/
  getVideoTrackIds = function(init) {
    var traks = findBox(init, ['moov', 'trak']);
    var videoTrackIds = [];

    traks.forEach(function(trak) {
      var hdlrs = findBox(trak, ['mdia', 'hdlr']);
      var tkhds = findBox(trak, ['tkhd']);

      hdlrs.forEach(function(hdlr, index) {
        var tkhd = tkhds[index];
        var view;

        if (parseType(hdlr.subarray(8, 12)) !== 'vide') {
          return;
        }

        // no track header to pair with this handler: nothing to report
        if (!tkhd) {
          return;
        }

        view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);

        // the first byte is the box version, which decides where the id lives
        videoTrackIds.push(view.getUint8(0) === 0 ? view.getUint32(12) : view.getUint32(20));
      });
    });

    return videoTrackIds;
  };
  /**
   * Read the timescale field out of the contents of an mdhd box.
   *
   * @param {Uint8Array} mdhd - the bytes of the mdhd box, starting at its
   * version byte
   * @return {number} the value toUnsigned produces for the four timescale bytes
   */
  getTimescaleFromMediaHeader = function(mdhd) {
    // mdhd is a FullBox, so its first byte is the version; the version
    // decides how far into the box the timescale starts
    var offset = mdhd[0] === 0 ? 12 : 20;
    var packed = 0;
    var i;

    // fold the four big-endian bytes into one 32 bit value
    for (i = 0; i < 4; i++) {
      packed = (packed << 8) | mdhd[offset + i];
    }

    return toUnsigned(packed);
  };
  /**
   * Get all the video, audio, and hint tracks from a non fragmented
   * mp4 segment
   *
   * Each returned object carries only the properties whose source box was
   * found in the trak: `id` (tkhd), `type` (hdlr), `codec` (stsd) and
   * `timescale` (mdhd).
   *
   * @param {Uint8Array} init - the bytes containing the moov box
   * @return {Object[]} one entry per trak, in the order the traks appear
   */
  getTracks = function(init) {
    var traks = findBox(init, ['moov', 'trak']);
    var tracks = [];

    traks.forEach(function(trak) {
      var track = {};
      var tkhd = findBox(trak, ['tkhd'])[0];
      var hdlr, stsd, mdhd, view, type;
      var sampleDescriptions, codecBox, codecConfig, codecConfigType;

      // id
      if (tkhd) {
        view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
        track.id = view.getUint8(0) === 0 ? view.getUint32(12) : view.getUint32(20);
      }

      // type
      hdlr = findBox(trak, ['mdia', 'hdlr'])[0];
      if (hdlr) {
        type = parseType(hdlr.subarray(8, 12));

        if (type === 'vide') {
          track.type = 'video';
        } else if (type === 'soun') {
          track.type = 'audio';
        } else {
          track.type = type;
        }
      }

      // codec
      stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];
      if (stsd) {
        sampleDescriptions = stsd.subarray(8);
        // gives the codec type string
        track.codec = parseType(sampleDescriptions.subarray(4, 8));
        codecBox = findBox(sampleDescriptions, [track.codec])[0];

        if (codecBox) {
          // https://tools.ietf.org/html/rfc6381#section-3.3
          if ((/^[asm]vc[1-9]$/i).test(track.codec)) {
            // we don't need anything but the "config" parameter of the
            // avc1 codecBox
            codecConfig = codecBox.subarray(78);
            codecConfigType = parseType(codecConfig.subarray(4, 8));

            if (codecConfigType === 'avcC' && codecConfig.length > 11) {
              // left padded with zeroes for single digit hex:
              // profile idc, then the byte containing the constraint_set
              // flags, then level idc
              track.codec += '.' +
                toHexString(codecConfig[9]) +
                toHexString(codecConfig[10]) +
                toHexString(codecConfig[11]);
            } else {
              // TODO: show a warning that we couldn't parse the codec
              // and are using the default
              track.codec = 'avc1.4d400d';
            }
          } else if ((/^mp4[av]$/i).test(track.codec)) {
            // only mp4a and mp4v belong here; the character class must not
            // contain a comma or a literal "mp4," would match as well

            // we do not need anything but the streamDescriptor of the mp4a codecBox
            codecConfig = codecBox.subarray(28);
            codecConfigType = parseType(codecConfig.subarray(4, 8));

            if (codecConfigType === 'esds' && codecConfig.length > 20 && codecConfig[19] !== 0) {
              track.codec += '.' + toHexString(codecConfig[19]);
              // this value is only a single digit
              track.codec += '.' + toHexString((codecConfig[20] >>> 2) & 0x3f).replace(/^0/, '');
            } else {
              // TODO: show a warning that we couldn't parse the codec
              // and are using the default
              // NOTE(review): this audio default is also applied to mp4v
              // entries; confirm that is intended
              track.codec = 'mp4a.40.2';
            }
          } else {
            // flac, opus, etc
            track.codec = track.codec.toLowerCase();
          }
        }
      }

      // timescale
      mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
      if (mdhd) {
        track.timescale = getTimescaleFromMediaHeader(mdhd);
      }

      tracks.push(track);
    });

    return tracks;
  };
  /**
   * Collect ID3 data from every emsg box found in the provided segment data.
   * The optional offset is forwarded to emsg.scaleTime when computing each
   * cue time; it is meant to be the segment start time or Latest Arrival
   * Time used to derive the Event Start Time of v0 EMSG boxes.
   * See: https://dashif-documents.azurewebsites.net/Events/master/event.html#Inband-event-timing
   *
   * @param {Uint8Array} segmentData the segment byte array.
   * @param {number} offset the segment start time or Latest Arrival Time,
   * defaults to 0.
   * @return {Object[]} one `{cueTime, duration, frames}` object per emsg box
   */
  getEmsgID3 = function(segmentData, offset = 0) {
    var toId3Cue = function(boxBytes) {
      // parse a copy of the box bytes
      var box = emsg.parseEmsgBox(new Uint8Array(boxBytes));
      var frames = parseId3Frames(box.message_data);
      var ticksPerSecond = box.timescale;
      var cueTime = emsg.scaleTime(
        box.presentation_time,
        ticksPerSecond,
        box.presentation_time_delta,
        offset
      );
      var duration = emsg.scaleTime(box.event_duration, ticksPerSecond);

      return { cueTime: cueTime, duration: duration, frames: frames };
    };

    return findBox(segmentData, ['emsg']).map(function(boxBytes) {
      return toId3Cue(boxBytes);
    });
  };
  339. module.exports = {
  340. // export mp4 inspector's findBox and parseType for backwards compatibility
  341. findBox: findBox,
  342. parseType: parseType,
  343. timescale: timescale,
  344. startTime: startTime,
  345. compositionStartTime: compositionStartTime,
  346. videoTrackIds: getVideoTrackIds,
  347. tracks: getTracks,
  348. getTimescaleFromMediaHeader: getTimescaleFromMediaHeader,
  349. getEmsgID3: getEmsgID3,
  350. };