mp4-helpers.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. import { stringToBytes, toUint8, bytesMatch, bytesToString, toHexString, padStart, bytesToNumber } from './byte-helpers.js';
  2. import { getAvcCodec, getHvcCodec, getAv1Codec } from './codec-helpers.js';
  3. import { parseOpusHead } from './opus-helpers.js';
  /**
   * Normalize a single box-path segment.
   *
   * Strings are converted with `stringToBytes`; every other value (numbers,
   * byte arrays, ...) is handed back untouched.
   *
   * @param {string|number|Uint8Array} path
   *        a single path segment
   *
   * @return {*}
   *         the result of `stringToBytes` for strings, otherwise `path` itself
   */
  var normalizePath = function normalizePath(path) {
    return typeof path === 'string' ? stringToBytes(path) : path;
  };
  /**
   * Normalize one path or a list of paths into a list of normalized segments.
   *
   * @param {Array|string|number|Uint8Array} paths
   *        a single path segment or an array of them
   *
   * @return {Array}
   *         an array holding `normalizePath(segment)` for every segment; a
   *         non-array argument yields a one-element array
   */
  var normalizePaths = function normalizePaths(paths) {
    var segments = Array.isArray(paths) ? paths : [paths];

    return segments.map(function (segment) {
      return normalizePath(segment);
    });
  };
  // Table of known descriptor tags and their payload parsers. It is declared
  // before parseDescriptors and filled in afterwards because the parsers for
  // tags 0x03 and 0x04 call back into parseDescriptors for nested descriptors.
  var DESCRIPTORS;

  /**
   * Parse a run of sibling descriptors (tag byte, variable-length size,
   * payload) such as the ones stored in an `esds` box.
   *
   * The size is encoded in 1 to 4 bytes, 7 bits per byte, most significant
   * group first; the high bit of a byte signals that another size byte
   * follows. Descriptors whose tag is not in the DESCRIPTORS table are skipped.
   *
   * @param {TypedArray|Array} bytes
   *        bytes to parse; passed through `toUint8` first
   *
   * @return {Object[]}
   *         one parsed object per descriptor with a known tag, in file order
   */
  export var parseDescriptors = function parseDescriptors(bytes) {
    bytes = toUint8(bytes);
    var results = [];
    var i = 0;

    while (i < bytes.length) {
      var tag = bytes[i];
      // header = tag byte + size bytes; everything is read relative to `i`
      // so that descriptors after the first one are located correctly
      var headerSize = 1;
      var size = 0;
      var byte = bytes[i + headerSize];

      headerSize++;

      // accumulate the 7-bit groups; never consume more than 4 size bytes
      // so a corrupt stream cannot overflow the 32-bit shift
      while ((byte & 0x80) && headerSize < 5) {
        size = (size | (byte & 0x7F)) << 7;
        byte = bytes[i + headerSize];
        headerSize++;
      }
      size += byte & 0x7F;

      var payloadStart = i + headerSize;

      for (var z = 0; z < DESCRIPTORS.length; z++) {
        if (DESCRIPTORS[z].id === tag) {
          results.push(DESCRIPTORS[z].parser(bytes.subarray(payloadStart, payloadStart + size)));
          break;
        }
      }

      // headerSize is always >= 2, so this always makes progress
      i = payloadStart + size;
    }

    return results;
  };

  DESCRIPTORS = [{
    id: 0x03,
    parser: function parser(bytes) {
      // ES_Descriptor
      var desc = {
        tag: 0x03,
        id: bytes[0] << 8 | bytes[1],
        flags: bytes[2],
        // number of payload bytes consumed by the fixed and optional fields
        size: 3,
        dependsOnEsId: 0,
        ocrEsId: 0,
        descriptors: [],
        url: ''
      };

      // depends on es id
      if (desc.flags & 0x80) {
        desc.dependsOnEsId = bytes[desc.size] << 8 | bytes[desc.size + 1];
        desc.size += 2;
      }

      // url: one length byte followed by that many characters
      if (desc.flags & 0x40) {
        var len = bytes[desc.size] || 0;

        desc.url = bytesToString(bytes.subarray(desc.size + 1, desc.size + 1 + len));
        // skip the length byte as well as the string itself
        desc.size += len + 1;
      }

      // ocr es id
      if (desc.flags & 0x20) {
        desc.ocrEsId = bytes[desc.size] << 8 | bytes[desc.size + 1];
        desc.size += 2;
      }

      desc.descriptors = parseDescriptors(bytes.subarray(desc.size));
      return desc;
    }
  }, {
    id: 0x04,
    parser: function parser(bytes) {
      // DecoderConfigDescriptor
      return {
        tag: 0x04,
        oti: bytes[0],
        streamType: bytes[1],
        bufferSize: bytes[2] << 16 | bytes[3] << 8 | bytes[4],
        // `>>> 0` keeps 32-bit values with the top bit set from going negative
        maxBitrate: (bytes[5] << 24 | bytes[6] << 16 | bytes[7] << 8 | bytes[8]) >>> 0,
        avgBitrate: (bytes[9] << 24 | bytes[10] << 16 | bytes[11] << 8 | bytes[12]) >>> 0,
        descriptors: parseDescriptors(bytes.subarray(13))
      };
    }
  }, {
    id: 0x05,
    parser: function parser(bytes) {
      // DecoderSpecificInfo, kept as raw bytes
      return {
        tag: 0x05,
        bytes: bytes
      };
    }
  }, {
    id: 0x06,
    parser: function parser(bytes) {
      // SLConfigDescriptor, kept as raw bytes
      return {
        tag: 0x06,
        bytes: bytes
      };
    }
  }];
  /**
   * Find any number of boxes by name given a path to them in an iso bmff
   * file such as mp4.
   *
   * Boxes are walked sequentially: a 4-byte big-endian size, a 4-byte type
   * and, when the size field is 1, an 8-byte `largesize` that replaces it.
   * The walk stops at the first box whose size is smaller than its own
   * header (this includes a size of 0), since no further box can be located.
   *
   * @param {TypedArray} bytes
   *        bytes for the iso bmff to search for boxes in
   *
   * @param {Uint8Array[]|string[]|string|Uint8Array} paths
   *        An array of paths or a single path representing the name
   *        of boxes to search through in bytes. Paths may be
   *        uint8 (character codes) or strings.
   *
   * @param {boolean} [complete=false]
   *        Should we search only for complete boxes on the final path.
   *        This is very useful when you do not want to get back partial boxes
   *        in the case of streaming files.
   *
   * @return {Uint8Array[]}
   *         The payloads (bytes after the box header) of every box found at
   *         the end of the path.
   */
  export var findBox = function findBox(bytes, paths, complete) {
    if (complete === void 0) {
      complete = false;
    }

    paths = normalizePaths(paths);
    bytes = toUint8(bytes);

    var results = [];

    if (!paths.length) {
      // short-circuit the search for empty paths
      return results;
    }

    var i = 0;

    while (i < bytes.length) {
      var size = (bytes[i] << 24 | bytes[i + 1] << 16 | bytes[i + 2] << 8 | bytes[i + 3]) >>> 0;
      var type = bytes.subarray(i + 4, i + 8);
      var headerSize = 8;

      if (size === 1) {
        // a size of 1 means the real size is the 64-bit value after the type
        var high = (bytes[i + 8] << 24 | bytes[i + 9] << 16 | bytes[i + 10] << 8 | bytes[i + 11]) >>> 0;
        var low = (bytes[i + 12] << 24 | bytes[i + 13] << 16 | bytes[i + 14] << 8 | bytes[i + 15]) >>> 0;

        size = high * 4294967296 + low;
        headerSize = 16;
      }

      // invalid box format: a box can never be smaller than its header
      if (size < headerSize) {
        break;
      }

      var end = i + size;

      if (end > bytes.length) {
        // this box is bigger than the number of bytes we have
        // and complete is set, we cannot find any more boxes.
        if (complete) {
          break;
        }
        end = bytes.length;
      }

      var data = bytes.subarray(i + headerSize, end);

      if (bytesMatch(type, paths[0])) {
        if (paths.length === 1) {
          // this is the end of the path and we've found the box we were
          // looking for
          results.push(data);
        } else {
          // recursively search for the next box along the path
          results.push.apply(results, findBox(data, paths.slice(1), complete));
        }
      }

      i = end;
    }

    // we've finished searching all of bytes
    return results;
  };
  /**
   * Search for a single matching box by name in an iso bmff format like
   * mp4. This function is useful for finding codec boxes which
   * can be placed arbitrarily in sample descriptions depending
   * on the version of the file or file type.
   *
   * The search is a byte-by-byte scan for the 4-byte type; the 4 bytes in
   * front of the first match are read as the box size.
   *
   * @param {Uint8Array} bytes
   *        bytes for the iso bmff to search for boxes in
   *
   * @param {string|Uint8Array} name
   *        The name of the box to find.
   *
   * @return {Uint8Array}
   *         a subarray of bytes holding the payload of the first box found,
   *         or an empty subarray when there is no match.
   */
  export var findNamedBox = function findNamedBox(bytes, name) {
    name = normalizePath(name);

    if (!name.length) {
      // short-circuit the search for empty paths
      return bytes.subarray(bytes.length);
    }

    for (var i = 0; i < bytes.length; i++) {
      if (!bytesMatch(bytes.subarray(i, i + name.length), name)) {
        continue;
      }

      // the size field sits directly in front of the type; when the match is
      // closer than 4 bytes to the start the missing bytes read as 0
      var size = (bytes[i - 4] << 24 | bytes[i - 3] << 16 | bytes[i - 2] << 8 | bytes[i - 1]) >>> 0;
      // the size counts from the start of the box (i - 4), not from the type;
      // sizes of 0/1 carry no usable length so the rest of the data is used
      var end = size > 1 ? i - 4 + size : bytes.byteLength;

      return bytes.subarray(i + 4, end);
    }

    // we've finished searching all of bytes
    return bytes.subarray(bytes.length);
  };
  /**
   * Parse a sample-table style box payload: 4 bytes that are skipped,
   * a 4-byte entry count and then fixed-size entries.
   *
   * @param {Uint8Array} [data]
   *        box payload; a missing or empty payload yields no entries
   *
   * @param {number} [entrySize=4]
   *        size of a single entry in bytes
   *
   * @param {Function} [parseEntry]
   *        called with the bytes of each entry, its return value is
   *        collected. Defaults to `bytesToNumber`.
   *
   * @return {Array}
   *         the parsed entries. Entries that the count announces but the
   *         data does not fully contain are dropped.
   */
  var parseSamples = function parseSamples(data, entrySize, parseEntry) {
    if (entrySize === void 0) {
      entrySize = 4;
    }

    if (parseEntry === void 0) {
      parseEntry = function parseEntry(d) {
        return bytesToNumber(d);
      };
    }

    var entries = [];

    if (!data || !data.length) {
      return entries;
    }

    var entryCount = bytesToNumber(data.subarray(4, 8));

    // bound the loop by the data actually available: a truncated or corrupt
    // table must not produce (up to 2^32) entries parsed from empty slices
    for (var i = 8; entryCount > 0 && i + entrySize <= data.length; i += entrySize, entryCount--) {
      entries.push(parseEntry(data.subarray(i, i + entrySize)));
    }

    return entries;
  };
  /**
   * Build a per-sample frame table from the boxes of a sample table (`stbl`).
   *
   * Reads `stss` (key samples), `stco` (chunk offsets), `stts` (sample
   * durations), `stsc` (samples per chunk) and `stsz` (sample sizes).
   * NOTE: only the per-sample size table of `stsz` and 32-bit `stco` offsets
   * are read here.
   *
   * @param {Uint8Array} stbl
   *        payload of the stbl box
   *
   * @param {number} timescale
   *        units per second that the stts deltas are expressed in
   *
   * @return {Object[]}
   *         one object per sample with `keyframe`, `start` and `end` (byte
   *         offsets) and, for samples covered by stts, `duration` (in
   *         timescale units) and `timestamp` (running sum of the durations up
   *         to and including this sample, in milliseconds).
   */
  export var buildFrameTable = function buildFrameTable(stbl, timescale) {
    var keySamples = parseSamples(findBox(stbl, ['stss'])[0]);
    var chunkOffsets = parseSamples(findBox(stbl, ['stco'])[0]);
    var timeToSamples = parseSamples(findBox(stbl, ['stts'])[0], 8, function (entry) {
      return {
        sampleCount: bytesToNumber(entry.subarray(0, 4)),
        sampleDelta: bytesToNumber(entry.subarray(4, 8))
      };
    });
    var samplesToChunks = parseSamples(findBox(stbl, ['stsc'])[0], 12, function (entry) {
      return {
        firstChunk: bytesToNumber(entry.subarray(0, 4)),
        samplesPerChunk: bytesToNumber(entry.subarray(4, 8)),
        sampleDescriptionIndex: bytesToNumber(entry.subarray(8, 12))
      };
    });
    var stsz = findBox(stbl, ['stsz'])[0];
    // stsz starts with a 4 byte sampleSize which we don't need
    var sampleSizes = parseSamples(stsz && stsz.length && stsz.subarray(4) || null);

    // 1-based sample numbers of the key samples, for constant time lookups
    var keySampleLookup = {};

    for (var s = 0; s < keySamples.length; s++) {
      keySampleLookup[keySamples[s]] = true;
    }

    // if we don't have key samples every frame is a keyframe
    var everyFrameIsKey = !keySamples.length;

    // cursor into stts: `sttsEnd` is the number of samples covered by the
    // entries up to and including `sttsIndex`
    var sttsIndex = 0;
    var sttsEnd = timeToSamples.length ? timeToSamples[0].sampleCount : 0;
    var lastTimestamp = 0;
    var frames = [];

    for (var chunkIndex = 0; chunkIndex < chunkOffsets.length; chunkIndex++) {
      var samplesInChunk = void 0;

      // an stsc entry applies from its firstChunk (1-based) up to the chunk
      // before the next entry's firstChunk
      for (var i = 0; i < samplesToChunks.length; i++) {
        var sampleToChunk = samplesToChunks[i];
        var isThisOne = chunkIndex + 1 >= sampleToChunk.firstChunk && (i + 1 >= samplesToChunks.length || chunkIndex + 1 < samplesToChunks[i + 1].firstChunk);

        if (isThisOne) {
          samplesInChunk = sampleToChunk.samplesPerChunk;
          break;
        }
      }

      var chunkOffset = chunkOffsets[chunkIndex];

      for (var _i = 0; _i < samplesInChunk; _i++) {
        var frameSize = sampleSizes[frames.length];
        var frame = {
          keyframe: everyFrameIsKey || keySampleLookup[frames.length + 1] === true,
          start: chunkOffset,
          end: chunkOffset + frameSize
        };

        // each stts entry covers `sampleCount` consecutive samples, so move
        // on to the next entry once this sample is past the covered range
        while (sttsIndex < timeToSamples.length && frames.length >= sttsEnd) {
          sttsIndex++;
          if (sttsIndex < timeToSamples.length) {
            sttsEnd += timeToSamples[sttsIndex].sampleCount;
          }
        }

        if (sttsIndex < timeToSamples.length) {
          var sampleDelta = timeToSamples[sttsIndex].sampleDelta;

          // timescale units to milliseconds
          lastTimestamp += sampleDelta / timescale * 1000;
          frame.timestamp = lastTimestamp;
          frame.duration = sampleDelta;
        }

        frames.push(frame);
        chunkOffset += frameSize;
      }
    }

    return frames;
  };
  /**
   * Derive the codec string and basic stream info from one sample
   * description entry and store them on `track`.
   *
   * @param {Object} track
   *        track object to update. `track.type` selects which fixed fields
   *        are read ('video': width/height, 'audio': channels/bitDepth/
   *        sampleRate). `track.codec` is always set; `track.info` is created
   *        when something needs to be stored in it.
   *
   * @param {Uint8Array} bytes
   *        the sample description entry starting at its 4-byte type
   */
  export var addSampleDescription = function addSampleDescription(track, bytes) {
    var codec = bytesToString(bytes.subarray(0, 4));

    if (track.type === 'video') {
      track.info = track.info || {};
      track.info.width = bytes[28] << 8 | bytes[29];
      track.info.height = bytes[30] << 8 | bytes[31];
    } else if (track.type === 'audio') {
      track.info = track.info || {};
      track.info.channels = bytes[20] << 8 | bytes[21];
      track.info.bitDepth = bytes[22] << 8 | bytes[23];
      track.info.sampleRate = bytes[28] << 8 | bytes[29];
    }

    if (codec === 'avc1') {
      // AVCDecoderConfigurationRecord, stored unparsed
      var avcC = findNamedBox(bytes, 'avcC');

      codec += "." + getAvcCodec(avcC);
      // the track may be neither 'video' nor 'audio', so info may not exist yet
      track.info = track.info || {};
      track.info.avcC = avcC;
    } else if (codec === 'hvc1' || codec === 'hev1') {
      // HEVCDecoderConfigurationRecord
      codec += "." + getHvcCodec(findNamedBox(bytes, 'hvcC'));
    } else if (codec === 'mp4a' || codec === 'mp4v') {
      var esds = findNamedBox(bytes, 'esds');
      // the first 4 bytes of the esds payload are skipped before the descriptors
      var esDescriptor = parseDescriptors(esds.subarray(4))[0];
      var nested = esDescriptor && esDescriptor.descriptors || [];
      var decoderConfig = nested.filter(function (_ref) {
        return _ref.tag === 0x04;
      })[0];

      if (decoderConfig) {
        // most codecs do not have a further '.'
        // such as 0xa5 for ac-3 and 0xa6 for e-ac-3
        codec += '.' + toHexString(decoderConfig.oti);

        // the extra parameter comes from the first nested descriptor; it is
        // only appended when those bytes are actually present
        var specific = decoderConfig.descriptors && decoderConfig.descriptors[0];
        var specificBytes = specific && specific.bytes;

        if (decoderConfig.oti === 0x40) {
          if (specificBytes && specificBytes.length > 0) {
            codec += '.' + (specificBytes[0] >> 3).toString();
          }
        } else if (decoderConfig.oti === 0x20) {
          if (specificBytes && specificBytes.length > 4) {
            codec += '.' + specificBytes[4].toString();
          }
        } else if (decoderConfig.oti === 0xdd) {
          codec = 'vorbis';
        }
      } else if (track.type === 'audio') {
        codec += '.40.2';
      } else {
        codec += '.20.9';
      }
    } else if (codec === 'av01') {
      // AV1DecoderConfigurationRecord
      codec += "." + getAv1Codec(findNamedBox(bytes, 'av1C'));
    } else if (codec === 'vp09') {
      // VPCodecConfigurationRecord, see https://www.webmproject.org/vp9/mp4/
      // vpcC is a FullBox: skip its 4 bytes of version/flags to get to the record
      var vpcC = findNamedBox(bytes, 'vpcC').subarray(4);
      var profile = vpcC[0];
      var level = vpcC[1];
      // third byte: bitDepth (4 bits), chromaSubsampling (3 bits),
      // videoFullRangeFlag (1 bit)
      var bitDepth = vpcC[2] >> 4;
      var chromaSubsampling = (vpcC[2] & 0x0F) >> 1;
      var videoFullRangeFlag = vpcC[2] & 0x01;
      var colourPrimaries = vpcC[3];
      var transferCharacteristics = vpcC[4];
      var matrixCoefficients = vpcC[5];

      codec += "." + padStart(profile, 2, '0');
      codec += "." + padStart(level, 2, '0');
      codec += "." + padStart(bitDepth, 2, '0');
      codec += "." + padStart(chromaSubsampling, 2, '0');
      codec += "." + padStart(colourPrimaries, 2, '0');
      codec += "." + padStart(transferCharacteristics, 2, '0');
      codec += "." + padStart(matrixCoefficients, 2, '0');
      codec += "." + padStart(videoFullRangeFlag, 2, '0');
    } else if (codec === 'theo') {
      codec = 'theora';
    } else if (codec === 'spex') {
      codec = 'speex';
    } else if (codec === '.mp3') {
      codec = 'mp4a.40.34';
    } else if (codec === 'msVo') {
      codec = 'vorbis';
    } else if (codec === 'Opus') {
      codec = 'opus';

      var dOps = findNamedBox(bytes, 'dOps');

      track.info = track.info || {};
      track.info.opus = parseOpusHead(dOps);
      // TODO: should this go into the webm code??
      // Firefox requires a codecDelay for opus playback
      // see https://bugzilla.mozilla.org/show_bug.cgi?id=1276238
      track.info.codecDelay = 6500000;
    } else {
      // codec has no sub parameters: flac, ac-3, ec-3, ...
      codec = codec.toLowerCase();
    }

    track.codec = codec;
  };
  /**
   * Parse every complete `trak` box inside `moov` into a track object.
   *
   * @param {TypedArray} bytes
   *        bytes of the iso bmff file (or at least its moov box)
   *
   * @param {boolean} [frameTable=true]
   *        whether to also build `track.frameTable` via `buildFrameTable`
   *
   * @return {Object[]}
   *         one object per trak with `bytes` (the trak payload) and, when the
   *         corresponding boxes are present, `type` ('audio', 'video' or the
   *         raw handler type), `number` (track id from tkhd), `timescale`
   *         (from mdhd), `codec`/`info` (from stsd) and `frameTable`.
   *         Fields whose source box is missing are left unset instead of
   *         throwing.
   */
  export var parseTracks = function parseTracks(bytes, frameTable) {
    if (frameTable === void 0) {
      frameTable = true;
    }

    bytes = toUint8(bytes);

    var traks = findBox(bytes, ['moov', 'trak'], true);
    var tracks = [];

    traks.forEach(function (trak) {
      var track = {
        bytes: trak
      };
      var mdia = findBox(trak, ['mdia'])[0];
      var hdlr = mdia && findBox(mdia, ['hdlr'])[0];

      if (hdlr) {
        // the handler type is read from payload bytes 8..12
        var trakType = bytesToString(hdlr.subarray(8, 12));

        if (trakType === 'soun') {
          track.type = 'audio';
        } else if (trakType === 'vide') {
          track.type = 'video';
        } else {
          track.type = trakType;
        }
      }

      var tkhd = findBox(trak, ['tkhd'])[0];

      if (tkhd && tkhd.length) {
        var view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
        var tkhdVersion = view.getUint8(0);
        // version 0 reads the track id at offset 12, any other version at 20
        var idOffset = tkhdVersion === 0 ? 12 : 20;

        // getUint32 throws on a short buffer, so only read a complete field
        if (tkhd.byteLength >= idOffset + 4) {
          track.number = view.getUint32(idOffset);
        }
      }

      var mdhd = mdia && findBox(mdia, ['mdhd'])[0];

      if (mdhd) {
        // mdhd is a FullBox, meaning it will have its own version as the first byte
        var version = mdhd[0];
        var index = version === 0 ? 12 : 20;

        track.timescale = (mdhd[index] << 24 | mdhd[index + 1] << 16 | mdhd[index + 2] << 8 | mdhd[index + 3]) >>> 0;
      }

      var stbl = mdia && findBox(mdia, ['minf', 'stbl'])[0];
      var stsd = stbl && findBox(stbl, ['stsd'])[0];

      if (stsd) {
        var descriptionCount = bytesToNumber(stsd.subarray(4, 8));
        var offset = 8;

        // add codec and codec info. The loop is bounded by the data that is
        // actually there, not only by the announced count.
        while (descriptionCount-- > 0 && offset + 8 <= stsd.length) {
          // the entry size covers the whole entry, including these 4 bytes
          var len = bytesToNumber(stsd.subarray(offset, offset + 4));

          if (len < 8) {
            // smaller than its own size + type: nothing sensible can follow
            break;
          }

          addSampleDescription(track, stsd.subarray(offset + 4, offset + len));
          offset += len;
        }
      }

      if (frameTable) {
        track.frameTable = stbl ? buildFrameTable(stbl, track.timescale) : [];
      }

      tracks.push(track);
    });

    return tracks;
  };
  /**
   * Read the timescale and duration out of the movie header (`moov` > `mvhd`).
   *
   * @param {TypedArray} bytes
   *        bytes to search for a complete mvhd box in
   *
   * @return {Object|undefined}
   *         `{timestampScale, duration, bytes}` where `bytes` is the mvhd
   *         payload, or undefined when no non-empty mvhd box was found
   */
  export var parseMediaInfo = function parseMediaInfo(bytes) {
    var mvhd = findBox(bytes, ['moov', 'mvhd'], true)[0];

    if (!mvhd || !mvhd.length) {
      return;
    }

    // mvhd v1 has 8 byte duration and other fields too, which moves the
    // timescale from offset 12 to offset 20
    var isVersion1 = mvhd[0] === 1;
    var scaleStart = isVersion1 ? 20 : 12;
    var durationStart = scaleStart + 4;
    var durationEnd = durationStart + (isVersion1 ? 8 : 4);

    return {
      timestampScale: bytesToNumber(mvhd.subarray(scaleStart, durationStart)),
      duration: bytesToNumber(mvhd.subarray(durationStart, durationEnd)),
      bytes: mvhd
    };
  };