ebml-helpers.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", {
  3. value: true
  4. });
  5. exports.parseData = exports.parseTracks = exports.decodeBlock = exports.findEbml = exports.EBML_TAGS = void 0;
  6. var _byteHelpers = require("./byte-helpers");
  7. var _codecHelpers = require("./codec-helpers.js");
  8. // relevant specs for this parser:
  9. // https://matroska-org.github.io/libebml/specs.html
  10. // https://www.matroska.org/technical/elements.html
  11. // https://www.webmproject.org/docs/container/
  /**
   * EBML element IDs used by this parser, keyed by element name. Every value
   * is the raw ID byte sequence passed through `toUint8` from byte-helpers.
   *
   * `Track` and `TrackEntry` carry the same ID (0xAE).
   */
  var EBML_TAGS = (function (rawIds) {
    var tags = {};

    Object.keys(rawIds).forEach(function (name) {
      tags[name] = (0, _byteHelpers.toUint8)(rawIds[name]);
    });

    return tags;
  })({
    EBML: [0x1A, 0x45, 0xDF, 0xA3],
    DocType: [0x42, 0x82],
    Segment: [0x18, 0x53, 0x80, 0x67],
    SegmentInfo: [0x15, 0x49, 0xA9, 0x66],
    Tracks: [0x16, 0x54, 0xAE, 0x6B],
    Track: [0xAE],
    TrackNumber: [0xd7],
    DefaultDuration: [0x23, 0xe3, 0x83],
    TrackEntry: [0xAE],
    TrackType: [0x83],
    FlagDefault: [0x88],
    CodecID: [0x86],
    CodecPrivate: [0x63, 0xA2],
    VideoTrack: [0xe0],
    AudioTrack: [0xe1],
    // Not used yet, but will be used for live webm/mkv
    // see https://www.matroska.org/technical/basics.html#block-structure
    // see https://www.matroska.org/technical/basics.html#simpleblock-structure
    Cluster: [0x1F, 0x43, 0xB6, 0x75],
    Timestamp: [0xE7],
    TimestampScale: [0x2A, 0xD7, 0xB1],
    BlockGroup: [0xA0],
    BlockDuration: [0x9B],
    Block: [0xA1],
    SimpleBlock: [0xA3]
  });
  /**
   * This is a simple table to determine the length
   * of things in ebml. The length is one based (starts at 1,
   * rather than zero) and for every zero bit before a one bit
   * we add one to length. We also need this table because in some
   * cases we have to xor all the length bits from another value.
   */
  46. exports.EBML_TAGS = EBML_TAGS;
  // Single-bit masks for the first byte of a vint, ordered from the most
  // significant bit down. LENGTH_TABLE[n - 1] is the marker bit of a vint that
  // is n bytes long.
  var LENGTH_TABLE = [128, 64, 32, 16, 8, 4, 2, 1];

  /**
   * Work out how many bytes a vint occupies by inspecting its first byte.
   *
   * @param {number} byte
   *        the first byte of the vint
   * @return {number}
   *         one plus the number of zero bits that come before the first set
   *         bit; 9 when none of the eight bits is set
   */
  var getLength = function getLength(byte) {
    var leadingZeros = 0;

    while (leadingZeros < LENGTH_TABLE.length && !(byte & LENGTH_TABLE[leadingZeros])) {
      leadingZeros++;
    }

    return leadingZeros + 1;
  }; // length in ebml is stored in the first 4 to 8 bits
  58. // of the first byte. 4 for the id length and 8 for the
  59. // data size length. Length is measured by converting the number to binary
  60. // then 1 + the number of zeros before a 1 is encountered starting
  61. // from the left.
  /**
   * Read a single variable length integer (vint) out of `bytes`.
   *
   * @param {Uint8Array} bytes
   *        the data to read from; must provide `subarray`
   * @param {number} offset
   *        index of the first byte of the vint
   * @param {boolean} [removeLength=true]
   *        when true the length marker bit is cleared from a copy of the
   *        bytes before the value is computed; pass false for element IDs,
   *        which keep their marker bit
   * @param {boolean} [signed=false]
   *        forwarded to `bytesToNumber` as its `signed` option
   * @return {{length: number, value: number, bytes: (Uint8Array|number[])}}
   *         the vint's byte length, its numeric value and the bytes the value
   *         was computed from (a plain array copy when `removeLength` is
   *         true, otherwise a view onto `bytes`)
   */
  var getvint = function getvint(bytes, offset, removeLength, signed) {
    var stripMarker = removeLength === undefined ? true : removeLength;
    var isSigned = signed === undefined ? false : signed;
    var length = getLength(bytes[offset]);
    var end = offset + length;
    var valueBytes = bytes.subarray(offset, end);

    if (stripMarker) {
      // Work on a copy rather than the subarray view above: clearing the
      // marker bit must not modify the caller's data. Array.prototype.slice
      // is used because IE 11 has no Uint8Array#slice.
      valueBytes = Array.prototype.slice.call(bytes, offset, end);
      valueBytes[0] ^= LENGTH_TABLE[length - 1];
    }

    return {
      length: length,
      value: (0, _byteHelpers.bytesToNumber)(valueBytes, {
        signed: isSigned
      }),
      bytes: valueBytes
    };
  };
  /**
   * Convert one path entry into the byte form that element IDs are compared
   * against.
   *
   * @param {string|number|Array|Uint8Array} path
   *        - a hex string such as '1A45DFA3': every pair of hex digits becomes
   *          one byte value
   *        - a number: converted with `numberToBytes`
   *        - anything else: returned untouched
   * @return {Array|Uint8Array}
   *         the bytes for this path entry
   */
  var normalizePath = function normalizePath(path) {
    if (typeof path === 'string') {
      // Each 1-2 character chunk is itself a string, so it has to be parsed
      // as a hex byte here. Passing it back through normalizePath would hit
      // this same branch again and recurse until the stack overflows.
      return path.match(/.{1,2}/g).map(function (hexByte) {
        return parseInt(hexByte, 16);
      });
    }

    if (typeof path === 'number') {
      return (0, _byteHelpers.numberToBytes)(path);
    }

    return path;
  };
  /**
   * Normalize a whole search path. A value that is not an array is treated
   * as a path with that single entry.
   *
   * @param {Array|*} paths
   *        an array of path entries, or one lone entry
   * @return {Array}
   *         a new array holding each entry after `normalizePath`
   */
  var normalizePaths = function normalizePaths(paths) {
    var entries = Array.isArray(paths) ? paths : [paths];

    return entries.map(function (entry) {
      return normalizePath(entry);
    });
  };
  /**
   * Find where the data of an element with an unknown size ends. Starting at
   * `offset`, child elements are skipped one at a time until an element whose
   * ID matches `id` is reached, or the data runs out.
   *
   * This walks the children with a loop instead of one recursive call per
   * child, so an element with a very large number of children cannot
   * overflow the call stack.
   *
   * @param {{bytes: (Array|Uint8Array)}} id
   *        the parsed ID (as returned by `getvint`) of the unknown-size element
   * @param {Uint8Array} bytes
   *        the data being searched
   * @param {number} offset
   *        index at which the first child element starts
   * @return {number}
   *         the index in `bytes` of the next element with the same ID, or
   *         `bytes.length` when there is none
   */
  var getInfinityDataSize = function getInfinityDataSize(id, bytes, offset) {
    var cursor = offset;

    while (cursor < bytes.length) {
      var childId = getvint(bytes, cursor, false);

      if ((0, _byteHelpers.bytesMatch)(id.bytes, childId.bytes)) {
        return cursor;
      }

      var childSize = getvint(bytes, cursor + childId.length);

      // step over this child: its size header, its data and its ID
      cursor = cursor + childSize.length + childSize.value + childId.length;
    }

    return bytes.length;
  };
  /**
   * Notes on the EBML format.
   *
   * EBML uses "vint" tags. Every vint tag contains
   * two parts:
   *
   * 1. The length from the first byte. You get this by
   * converting the byte to binary and counting the zeros
   * before a 1. Then you add 1 to that. Examples:
   * 00011111 = length 4 because there are 3 zeros before a 1.
   * 00100000 = length 3 because there are 2 zeros before a 1.
   * 00000011 = length 7 because there are 6 zeros before a 1.
   *
   * 2. The bits used for length are removed from the first byte.
   * Then all the bytes are merged into a value. NOTE: this
   * is not the case for EBML tag IDs, as their IDs include
   * the length bits.
   *
   */
  /**
   * Collect the data of every element reachable by following `paths` from the
   * top level of `bytes`.
   *
   * Elements at the current level are walked in order. An element whose ID
   * matches the first path entry is either collected (when it is the last
   * entry) or searched recursively with the remaining entries.
   *
   * @param {Uint8Array|*} bytes
   *        the data to search; converted with `toUint8`
   * @param {Array|*} paths
   *        the element IDs to follow, outermost first; normalized with
   *        `normalizePaths`
   * @return {Uint8Array[]}
   *         views onto the data of each matching element, in document order
   */
  var findEbml = function findEbml(bytes, paths) {
    var wanted = normalizePaths(paths);
    var source = (0, _byteHelpers.toUint8)(bytes);
    var found = [];

    if (!wanted.length) {
      return found;
    }

    var remaining = wanted.slice(1);
    var cursor = 0;

    while (cursor < source.length) {
      var id = getvint(source, cursor, false);
      var size = getvint(source, cursor + id.length);
      var payloadStart = cursor + id.length + size.length;
      var payloadLength = size.value;

      // dataSize is unknown or this is a live stream
      // NOTE(review): only the value 0x7f is treated as "unknown size" here,
      // whatever the byte length of the size field was - confirm intended.
      if (payloadLength === 0x7f) {
        payloadLength = getInfinityDataSize(id, source, payloadStart);

        // getInfinityDataSize returns an absolute index; turn it into a
        // length unless it signalled "runs to the end of the data"
        if (payloadLength !== source.length) {
          payloadLength -= payloadStart;
        }
      }

      // never read past the end of the data we were given
      var payloadEnd = Math.min(payloadStart + payloadLength, source.length);
      var payload = source.subarray(payloadStart, payloadEnd);

      if ((0, _byteHelpers.bytesMatch)(wanted[0], id.bytes)) {
        if (remaining.length === 0) {
          // this is the end of the paths and we've found the tag we were
          // looking for
          found.push(payload);
        } else {
          // recursively search for the next tag inside of the data
          // of this one
          var nested = findEbml(payload, remaining);

          for (var n = 0; n < nested.length; n++) {
            found.push(nested[n]);
          }
        }
      }

      // move past this tag entirely
      cursor = payloadStart + payload.length;
    }

    return found;
  }; // see https://www.matroska.org/technical/basics.html#block-structure
  171. exports.findEbml = findEbml;
  /**
   * Decode a Matroska Block, SimpleBlock or BlockGroup into its header fields
   * and frames.
   *
   * @param {Uint8Array} block
   *        the element data. For type 'group' this is the BlockGroup data and
   *        the Block inside of it is located with `findEbml`.
   * @param {string} type
   *        'simple' for a SimpleBlock, 'group' for a BlockGroup; any other
   *        value is treated as a plain Block
   * @param {number} timestampScale
   *        the segment timestamp scale
   * @param {number} clusterTimestamp
   *        timestamp of the cluster that contains the block
   * @return {Object}
   *         `duration` (BlockGroup only, otherwise undefined), `trackNumber`,
   *         `keyframe`, `invisible`, `lacing` (0 none, 1 xiph, 2 fixed,
   *         3 ebml), `discardable`, `frames` (array of Uint8Array views),
   *         `pts`, `dts` and the relative `timestamp` read from the block
   */
  var decodeBlock = function decodeBlock(block, type, timestampScale, clusterTimestamp) {
    var duration;

    if (type === 'group') {
      duration = findEbml(block, [EBML_TAGS.BlockDuration])[0];

      if (duration) {
        duration = (0, _byteHelpers.bytesToNumber)(duration);
        // NOTE(review): timestampScale cancels out of this expression, which
        // leaves duration / 1000 - confirm that is the intended unit.
        duration = 1 / timestampScale * duration * timestampScale / 1000;
      }

      block = findEbml(block, [EBML_TAGS.Block])[0];
      type = 'block'; // treat data as a block after this point
    }

    // block header: track number (vint), 16 bit signed big endian relative
    // timestamp, one flags byte, then the (possibly laced) frame data
    var dv = new DataView(block.buffer, block.byteOffset, block.byteLength);
    var trackNumber = getvint(block, 0);
    var timestamp = dv.getInt16(trackNumber.length, false);
    var flags = block[trackNumber.length + 2];
    var data = block.subarray(trackNumber.length + 3);

    // pts/dts in seconds
    var ptsdts = 1 / timestampScale * (clusterTimestamp + timestamp) * timestampScale / 1000;

    // the frame
    var parsed = {
      duration: duration,
      trackNumber: trackNumber.value,
      keyframe: type === 'simple' && flags >> 7 === 1,
      invisible: (flags & 0x08) >> 3 === 1,
      lacing: (flags & 0x06) >> 1,
      discardable: type === 'simple' && (flags & 0x01) === 1,
      frames: [],
      pts: ptsdts,
      dts: ptsdts,
      timestamp: timestamp
    };

    if (!parsed.lacing) {
      parsed.frames.push(data);
      return parsed;
    }

    // the first byte of laced data holds the number of frames minus one
    var numberOfFrames = data[0] + 1;
    var frameSizes = [];
    var offset = 1;

    // Fixed: every frame has the same size, so all sizes are known
    if (parsed.lacing === 2) {
      var sizeOfFrame = (data.length - offset) / numberOfFrames;

      for (var i = 0; i < numberOfFrames; i++) {
        frameSizes.push(sizeOfFrame);
      }
    }

    // xiph: each size is a run of bytes that ends with a byte below 0xFF.
    // Only the first numberOfFrames - 1 sizes are stored.
    if (parsed.lacing === 1) {
      for (var _i = 0; _i < numberOfFrames - 1; _i++) {
        var size = 0;

        do {
          size += data[offset];
          offset++;
        } while (data[offset - 1] === 0xFF);

        frameSizes.push(size);
      }
    }

    // ebml
    if (parsed.lacing === 3) {
      // first vint is unsigned
      // after that vints are signed and
      // based on a compounding size
      // NOTE(review): the signed sizes rely on how bytesToNumber interprets
      // `signed` - confirm it matches the EBML lacing encoding.
      var _size = 0;

      for (var _i2 = 0; _i2 < numberOfFrames - 1; _i2++) {
        var vint = _i2 === 0 ? getvint(data, offset) : getvint(data, offset, true, true);

        _size += vint.value;
        frameSizes.push(_size);
        offset += vint.length;
      }
    }

    frameSizes.forEach(function (frameSize) {
      parsed.frames.push(data.subarray(offset, offset + frameSize));
      offset += frameSize;
    });

    // Xiph and EBML lacing do not store the size of the last frame: it is
    // whatever is left once the other frames have been taken out. Without
    // this the final frame of such a block would be missing from `frames`.
    if (parsed.lacing !== 2 && numberOfFrames >= 1) {
      parsed.frames.push(data.subarray(offset));
    }

    return parsed;
  }; // VP9 Codec Feature Metadata (CodecPrivate)
  242. // https://www.webmproject.org/docs/container/
  243. exports.decodeBlock = decodeBlock;
  /**
   * Parse VP9 codec feature metadata (CodecPrivate) into an object.
   *
   * The data is read as a sequence of records: one ID byte (low seven bits
   * used), one length byte, then `length` bytes of payload.
   *
   * @param {Uint8Array} bytes
   *        the CodecPrivate data
   * @return {Object}
   *         IDs 1-4 are stored as `profile`, `level`, `bitDepth` and
   *         `chromaSubsampling`; any other ID is stored under its numeric ID.
   *         A one byte payload is stored as a number, anything else as a
   *         subarray view.
   */
  var parseVp9Private = function parseVp9Private(bytes) {
    var namedFeatures = {
      1: 'profile',
      2: 'level',
      3: 'bitDepth',
      4: 'chromaSubsampling'
    };
    var params = {};
    var pos = 0;

    while (pos < bytes.length) {
      var id = bytes[pos] & 0x7f;
      var len = bytes[pos + 1];
      var payloadStart = pos + 2;
      var val = len === 1 ? bytes[payloadStart] : bytes.subarray(payloadStart, payloadStart + len);

      params[namedFeatures[id] || id] = val;
      pos = payloadStart + len;
    }

    return params;
  };
  /**
   * Find and decode the track entries contained in `bytes`.
   *
   * Track entries are searched for under Segment > Tracks first, then under
   * Tracks, then at the top level; the first search that finds anything is
   * used. Tracks whose type is not video (1), audio (2) or subtitle (17) are
   * skipped.
   *
   * @param {Uint8Array|*} bytes
   *        the data to search; converted with `toUint8`
   * @return {Object[]}
   *         one object per track with `rawCodec`, `type`, `codecPrivate`,
   *         `number`, `defaultDuration`, `default`, `rawData` and `codec`
   *         ('' when the codec id is not recognized), sorted by `number`
   */
  var parseTracks = function parseTracks(bytes) {
    var source = (0, _byteHelpers.toUint8)(bytes);
    var decodedTracks = [];

    // from most to least nested
    var searchPaths = [
      [EBML_TAGS.Segment, EBML_TAGS.Tracks, EBML_TAGS.Track],
      [EBML_TAGS.Tracks, EBML_TAGS.Track],
      [EBML_TAGS.Track]
    ];
    var tracks = [];

    for (var p = 0; p < searchPaths.length && !tracks.length; p++) {
      tracks = findEbml(source, searchPaths[p]);
    }

    if (!tracks.length) {
      return decodedTracks;
    }

    // 1 is video, 2 is audio, 17 is subtitle
    // other values are unimportant in this context
    var trackTypeName = function trackTypeName(typeByte) {
      switch (typeByte) {
        case 1:
          return 'video';
        case 2:
          return 'audio';
        case 17:
          return 'subtitle';
        default:
          return null;
      }
    };

    var pad2 = function pad2(value) {
      return (0, _byteHelpers.padStart)(value, 2, '0');
    };

    // Builds the vp09 codec string, or plain 'vp9' without codec private data.
    var vp9Codec = function vp9Codec(track, codecPrivate) {
      if (!codecPrivate) {
        return 'vp9';
      }

      var features = parseVp9Private(codecPrivate);
      var profile = features.profile;
      var level = features.level;
      var bitDepth = features.bitDepth;
      var chromaSubsampling = features.chromaSubsampling;
      var codec = 'vp09.' + pad2(profile) + '.' + pad2(level) + '.' + pad2(bitDepth) + '.' + pad2(chromaSubsampling);

      // Video -> Colour -> Ebml name
      var colourValue = function colourValue(leafId) {
        return findEbml(track, [0xE0, [0x55, 0xB0], leafId])[0] || [];
      };

      var matrixCoefficients = colourValue([0x55, 0xB1]);
      var videoFullRangeFlag = colourValue([0x55, 0xB9]);
      var transferCharacteristics = colourValue([0x55, 0xBA]);
      var colourPrimaries = colourValue([0x55, 0xBB]);

      // if we find any optional codec parameter specify them all.
      if (matrixCoefficients.length || videoFullRangeFlag.length || transferCharacteristics.length || colourPrimaries.length) {
        codec += '.' + pad2(colourPrimaries[0]);
        codec += '.' + pad2(transferCharacteristics[0]);
        codec += '.' + pad2(matrixCoefficients[0]);
        codec += '.' + pad2(videoFullRangeFlag[0]);
      }

      return codec;
    };

    // Maps the Matroska codec id to a codec string; checks run in a fixed
    // order and the first one that matches wins.
    var codecFor = function codecFor(track, rawCodec, codecPrivate) {
      if (/V_MPEG4\/ISO\/AVC/.test(rawCodec)) {
        return "avc1." + (0, _codecHelpers.getAvcCodec)(codecPrivate);
      }

      if (/V_MPEGH\/ISO\/HEVC/.test(rawCodec)) {
        return "hev1." + (0, _codecHelpers.getHvcCodec)(codecPrivate);
      }

      if (/V_MPEG4\/ISO\/ASP/.test(rawCodec)) {
        return codecPrivate ? 'mp4v.20.' + codecPrivate[4].toString() : 'mp4v.20.9';
      }

      if (/^V_THEORA/.test(rawCodec)) {
        return 'theora';
      }

      if (/^V_VP8/.test(rawCodec)) {
        return 'vp8';
      }

      if (/^V_VP9/.test(rawCodec)) {
        return vp9Codec(track, codecPrivate);
      }

      if (/^V_AV1/.test(rawCodec)) {
        return "av01." + (0, _codecHelpers.getAv1Codec)(codecPrivate);
      }

      if (/A_ALAC/.test(rawCodec)) {
        return 'alac';
      }

      if (/A_MPEG\/L2/.test(rawCodec)) {
        return 'mp2';
      }

      if (/A_MPEG\/L3/.test(rawCodec)) {
        return 'mp3';
      }

      if (/^A_AAC/.test(rawCodec)) {
        return codecPrivate ? 'mp4a.40.' + (codecPrivate[0] >>> 3).toString() : 'mp4a.40.2';
      }

      if (/^A_AC3/.test(rawCodec)) {
        return 'ac-3';
      }

      if (/^A_PCM/.test(rawCodec)) {
        return 'pcm';
      }

      if (/^A_MS\/ACM/.test(rawCodec)) {
        return 'speex';
      }

      if (/^A_EAC3/.test(rawCodec)) {
        return 'ec-3';
      }

      if (/^A_VORBIS/.test(rawCodec)) {
        return 'vorbis';
      }

      if (/^A_FLAC/.test(rawCodec)) {
        return 'flac';
      }

      if (/^A_OPUS/.test(rawCodec)) {
        return 'opus';
      }

      return '';
    };

    tracks.forEach(function (track) {
      var typeBytes = findEbml(track, EBML_TAGS.TrackType)[0];

      if (!typeBytes || !typeBytes.length) {
        return;
      }

      var trackType = trackTypeName(typeBytes[0]);

      if (trackType === null) {
        return;
      }

      // todo parse language
      var decodedTrack = {
        rawCodec: (0, _byteHelpers.bytesToString)(findEbml(track, [EBML_TAGS.CodecID])[0]),
        type: trackType,
        codecPrivate: findEbml(track, [EBML_TAGS.CodecPrivate])[0],
        number: (0, _byteHelpers.bytesToNumber)(findEbml(track, [EBML_TAGS.TrackNumber])[0]),
        defaultDuration: (0, _byteHelpers.bytesToNumber)(findEbml(track, [EBML_TAGS.DefaultDuration])[0]),
        default: findEbml(track, [EBML_TAGS.FlagDefault])[0],
        rawData: track
      };

      decodedTrack.codec = codecFor(track, decodedTrack.rawCodec, decodedTrack.codecPrivate);
      decodedTracks.push(decodedTrack);
    });

    return decodedTracks.sort(function (a, b) {
      return a.number - b.number;
    });
  };
  384. exports.parseTracks = parseTracks;
  /**
   * Decode every block of every cluster in the first Segment of `data`.
   *
   * @param {Uint8Array|*} data
   *        the data to parse
   * @param {Object[]} [tracks]
   *        already parsed tracks; when falsy the tracks are parsed from the
   *        segment with `parseTracks`
   * @return {{tracks: Object[], blocks: Object[]}}
   *         the tracks and the decoded blocks, cluster by cluster, with the
   *         blocks of each cluster ordered by their position in the data
   */
  var parseData = function parseData(data, tracks) {
    var allBlocks = [];
    var segment = findEbml(data, [EBML_TAGS.Segment])[0];
    var scaleBytes = findEbml(segment, [EBML_TAGS.SegmentInfo, EBML_TAGS.TimestampScale])[0];

    // in nanoseconds, defaults to 1ms
    var timestampScale = scaleBytes && scaleBytes.length ? (0, _byteHelpers.bytesToNumber)(scaleBytes) : 1000000;
    var clusters = findEbml(segment, [EBML_TAGS.Cluster]);
    var resolvedTracks = tracks ? tracks : parseTracks(segment);

    // wraps element data together with the kind of block it came from
    var tagWith = function tagWith(type) {
      return function (blockData) {
        return {
          type: type,
          data: blockData
        };
      };
    };

    var byPosition = function byPosition(a, b) {
      return a.data.byteOffset - b.data.byteOffset;
    };

    clusters.forEach(function (cluster) {
      var simpleBlocks = findEbml(cluster, [EBML_TAGS.SimpleBlock]).map(tagWith('simple'));
      var blockGroups = findEbml(cluster, [EBML_TAGS.BlockGroup]).map(tagWith('group'));
      var timestampBytes = findEbml(cluster, [EBML_TAGS.Timestamp])[0];
      var clusterTimestamp = timestampBytes || 0;

      if (timestampBytes && timestampBytes.length) {
        clusterTimestamp = (0, _byteHelpers.bytesToNumber)(timestampBytes);
      }

      // get all blocks then sort them into the correct order
      simpleBlocks.concat(blockGroups).sort(byPosition).forEach(function (block) {
        allBlocks.push(decodeBlock(block.data, block.type, timestampScale, clusterTimestamp));
      });
    });

    return {
      tracks: resolvedTracks,
      blocks: allBlocks
    };
  };
  428. exports.parseData = parseData;