format-parser.js 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. import { bytesToString, toUint8, toHexString, bytesMatch } from './byte-helpers.js';
  2. import { parseTracks as parseEbmlTracks } from './ebml-helpers.js';
  3. import { parseTracks as parseMp4Tracks } from './mp4-helpers.js';
  4. import { findFourCC } from './riff-helpers.js';
  5. import { getPages } from './ogg-helpers.js';
  6. import { detectContainerForBytes } from './containers.js';
  7. import { findH264Nal, findH265Nal } from './nal-helpers.js';
  8. import { parseTs } from './m2ts-helpers.js';
  9. import { getAvcCodec, getHvcCodec } from './codec-helpers.js';
  10. import { getId3Offset } from './id3-helpers.js'; // https://docs.microsoft.com/en-us/windows/win32/medfound/audio-subtype-guids
  11. // https://tools.ietf.org/html/rfc2361
  /**
   * Translate a two-byte wave format tag into a codec name.
   *
   * The callers in this file pass the first two bytes of a format chunk in
   * reversed order, so the byte pairs listed below are written in that same
   * (reversed) order.
   *
   * @param {Uint8Array|Array<number>} wFormatTag
   *        The tag bytes; normalized through `toUint8` before comparison.
   * @return {string}
   *         The codec name for a recognized tag, or an empty string otherwise.
   */
  var wFormatTagCodec = function wFormatTagCodec(wFormatTag) {
    var tagBytes = toUint8(wFormatTag);
    // [byte pair, codec name] -- checked top to bottom, first match wins.
    var knownTags = [
      [[0x00, 0x55], 'mp3'],
      [[0x16, 0x00], 'aac'],
      [[0x00, 0xFF], 'aac'],
      [[0x70, 0x4f], 'opus'],
      [[0x6C, 0x61], 'alac'],
      [[0xF1, 0xAC], 'flac'],
      [[0x20, 0x00], 'ac-3'],
      [[0xFF, 0xFE], 'ec-3'],
      [[0x00, 0x50], 'mp2'],
      [[0x56, 0x6f], 'vorbis'],
      [[0xA1, 0x09], 'speex']
    ];

    for (var i = 0; i < knownTags.length; i++) {
      if (bytesMatch(tagBytes, knownTags[i][0])) {
        return knownTags[i][1];
      }
    }

    // unknown tag: callers treat the empty string as "no codec name"
    return '';
  };
  /**
   * Build a mimetype string such as `video/mp4;codecs="a,b"`.
   *
   * The major type is `video` when a video codec is present and `audio`
   * otherwise. The `codecs` parameter lists the video codec first, then the
   * audio codec, and is left out entirely when neither is set.
   *
   * @param {string} name
   *        The mimetype subtype, e.g. `mp4` or `ogg`.
   * @param {Object} codecs
   *        Object whose optional `video` and `audio` properties hold codec strings.
   * @return {string}
   *         The assembled mimetype.
   */
  var formatMimetype = function formatMimetype(name, codecs) {
    var order = ['video', 'audio'];
    var codecList = '';

    for (var i = 0; i < order.length; i++) {
      var value = codecs[order[i]];

      if (!value) {
        continue;
      }
      if (codecList.length) {
        codecList += ',';
      }
      codecList += value;
    }

    var majorType = codecs.video ? 'video' : 'audio';
    var mimetype = majorType + "/" + name;

    if (codecList) {
      mimetype += ";codecs=\"" + codecList + "\"";
    }
    return mimetype;
  };
  /**
   * Collect the codec of the first audio track and of the first video track.
   *
   * @param {Array<Object>} tracks
   *        Parsed tracks; only the `type` and `codec` properties are read.
   * @return {Object}
   *         Codecs keyed by `audio` / `video`; a key is absent when no track
   *         of that type supplied a codec.
   */
  var firstTrackCodecs = function firstTrackCodecs(tracks) {
    var codecs = {};

    for (var i = 0; i < tracks.length; i++) {
      var track = tracks[i];
      var wanted = track.type === 'audio' || track.type === 'video';

      if (wanted && !codecs[track.type]) {
        codecs[track.type] = track.codec;
      }
    }
    return codecs;
  };

  /**
   * Codec parsers keyed by container name. `parseFormatForBytes` looks a parser
   * up by the value `detectContainerForBytes` returns.
   *
   * Every parser takes the media bytes and returns an object of the shape
   * `{codecs: {audio?: string, video?: string}, mimetype: string}`.
   */
  var parseCodecFrom = {
    mov: function mov(bytes) {
      // mov and mp4 both use a nearly identical box structure, so reuse the
      // mp4 parser and only swap the mimetype subtype.
      var retval = parseCodecFrom.mp4(bytes);

      if (retval.mimetype) {
        retval.mimetype = retval.mimetype.replace('mp4', 'quicktime');
      }
      return retval;
    },
    mp4: function mp4(bytes) {
      var codecs = firstTrackCodecs(parseMp4Tracks(toUint8(bytes)));

      return {
        codecs: codecs,
        mimetype: formatMimetype('mp4', codecs)
      };
    },
    '3gp': function gp(bytes) {
      // no codec detection is attempted for 3gp
      return {
        codecs: {},
        mimetype: 'video/3gpp'
      };
    },
    ogg: function ogg(bytes) {
      var pages = getPages(bytes, 0, 4);
      var codecs = {};
      // Codec identification strings and the page offset each one is expected
      // at. Checked in order; the first match on a page wins.
      var signatures = [
        { magic: [0x4F, 0x70, 0x75, 0x73], offset: 28, type: 'audio', codec: 'opus' },
        { magic: [0x56, 0x50, 0x38, 0x30], offset: 29, type: 'video', codec: 'vp8' },
        { magic: [0x74, 0x68, 0x65, 0x6F, 0x72, 0x61], offset: 29, type: 'video', codec: 'theora' },
        { magic: [0x46, 0x4C, 0x41, 0x43], offset: 29, type: 'audio', codec: 'flac' },
        { magic: [0x53, 0x70, 0x65, 0x65, 0x78], offset: 28, type: 'audio', codec: 'speex' },
        { magic: [0x76, 0x6F, 0x72, 0x62, 0x69, 0x73], offset: 29, type: 'audio', codec: 'vorbis' }
      ];

      pages.forEach(function (page) {
        for (var i = 0; i < signatures.length; i++) {
          var signature = signatures[i];

          if (bytesMatch(page, signature.magic, { offset: signature.offset })) {
            codecs[signature.type] = signature.codec;
            return;
          }
        }
      });
      return {
        codecs: codecs,
        mimetype: formatMimetype('ogg', codecs)
      };
    },
    wav: function wav(bytes) {
      // A file without a fmt chunk is treated like one with an empty chunk
      // instead of throwing on `undefined`.
      var format = findFourCC(bytes, ['WAVE', 'fmt'])[0] || [];
      // the first two bytes hold wFormatTag; reversed to the order
      // wFormatTagCodec compares against
      var wFormatTag = Array.prototype.slice.call(format, 0, 2).reverse();
      var mimetype = 'audio/vnd.wave';
      var codecs = {
        audio: wFormatTagCodec(wFormatTag)
      };
      // hex form of the tag; zero bytes are skipped
      var codecString = wFormatTag.reduce(function (acc, v) {
        if (v) {
          acc += toHexString(v);
        }
        return acc;
      }, '');

      if (codecString) {
        mimetype += ";codec=" + codecString;
      }
      // fall back to the raw hex tag when the tag has no known codec name
      if (codecString && !codecs.audio) {
        codecs.audio = codecString;
      }
      return {
        codecs: codecs,
        mimetype: mimetype
      };
    },
    avi: function avi(bytes) {
      var movi = findFourCC(bytes, ['AVI', 'movi'])[0];
      var strls = findFourCC(bytes, ['AVI', 'hdrl', 'strl']);
      var codecs = {};

      strls.forEach(function (strl) {
        var strh = findFourCC(strl, ['strh'])[0];
        var strf = findFourCC(strl, ['strf'])[0];

        // a stream list without a header or format chunk cannot be classified
        if (!strh || !strf) {
          return;
        }

        // now parse AVIStreamHeader to get codec and type:
        // https://docs.microsoft.com/en-us/previous-versions/windows/desktop/api/avifmt/ns-avifmt-avistreamheader
        var type = bytesToString(strh.subarray(0, 4));

        if (type === 'auds') {
          var wFormatTag = Array.prototype.slice.call(strf, 0, 2).reverse();

          codecs.audio = wFormatTagCodec(wFormatTag);
          return;
        }
        if (type !== 'vids') {
          return;
        }

        // https://docs.microsoft.com/en-us/windows/win32/api/wingdi/ns-wingdi-bitmapinfoheader
        var handler = bytesToString(strh.subarray(4, 8));
        var compression = bytesToString(strf.subarray(16, 20));
        var uses = function uses(fourcc) {
          return handler === fourcc || compression === fourcc;
        };
        // look for 00dc (compressed video fourcc code) or 00db (uncompressed
        // video fourcc code); in both cases take the first chunk found
        var videoData = movi && (findFourCC(movi, ['00dc'])[0] || findFourCC(movi, ['00db'])[0]);
        var hasVideoData = Boolean(videoData && videoData.length);
        var codec;

        if (uses('H264')) {
          codec = hasVideoData ? parseCodecFrom.h264(videoData).codecs.video : 'avc1';
        } else if (uses('HEVC')) {
          codec = hasVideoData ? parseCodecFrom.h265(videoData).codecs.video : 'hev1';
        } else if (uses('FMP4')) {
          // byte 12 of movi is appended as the last codec field; only read it
          // when it actually exists
          codec = movi && movi.length > 12 ? 'mp4v.20.' + movi[12].toString() : 'mp4v.20';
        } else if (uses('VP80')) {
          codec = 'vp8';
        } else if (uses('VP90')) {
          codec = 'vp9';
        } else if (uses('AV01')) {
          codec = 'av01';
        } else if (uses('theo')) {
          // compression is decoded from four bytes, so it is compared with the
          // four character code just like the handler
          codec = 'theora';
        } else {
          if (hasVideoData) {
            var result = detectContainerForBytes(videoData);

            // NOTE(review): the whole movi chunk (not just videoData) is
            // handed to the parsers here, as in the original code.
            if (result === 'h264') {
              codec = parseCodecFrom.h264(movi).codecs.video;
            }
            if (result === 'h265') {
              codec = parseCodecFrom.h265(movi).codecs.video;
            }
          }
          if (!codec) {
            codec = handler || compression;
          }
        }

        if (codec) {
          codecs.video = codec;
        }
      });
      return {
        codecs: codecs,
        mimetype: formatMimetype('avi', codecs)
      };
    },
    ts: function ts(bytes) {
      var parsed = parseTs(bytes);
      var codecs = {};

      Object.keys(parsed.streams).forEach(function (esPid) {
        var stream = parsed.streams[esPid];

        // refine the generic codec name using the first packet when there is
        // one; note that this updates the stream object returned by parseTs
        if (stream.codec === 'avc1' && stream.packets.length) {
          stream.codec = parseCodecFrom.h264(stream.packets[0]).codecs.video;
        } else if (stream.codec === 'hev1' && stream.packets.length) {
          stream.codec = parseCodecFrom.h265(stream.packets[0]).codecs.video;
        }
        codecs[stream.type] = stream.codec;
      });
      return {
        codecs: codecs,
        mimetype: formatMimetype('mp2t', codecs)
      };
    },
    webm: function webm(bytes) {
      // mkv and webm both use ebml to store codec info, so reuse the mkv
      // parser and only swap the mimetype subtype.
      var retval = parseCodecFrom.mkv(bytes);

      if (retval.mimetype) {
        retval.mimetype = retval.mimetype.replace('x-matroska', 'webm');
      }
      return retval;
    },
    mkv: function mkv(bytes) {
      var codecs = firstTrackCodecs(parseEbmlTracks(bytes));

      return {
        codecs: codecs,
        mimetype: formatMimetype('x-matroska', codecs)
      };
    },
    aac: function aac(bytes) {
      return {
        codecs: {
          audio: 'aac'
        },
        mimetype: 'audio/aac'
      };
    },
    ac3: function ac3(bytes) {
      // past id3 and syncword
      var offset = getId3Offset(bytes) + 2;
      // default to ac-3; only the 0x01, 0x7F byte pair switches to ec-3
      var isEc3 = bytesMatch(bytes, [0x01, 0x7f], {
        offset: offset
      });

      return {
        codecs: {
          audio: isEc3 ? 'ec-3' : 'ac-3'
        },
        mimetype: 'audio/vnd.dolby.dd-raw'
      };
    },
    mp3: function mp3(bytes) {
      return {
        codecs: {
          audio: 'mp3'
        },
        mimetype: 'audio/mpeg'
      };
    },
    flac: function flac(bytes) {
      return {
        codecs: {
          audio: 'flac'
        },
        mimetype: 'audio/flac'
      };
    },
    'h264': function h264(bytes) {
      // find seq_parameter_set_rbsp to get encoding settings for codec
      var nal = findH264Nal(bytes, 7, 3);
      var video = 'avc1';

      if (nal.length) {
        video += "." + getAvcCodec(nal);
      }
      return {
        codecs: {
          video: video
        },
        mimetype: 'video/h264'
      };
    },
    'h265': function h265(bytes) {
      // find video_parameter_set_rbsp or seq_parameter_set_rbsp
      // to get encoding settings for codec
      var nal = findH265Nal(bytes, [32, 33], 3);
      var video = 'hev1';

      if (nal.length) {
        var type = (nal[0] >> 1) & 0x3F;
        // profile_tier_level starts at byte 5 for video_parameter_set_rbsp
        // byte 2 for seq_parameter_set_rbsp
        var profileStart = type === 32 ? 5 : 2;

        video += "." + getHvcCodec(nal.subarray(profileStart));
      }
      return {
        codecs: {
          video: video
        },
        mimetype: 'video/h265'
      };
    }
  };
  /**
   * Detect the container of some media bytes and, when a codec parser exists
   * for that container, the codecs and mimetype as well.
   *
   * @param {Uint8Array|Array<number>|ArrayBuffer} bytes
   *        The media bytes; normalized through `toUint8` first.
   * @return {{codecs: Object, container: string, mimetype: string}}
   *         `container` is whatever `detectContainerForBytes` reports.
   *         `codecs` is `{}` and `mimetype` is `''` when no parser is
   *         registered for the container or the parser leaves them unset.
   */
  export var parseFormatForBytes = function parseFormatForBytes(bytes) {
    var data = toUint8(bytes);
    var container = detectContainerForBytes(data);
    var parseCodecFn = parseCodecFrom[container];
    // containers without a registered parser fall back to the empty defaults
    var parsed = parseCodecFn ? parseCodecFn(data) : {};

    return {
      codecs: parsed.codecs || {},
      container: container,
      mimetype: parsed.mimetype || ''
    };
  };