caption-parser.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Reads in-band CEA-708 captions out of FMP4 segments.
  8. * @see https://en.wikipedia.org/wiki/CEA-708
  9. */
  10. 'use strict';
  11. var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
  12. var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
  13. var findBox = require('../mp4/find-box.js');
  14. var parseTfdt = require('../tools/parse-tfdt.js');
  15. var parseTrun = require('../tools/parse-trun.js');
  16. var parseTfhd = require('../tools/parse-tfhd.js');
  17. var window = require('global/window');
  /**
   * Finds the sample that contains a given byte offset of the mdat.
   *
   * The samples are walked in order; each sample's `size` is subtracted from
   * the remaining offset until the remainder falls inside a sample.
   * Assumes that `parseSamples` has been called first.
   *
   * @param {Number} offset - The offset into the mdat
   * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
   * @return {?Object} The matching sample, or null if the offset lies beyond
   *   the combined size of all samples.
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var mapToSample = function mapToSample(offset, samples) {
    // Bytes still to be skipped before reaching the sample that holds `offset`.
    var remaining = offset;
    var index = 0;

    while (index < samples.length) {
      var candidate = samples[index];

      if (remaining < candidate.size) {
        return candidate;
      }

      remaining -= candidate.size;
      index += 1;
    }

    return null;
  };
  /**
   * Finds SEI nal units contained in a Media Data Box.
   * Assumes that `parseSamples` has been called first.
   *
   * The mdat is read as a sequence of NAL units, each preceded by a 4-byte
   * length. NAL units whose type (low five bits of the first byte) is 0x06
   * are collected; everything else is skipped.
   *
   * @param {Uint8Array} avcStream - The bytes of the mdat
   * @param {Object[]} samples - The samples parsed out by `parseSamples`
   * @param {Number} trackId - The trackId of this video track
   * @return {{logs: Object[], seiNals: Object[]}} `seiNals` holds the SEI NALUs
   *   found (nalUnitType, size, data, escapedRBSP, trackId, pts, dts), the shape
   *   expected by CaptionStream.push. `logs` holds a `warn` entry for every SEI
   *   NALU that was dropped because no sample timing was available for it.
   *
   * @see ISO-BMFF-12/2015, Section 8.1.1
   * @see Rec. ITU-T H.264, 7.3.2.3.1
   **/
  var findSeiNals = function findSeiNals(avcStream, samples, trackId) {
    var view = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength);
    var logs = [];
    var seiNals = [];
    var cursor = 0;
    var previousSample;
    var nalLength;
    var payload;
    var timing;
    var nal;

    while (cursor + 4 < avcStream.length) {
      nalLength = view.getUint32(cursor);
      // Step over the length prefix; `cursor` now points at the NAL header byte.
      cursor += 4;

      // Zero-length entries carry nothing to inspect; only type 0x06 (SEI) matters.
      if (nalLength > 0 && (avcStream[cursor] & 0x1F) === 0x06) {
        // NOTE(review): the NAL occupies [cursor, cursor + nalLength), so this
        // slice ends one byte past it — confirm this is intended.
        payload = avcStream.subarray(cursor + 1, cursor + 1 + nalLength);

        // If a matching sample cannot be found, use the last matched
        // sample's values as they should be as close as possible.
        timing = mapToSample(cursor, samples) || previousSample;

        nal = {
          nalUnitType: 'sei_rbsp',
          size: nalLength,
          data: payload,
          escapedRBSP: discardEmulationPreventionBytes(payload),
          trackId: trackId
        };

        if (timing) {
          nal.pts = timing.pts;
          nal.dts = timing.dts;
          previousSample = timing;
          seiNals.push(nal);
        } else {
          // No timing information at all: drop the NAL and record why.
          logs.push({
            level: 'warn',
            message: 'We\'ve encountered a nal unit without data at ' + cursor + ' for trackId ' + trackId + '. See mux.js#223.'
          });
        }
      }

      cursor += nalLength;
    }

    return {
      logs: logs,
      seiNals: seiNals
    };
  };
  /**
   * Parses sample information out of Track Run Boxes and calculates
   * the absolute presentation and decode timestamps of each sample.
   *
   * Each sample returned by `parseTrun` is completed in place: missing
   * `duration` / `size` fall back to the tfhd defaults (or 0), a missing
   * `compositionTimeOffset` becomes 0, and `trackId`, `dts` and `pts` are set.
   * The decode time keeps running across all truns.
   *
   * @param {Array<Uint8Array>} truns - The Track Run boxes to be parsed
   * @param {Number|BigInt} baseMediaDecodeTime - base media decode time from tfdt
   * @param {Object} tfhd - The parsed Track Fragment Header
   * @return {Object[]} the parsed samples of all truns, in order
   *
   * @see inspect.parseTfhd
   * @see ISO-BMFF-12/2015, Section 8.8.8
   * @see ISO-BMFF-12/2015, Section 8.8.12
   **/
  var parseSamples = function parseSamples(truns, baseMediaDecodeTime, tfhd) {
    // Running decode timestamp; advanced by each sample's duration.
    var currentDts = baseMediaDecodeTime;
    var fallbackDuration = tfhd.defaultSampleDuration || 0;
    var fallbackSize = tfhd.defaultSampleSize || 0;
    var trackId = tfhd.trackId;

    return truns.reduce(function (collected, trun) {
      // Note: We currently do not parse the sample table as well
      // as the trun. It's possible some sources will require this.
      // moov > trak > mdia > minf > stbl
      var runSamples = parseTrun(trun).samples;

      runSamples.forEach(function (entry) {
        if (entry.duration === undefined) {
          entry.duration = fallbackDuration;
        }

        if (entry.size === undefined) {
          entry.size = fallbackSize;
        }

        entry.trackId = trackId;
        entry.dts = currentDts;

        if (entry.compositionTimeOffset === undefined) {
          entry.compositionTimeOffset = 0;
        }

        // BigInt and Number cannot be mixed in arithmetic, so the per-sample
        // values are converted whenever the running timestamp is a BigInt.
        var isBig = typeof currentDts === 'bigint';

        entry.pts = currentDts + (isBig ? window.BigInt(entry.compositionTimeOffset) : entry.compositionTimeOffset);
        currentDts += isBig ? window.BigInt(entry.duration) : entry.duration;
      });

      return collected.concat(runSamples);
    }, []);
  };
  /**
   * Parses out caption nals from an FMP4 segment's video tracks.
   *
   * The n-th mdat of the segment is paired with the n-th `moof > traf`.
   * Only pairs whose tfhd trackId equals `videoTrackId` and whose traf has at
   * least one trun are scanned for SEI NAL units.
   *
   * @param {Uint8Array} segment - The bytes of a single segment
   * @param {Number} videoTrackId - The trackId of a video track in the segment
   * @return {Object.<Number, {seiNals: Object[], logs: Object[]}>} A mapping of
   *   video trackId to the seiNals found in that track and the logs produced
   *   while searching for them
   **/
  var parseCaptionNals = function parseCaptionNals(segment, videoTrackId) {
    // To get the samples
    var trafs = findBox(segment, ['moof', 'traf']);
    // To get SEI NAL units
    var mdats = findBox(segment, ['mdat']);
    var byTrack = {};

    // Pair up each mdat with a traf as moofs and mdats are in pairs.
    mdats.forEach(function (mdat, position) {
      // NOTE(review): when there are more mdats than trafs this is undefined
      // and is handed to findBox as-is — confirm findBox tolerates that.
      var traf = trafs[position];

      // Exactly 1 tfhd per traf
      var header = parseTfhd(findBox(traf, ['tfhd'])[0]);
      var trackId = header.trackId;

      // Either 0 or 1 tfdt per traf
      var tfdtBoxes = findBox(traf, ['tfdt']);
      var decodeTime = tfdtBoxes.length > 0 ? parseTfdt(tfdtBoxes[0]).baseMediaDecodeTime : 0;
      var truns = findBox(traf, ['trun']);

      // Only parse video data for the chosen video track
      if (videoTrackId !== trackId || !(truns.length > 0)) {
        return;
      }

      var found = findSeiNals(mdat, parseSamples(truns, decodeTime, header), trackId);
      var bucket = byTrack[trackId];

      if (!bucket) {
        bucket = byTrack[trackId] = {
          seiNals: [],
          logs: []
        };
      }

      bucket.seiNals = bucket.seiNals.concat(found.seiNals);
      bucket.logs = bucket.logs.concat(found.logs);
    });

    return byTrack;
  };
  /**
   * Collects the SEI NAL units of one video track of an fmp4 segment, together
   * with the logs produced while finding them and the track's timescale.
   * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
   *
   * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
   * @param {Number} trackId - The id of the video track to parse
   * @param {Number} timescale - The timescale for the video track from the init segment
   *
   * @return {?Object} result - null if `trackId` is null
   * @return {Object[]|undefined} result.seiNals - The SEI NALUs found for the track,
   *   undefined when `parseCaptionNals` returned nothing for it
   * @return {Object[]|undefined} result.logs - The logs for the track,
   *   undefined when `parseCaptionNals` returned nothing for it
   * @return {Number} result.timescale - The `timescale` argument, passed through
   **/
  var parseEmbeddedCaptions = function parseEmbeddedCaptions(segment, trackId, timescale) {
    // the ISO-BMFF spec says that trackId can't be zero, but there's some broken
    // content out there, so only an explicit null counts as "no track"
    if (trackId === null) {
      return null;
    }

    var nalsByTrack = parseCaptionNals(segment, trackId);
    var forTrack = nalsByTrack[trackId] || {};

    return {
      seiNals: forTrack.seiNals,
      logs: forTrack.logs,
      timescale: timescale
    };
  };
  /**
   * Converts SEI NALUs into captions that can be used by video.js
   *
   * A parser must be set up with `init()` before `parse()` does any work.
   **/
  var CaptionParser = function CaptionParser() {
    // Whether init() has been called
    var initialized = false;
    // The underlying CaptionStream, created by init()
    var stream;
    // Stores segments seen before trackId and timescale are set
    var pendingSegments;
    // Stores video track ID of the track being parsed
    var activeTrackId;
    // Stores the timescale of the track being parsed
    var activeTimescale;
    // Stores captions parsed so far
    var collected;
    // Stores whether we are receiving partial data or not
    var partialMode;

    /**
     * A method to indicate whether a CaptionParser has been initialized
     * @returns {Boolean}
     **/
    this.isInitialized = function () {
      return initialized;
    };

    /**
     * Initializes the underlying CaptionStream, SEI NAL parsing
     * and management, and caption collection
     *
     * @param {Object} [options]
     * @param {Boolean} [options.isPartial] - when truthy, flushStream() calls
     *   `partialFlush` on the CaptionStream instead of `flush`
     **/
    this.init = function (options) {
      stream = new CaptionStream();
      initialized = true;
      partialMode = options ? options.isPartial : false;

      // Collect dispatched captions
      stream.on('data', function (caption) {
        // Convert to seconds in the source's timescale
        caption.startTime = caption.startPts / activeTimescale;
        caption.endTime = caption.endPts / activeTimescale;
        collected.captions.push(caption);
        collected.captionStreams[caption.stream] = true;
      });

      stream.on('log', function (entry) {
        collected.logs.push(entry);
      });
    };

    /**
     * Determines if a new video track will be selected
     * or if the timescale changed
     *
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {Boolean} false when either argument is empty
     **/
    this.isNewInit = function (videoTrackIds, timescales) {
      if (videoTrackIds && videoTrackIds.length === 0) {
        return false;
      }

      if (timescales && typeof timescales === 'object' && Object.keys(timescales).length === 0) {
        return false;
      }

      return !(activeTrackId === videoTrackIds[0] && activeTimescale === timescales[activeTrackId]);
    };

    /**
     * Parses out SEI captions and interacts with underlying
     * CaptionStream to return dispatched captions
     *
     * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {?Object} The accumulated `{captions, captionStreams, logs}` object;
     *   a logs-only object when no SEI NALs were found but logs exist; null when
     *   the parser is not initialized, arguments are missing, the segment was
     *   cached, or there is nothing to report
     * @see parseEmbeddedCaptions
     * @see m2ts/caption-stream.js
     **/
    this.parse = function (segment, videoTrackIds, timescales) {
      if (!this.isInitialized()) {
        return null;
      }

      // This is not likely to be a video segment
      if (!videoTrackIds || !timescales) {
        return null;
      }

      if (this.isNewInit(videoTrackIds, timescales)) {
        // Use the first video track only as there is no
        // mechanism to switch to other video tracks
        activeTrackId = videoTrackIds[0];
        activeTimescale = timescales[activeTrackId];
      } else if (activeTrackId === null || !activeTimescale) {
        // If an init segment has not been seen yet, hold onto segment
        // data until we have one.
        // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
        pendingSegments.push(segment);
        return null;
      }

      // Now that a timescale and trackId is set, parse cached segments
      while (pendingSegments.length > 0) {
        this.parse(pendingSegments.shift(), videoTrackIds, timescales);
      }

      var parsed = parseEmbeddedCaptions(segment, activeTrackId, activeTimescale);

      if (parsed && parsed.logs) {
        collected.logs = collected.logs.concat(parsed.logs);
      }

      if (parsed !== null && parsed.seiNals) {
        this.pushNals(parsed.seiNals);
        // Force the parsed captions to be dispatched
        this.flushStream();
        return collected;
      }

      // Nothing to push: report logs on their own if there are any.
      return collected.logs.length ? {
        logs: collected.logs,
        captions: [],
        captionStreams: []
      } : null;
    };

    /**
     * Pushes SEI NALUs onto CaptionStream
     * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
     * Assumes that `parseCaptionNals` has been called first
     * @return {null|undefined} null when there was nothing to do
     * @see m2ts/caption-stream.js
     **/
    this.pushNals = function (nals) {
      var nothingToDo = !this.isInitialized() || !nals || nals.length === 0;

      if (nothingToDo) {
        return null;
      }

      nals.forEach(function (nal) {
        stream.push(nal);
      });
    };

    /**
     * Flushes underlying CaptionStream to dispatch processed, displayable captions
     * @return {null|undefined} null when the parser is not initialized
     * @see m2ts/caption-stream.js
     **/
    this.flushStream = function () {
      if (!this.isInitialized()) {
        return null;
      }

      if (partialMode) {
        stream.partialFlush();
      } else {
        stream.flush();
      }
    };

    /**
     * Reset caption buckets for new data
     **/
    this.clearParsedCaptions = function () {
      collected.captions = [];
      collected.captionStreams = {};
      collected.logs = [];
    };

    /**
     * Resets underlying CaptionStream
     * @return {null|undefined} null when the parser is not initialized
     * @see m2ts/caption-stream.js
     **/
    this.resetCaptionStream = function () {
      if (!this.isInitialized()) {
        return null;
      }

      stream.reset();
    };

    /**
     * Convenience method to clear all captions flushed from the
     * CaptionStream and still being parsed
     * @see m2ts/caption-stream.js
     **/
    this.clearAllCaptions = function () {
      this.clearParsedCaptions();
      this.resetCaptionStream();
    };

    /**
     * Reset caption parser
     **/
    this.reset = function () {
      pendingSegments = [];
      activeTrackId = null;
      activeTimescale = null;

      if (collected) {
        this.clearParsedCaptions();
      } else {
        collected = {
          captions: [],
          // CC1, CC2, CC3, CC4
          captionStreams: {},
          logs: []
        };
      }

      this.resetCaptionStream();
    };

    this.reset();
  };
  402. module.exports = CaptionParser;