caption-parser.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Reads in-band CEA-708 captions out of FMP4 segments.
  8. * @see https://en.wikipedia.org/wiki/CEA-708
  9. */
  10. 'use strict';
  11. var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
  12. var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
  13. var findBox = require('../mp4/find-box.js');
  14. var parseTfdt = require('../tools/parse-tfdt.js');
  15. var parseTrun = require('../tools/parse-trun.js');
  16. var parseTfhd = require('../tools/parse-tfhd.js');
  17. var window = require('global/window');
  /**
   * Finds the sample that contains a given byte offset of the mdat by walking
   * the samples in order and consuming each sample's size from the offset.
   * Assumes that `parseSamples` has been called first.
   *
   * @param {Number} offset - The offset into the mdat
   * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
   * @return {?Object} The first sample whose byte range covers the offset, or
   * null if the offset lies beyond the combined size of all samples.
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var mapToSample = function(offset, samples) {
    // Bytes that still have to be skipped before the target offset is reached
    var remaining = offset;
    var index = 0;
    var candidate;

    while (index < samples.length) {
      candidate = samples[index];

      if (remaining < candidate.size) {
        return candidate;
      }

      remaining -= candidate.size;
      index += 1;
    }

    return null;
  };
  /**
   * Finds SEI nal units contained in a Media Data Box.
   * Assumes that `parseSamples` has been called first.
   *
   * The mdat is read as a sequence of NAL units, each preceded by a 4-byte
   * big-endian length. Every NAL unit whose type is 6 (SEI) is returned with
   * the timestamps of the sample that `mapToSample` reports for its offset.
   *
   * @param {Uint8Array} avcStream - The bytes of the mdat
   * @param {Object[]} samples - The samples parsed out by `parseSamples`
   * @param {Number} trackId - The trackId of this video track
   * @return {Object} result
   * @return {Object[]} result.seiNals - the parsed SEI NALUs found.
   * The contents of each seiNal should match what is expected by
   * CaptionStream.push (nalUnitType, size, data, escapedRBSP, pts, dts)
   * @return {Object[]} result.logs - `{level, message}` entries for SEI NALUs
   * that were dropped because no timing information was available
   *
   * @see ISO-BMFF-12/2015, Section 8.1.1
   * @see Rec. ITU-T H.264, 7.3.2.3.1
   **/
  var findSeiNals = function(avcStream, samples, trackId) {
    var avcView = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength);
    var result = {
      logs: [],
      seiNals: []
    };
    var seiNal;
    var data;
    var matchingSample;
    var lastMatchedSample;
    var length;
    var i;

    // The loop header steps over the NAL unit itself (`i += length`); the
    // 4-byte length prefix is stepped over inside the body.
    for (i = 0; i + 4 < avcStream.length; i += length) {
      length = avcView.getUint32(i);
      i += 4;

      // A zero-length NAL unit has no header byte to inspect; move on to
      // whatever follows the prefix.
      if (length <= 0) {
        continue;
      }

      // Only SEI NAL units (nal_unit_type 6) are of interest here
      if ((avcStream[i] & 0x1F) !== 0x06) {
        continue;
      }

      // `length` already counts the one-byte NAL header at `i`, so the payload
      // is the remaining `length - 1` bytes. Reading `length` bytes after the
      // header would pull in the first byte of the next length prefix.
      data = avcStream.subarray(i + 1, i + length);
      matchingSample = mapToSample(i, samples);

      seiNal = {
        nalUnitType: 'sei_rbsp',
        size: length,
        data: data,
        escapedRBSP: discardEmulationPreventionBytes(data),
        trackId: trackId
      };

      if (matchingSample) {
        seiNal.pts = matchingSample.pts;
        seiNal.dts = matchingSample.dts;
        lastMatchedSample = matchingSample;
      } else if (lastMatchedSample) {
        // If a matching sample cannot be found, use the last
        // sample's values as they should be as close as possible
        seiNal.pts = lastMatchedSample.pts;
        seiNal.dts = lastMatchedSample.dts;
      } else {
        // Without any timing information the NAL cannot be scheduled, so it is
        // reported and dropped.
        result.logs.push({
          level: 'warn',
          message: 'We\'ve encountered a nal unit without data at ' + i + ' for trackId ' + trackId + '. See mux.js#223.'
        });
        continue;
      }

      result.seiNals.push(seiNal);
    }

    return result;
  };
  /**
   * Parses sample information out of Track Run Boxes and calculates
   * the absolute presentation and decode timestamps of each sample.
   *
   * Each sample returned by `parseTrun` is completed in place: a missing
   * duration or size falls back to the tfhd default (or 0), a missing
   * compositionTimeOffset becomes 0, and `trackId`, `dts` and `pts` are set.
   * The decode time runs continuously across all of the given truns.
   *
   * @param {Array<Uint8Array>} truns - The Track Run boxes to be parsed
   * @param {Number|BigInt} baseMediaDecodeTime - base media decode time from tfdt
   * @see ISO-BMFF-12/2015, Section 8.8.12
   * @param {Object} tfhd - The parsed Track Fragment Header
   * @see inspect.parseTfhd
   * @return {Object[]} the parsed samples, in trun order
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var parseSamples = function(truns, baseMediaDecodeTime, tfhd) {
    var fallbackDuration = tfhd.defaultSampleDuration || 0;
    var fallbackSize = tfhd.defaultSampleSize || 0;
    var id = tfhd.trackId;
    // Decode timestamp to assign to the next sample
    var nextDts = baseMediaDecodeTime;
    var collected = [];
    var runIndex;
    var sampleIndex;
    var runSamples;
    var sample;

    for (runIndex = 0; runIndex < truns.length; runIndex++) {
      // Note: only the trun is parsed, not the sample table
      // (moov > trak > mdia > minf > stbl). It's possible some sources
      // will require that as well.
      runSamples = parseTrun(truns[runIndex]).samples;

      for (sampleIndex = 0; sampleIndex < runSamples.length; sampleIndex++) {
        sample = runSamples[sampleIndex];

        if (sample.duration === undefined) {
          sample.duration = fallbackDuration;
        }

        if (sample.size === undefined) {
          sample.size = fallbackSize;
        }

        sample.trackId = id;
        sample.dts = nextDts;

        if (sample.compositionTimeOffset === undefined) {
          sample.compositionTimeOffset = 0;
        }

        // BigInt and Number cannot be mixed in arithmetic, so the per-sample
        // values are converted whenever the running decode time is a BigInt.
        if (typeof nextDts === 'bigint') {
          sample.pts = nextDts + window.BigInt(sample.compositionTimeOffset);
          nextDts += window.BigInt(sample.duration);
        } else {
          sample.pts = nextDts + sample.compositionTimeOffset;
          nextDts += sample.duration;
        }

        collected.push(sample);
      }
    }

    return collected;
  };
  /**
   * Parses out caption nals from an FMP4 segment's video tracks.
   *
   * mdat boxes are paired with traf boxes by position. A pair is only
   * processed when its traf exists, carries a tfhd whose trackId equals
   * `videoTrackId`, and contains at least one trun.
   *
   * @param {Uint8Array} segment - The bytes of a single segment
   * @param {Number} videoTrackId - The trackId of a video track in the segment
   * @return {Object.<Number, Object>} A mapping of video trackId to an object
   * with `seiNals` (the SEI NALs found in that track) and `logs` (log entries
   * produced while looking for them). Tracks without a match are absent.
   **/
  var parseCaptionNals = function(segment, videoTrackId) {
    // To get the samples
    var trafs = findBox(segment, ['moof', 'traf']);
    // To get SEI NAL units
    var mdats = findBox(segment, ['mdat']);
    var captionNals = {};

    // Pair up each traf with a mdat as moofs and mdats are in pairs
    mdats.forEach(function(mdat, index) {
      var traf = trafs[index];
      var tfhd;
      var headerInfo;
      var trackId;
      var tfdt;
      var baseMediaDecodeTime;
      var truns;
      var samples;
      var result;

      // A segment with more mdats than trafs leaves some mdats unpaired;
      // without a traf there is no sample information to work with.
      if (!traf) {
        return;
      }

      // There should be exactly 1 tfhd per traf; skip the pair if it is
      // missing rather than handing `undefined` to the tfhd parser.
      tfhd = findBox(traf, ['tfhd']);
      if (tfhd.length === 0) {
        return;
      }

      headerInfo = parseTfhd(tfhd[0]);
      trackId = headerInfo.trackId;

      // Only parse video data for the chosen video track
      if (videoTrackId !== trackId) {
        return;
      }

      truns = findBox(traf, ['trun']);
      if (truns.length === 0) {
        return;
      }

      // Either 0 or 1 tfdt per traf
      tfdt = findBox(traf, ['tfdt']);
      baseMediaDecodeTime = (tfdt.length > 0) ? parseTfdt(tfdt[0]).baseMediaDecodeTime : 0;

      samples = parseSamples(truns, baseMediaDecodeTime, headerInfo);
      result = findSeiNals(mdat, samples, trackId);

      if (!captionNals[trackId]) {
        captionNals[trackId] = {seiNals: [], logs: []};
      }

      captionNals[trackId].seiNals = captionNals[trackId].seiNals.concat(result.seiNals);
      captionNals[trackId].logs = captionNals[trackId].logs.concat(result.logs);
    });

    return captionNals;
  };
  /**
   * Collects the in-band caption SEI NALs of one video track of an fmp4
   * segment, together with the timescale needed to convert their timestamps.
   * The result is meant to be fed to a CaptionStream, which produces caption
   * objects usable with WebVTT and the TextTrack API.
   * @see https://developer.mozilla.org/en-US/docs/Web/API/VTTCue
   * @see https://developer.mozilla.org/en-US/docs/Web/API/TextTrack
   * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
   *
   * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
   * @param {Number} trackId - The id of the video track to parse
   * @param {Number} timescale - The timescale for the video track from the init segment
   *
   * @return {?Object} result - null if `trackId` is null
   * @return {Object[]|undefined} result.seiNals - SEI NALs found for the track,
   * undefined when `parseCaptionNals` returned nothing for it
   * @return {Object[]|undefined} result.logs - log entries for the track,
   * undefined when `parseCaptionNals` returned nothing for it
   * @return {Number} result.timescale - the `timescale` argument, passed through
   **/
  var parseEmbeddedCaptions = function(segment, trackId, timescale) {
    var nalsByTrack;
    var forTrack;

    // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there,
    // so only an explicit null counts as "no track"
    if (trackId === null) {
      return null;
    }

    nalsByTrack = parseCaptionNals(segment, trackId);
    forTrack = nalsByTrack[trackId];

    return {
      seiNals: forTrack ? forTrack.seiNals : undefined,
      logs: forTrack ? forTrack.logs : undefined,
      timescale: timescale
    };
  };
  /**
   * Converts SEI NALUs into captions that can be used by video.js
   *
   * A parser starts out reset but uninitialized; `init` must be called before
   * `parse` does any work. Captions and logs accumulate in a single result
   * object until `clearParsedCaptions` (or `reset`) empties it.
   **/
  var CaptionParser = function() {
    var initialized = false;
    var stream;
    // Segments seen before a track id and timescale were known
    var pendingSegments;
    // Video track id of the track being parsed
    var activeTrackId;
    // Timescale of the track being parsed
    var activeTimescale;
    // Captions, caption stream names and logs gathered so far
    var collected;
    // Whether partial data is being received
    var partialMode;

    /**
     * A method to indicate whether a CaptionParser has been initialized
     * @returns {Boolean}
     **/
    this.isInitialized = function() {
      return initialized;
    };

    /**
     * Creates the underlying CaptionStream and subscribes to its `data` and
     * `log` events so that dispatched captions and logs are collected.
     *
     * @param {Object} [options]
     * @param {Boolean} [options.isPartial] - flush with `partialFlush`
     * instead of `flush`
     **/
    this.init = function(options) {
      stream = new CaptionStream();
      initialized = true;
      partialMode = options ? options.isPartial : false;

      // Collect dispatched captions
      stream.on('data', function(cue) {
        // Convert to seconds in the source's timescale
        cue.startTime = cue.startPts / activeTimescale;
        cue.endTime = cue.endPts / activeTimescale;

        collected.captions.push(cue);
        collected.captionStreams[cue.stream] = true;
      });

      stream.on('log', function(entry) {
        collected.logs.push(entry);
      });
    };

    /**
     * Determines if a new video track will be selected
     * or if the timescale changed
     *
     * @param {Number[]} videoTrackIds - video track ids from the init segment
     * @param {Object.<Number, Number>} timescales - timescales from the init segment
     * @return {Boolean} false when either argument is empty; otherwise true if
     * the first track id or its timescale differs from the current one
     **/
    this.isNewInit = function(videoTrackIds, timescales) {
      var hasNoTrackIds = videoTrackIds && videoTrackIds.length === 0;
      var hasNoTimescales = timescales &&
        typeof timescales === 'object' &&
        Object.keys(timescales).length === 0;

      if (hasNoTrackIds || hasNoTimescales) {
        return false;
      }

      if (activeTrackId !== videoTrackIds[0]) {
        return true;
      }

      return activeTimescale !== timescales[activeTrackId];
    };

    /**
     * Parses out SEI captions and interacts with underlying
     * CaptionStream to return dispatched captions
     *
     * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {?Object} the accumulated `{captions, captionStreams, logs}`;
     * an object with only logs filled in when the segment produced no SEI NALs
     * but logs exist; null when uninitialized, when arguments are missing, when
     * the segment was cached, or when there is nothing to report
     * @see parseEmbeddedCaptions
     * @see m2ts/caption-stream.js
     **/
    this.parse = function(segment, videoTrackIds, timescales) {
      var parsedData;

      if (!this.isInitialized()) {
        return null;
      }

      // This is not likely to be a video segment
      if (!videoTrackIds || !timescales) {
        return null;
      }

      if (this.isNewInit(videoTrackIds, timescales)) {
        // Use the first video track only as there is no
        // mechanism to switch to other video tracks
        activeTrackId = videoTrackIds[0];
        activeTimescale = timescales[activeTrackId];
      } else if (activeTrackId === null || !activeTimescale) {
        // If an init segment has not been seen yet, hold onto segment
        // data until we have one.
        // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
        pendingSegments.push(segment);
        return null;
      }

      // Now that a timescale and trackId is set, parse cached segments
      while (pendingSegments.length > 0) {
        this.parse(pendingSegments.shift(), videoTrackIds, timescales);
      }

      parsedData = parseEmbeddedCaptions(segment, activeTrackId, activeTimescale);

      if (parsedData && parsedData.logs) {
        collected.logs = collected.logs.concat(parsedData.logs);
      }

      if (parsedData === null || !parsedData.seiNals) {
        if (collected.logs.length) {
          return {logs: collected.logs, captions: [], captionStreams: []};
        }

        return null;
      }

      this.pushNals(parsedData.seiNals);
      // Force the parsed captions to be dispatched
      this.flushStream();

      return collected;
    };

    /**
     * Pushes SEI NALUs onto CaptionStream
     * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
     * Assumes that `parseCaptionNals` has been called first
     * @return {null|undefined} null when uninitialized or given nothing to push
     * @see m2ts/caption-stream.js
     **/
    this.pushNals = function(nals) {
      var nothingToPush = !nals || nals.length === 0;

      if (!this.isInitialized() || nothingToPush) {
        return null;
      }

      nals.forEach(function(seiNal) {
        stream.push(seiNal);
      });
    };

    /**
     * Flushes underlying CaptionStream to dispatch processed, displayable captions
     * @return {null|undefined} null when uninitialized
     * @see m2ts/caption-stream.js
     **/
    this.flushStream = function() {
      if (!this.isInitialized()) {
        return null;
      }

      if (partialMode) {
        stream.partialFlush();
      } else {
        stream.flush();
      }
    };

    /**
     * Reset caption buckets for new data
     **/
    this.clearParsedCaptions = function() {
      // The result object itself is kept so references handed out by `parse`
      // stay attached to the parser.
      collected.captions = [];
      collected.captionStreams = {};
      collected.logs = [];
    };

    /**
     * Resets underlying CaptionStream
     * @return {null|undefined} null when uninitialized
     * @see m2ts/caption-stream.js
     **/
    this.resetCaptionStream = function() {
      if (!this.isInitialized()) {
        return null;
      }

      stream.reset();
    };

    /**
     * Convenience method to clear all captions flushed from the
     * CaptionStream and still being parsed
     * @see m2ts/caption-stream.js
     **/
    this.clearAllCaptions = function() {
      this.clearParsedCaptions();
      this.resetCaptionStream();
    };

    /**
     * Reset caption parser: forgets cached segments, the selected track and
     * its timescale, empties the collected results and resets the stream.
     **/
    this.reset = function() {
      pendingSegments = [];
      activeTrackId = null;
      activeTimescale = null;

      if (collected) {
        this.clearParsedCaptions();
      } else {
        collected = {
          captions: [],
          // CC1, CC2, CC3, CC4
          captionStreams: {},
          logs: []
        };
      }

      this.resetCaptionStream();
    };

    this.reset();
  };
  416. module.exports = CaptionParser;