123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487 |
- /**
- * mux.js
- *
- * Copyright (c) Brightcove
- * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
- *
- * Reads in-band CEA-708 captions out of FMP4 segments.
- * @see https://en.wikipedia.org/wiki/CEA-708
- */
- 'use strict';
- var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
- var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
- var findBox = require('../mp4/find-box.js');
- var parseTfdt = require('../tools/parse-tfdt.js');
- var parseTrun = require('../tools/parse-trun.js');
- var parseTfhd = require('../tools/parse-tfhd.js');
- var window = require('global/window');
- /**
- * Maps an offset in the mdat to a sample based on the the size of the samples.
- * Assumes that `parseSamples` has been called first.
- *
- * @param {Number} offset - The offset into the mdat
- * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
- * @return {?Object} The matching sample, or null if no match was found.
- *
- * @see ISO-BMFF-12/2015, Section 8.8.8
- **/
- var mapToSample = function mapToSample(offset, samples) {
- var approximateOffset = offset;
- for (var i = 0; i < samples.length; i++) {
- var sample = samples[i];
- if (approximateOffset < sample.size) {
- return sample;
- }
- approximateOffset -= sample.size;
- }
- return null;
- };
- /**
- * Finds SEI nal units contained in a Media Data Box.
- * Assumes that `parseSamples` has been called first.
- *
- * @param {Uint8Array} avcStream - The bytes of the mdat
- * @param {Object[]} samples - The samples parsed out by `parseSamples`
- * @param {Number} trackId - The trackId of this video track
- * @return {Object[]} seiNals - the parsed SEI NALUs found.
- * The contents of the seiNal should match what is expected by
- * CaptionStream.push (nalUnitType, size, data, escapedRBSP, pts, dts)
- *
- * @see ISO-BMFF-12/2015, Section 8.1.1
- * @see Rec. ITU-T H.264, 7.3.2.3.1
- **/
- var findSeiNals = function findSeiNals(avcStream, samples, trackId) {
- var avcView = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength),
- result = {
- logs: [],
- seiNals: []
- },
- seiNal,
- i,
- length,
- lastMatchedSample;
- for (i = 0; i + 4 < avcStream.length; i += length) {
- length = avcView.getUint32(i);
- i += 4; // Bail if this doesn't appear to be an H264 stream
- if (length <= 0) {
- continue;
- }
- switch (avcStream[i] & 0x1F) {
- case 0x06:
- var data = avcStream.subarray(i + 1, i + 1 + length);
- var matchingSample = mapToSample(i, samples);
- seiNal = {
- nalUnitType: 'sei_rbsp',
- size: length,
- data: data,
- escapedRBSP: discardEmulationPreventionBytes(data),
- trackId: trackId
- };
- if (matchingSample) {
- seiNal.pts = matchingSample.pts;
- seiNal.dts = matchingSample.dts;
- lastMatchedSample = matchingSample;
- } else if (lastMatchedSample) {
- // If a matching sample cannot be found, use the last
- // sample's values as they should be as close as possible
- seiNal.pts = lastMatchedSample.pts;
- seiNal.dts = lastMatchedSample.dts;
- } else {
- result.logs.push({
- level: 'warn',
- message: 'We\'ve encountered a nal unit without data at ' + i + ' for trackId ' + trackId + '. See mux.js#223.'
- });
- break;
- }
- result.seiNals.push(seiNal);
- break;
- default:
- break;
- }
- }
- return result;
- };
- /**
- * Parses sample information out of Track Run Boxes and calculates
- * the absolute presentation and decode timestamps of each sample.
- *
- * @param {Array<Uint8Array>} truns - The Trun Run boxes to be parsed
- * @param {Number|BigInt} baseMediaDecodeTime - base media decode time from tfdt
- @see ISO-BMFF-12/2015, Section 8.8.12
- * @param {Object} tfhd - The parsed Track Fragment Header
- * @see inspect.parseTfhd
- * @return {Object[]} the parsed samples
- *
- * @see ISO-BMFF-12/2015, Section 8.8.8
- **/
- var parseSamples = function parseSamples(truns, baseMediaDecodeTime, tfhd) {
- var currentDts = baseMediaDecodeTime;
- var defaultSampleDuration = tfhd.defaultSampleDuration || 0;
- var defaultSampleSize = tfhd.defaultSampleSize || 0;
- var trackId = tfhd.trackId;
- var allSamples = [];
- truns.forEach(function (trun) {
- // Note: We currently do not parse the sample table as well
- // as the trun. It's possible some sources will require this.
- // moov > trak > mdia > minf > stbl
- var trackRun = parseTrun(trun);
- var samples = trackRun.samples;
- samples.forEach(function (sample) {
- if (sample.duration === undefined) {
- sample.duration = defaultSampleDuration;
- }
- if (sample.size === undefined) {
- sample.size = defaultSampleSize;
- }
- sample.trackId = trackId;
- sample.dts = currentDts;
- if (sample.compositionTimeOffset === undefined) {
- sample.compositionTimeOffset = 0;
- }
- if (typeof currentDts === 'bigint') {
- sample.pts = currentDts + window.BigInt(sample.compositionTimeOffset);
- currentDts += window.BigInt(sample.duration);
- } else {
- sample.pts = currentDts + sample.compositionTimeOffset;
- currentDts += sample.duration;
- }
- });
- allSamples = allSamples.concat(samples);
- });
- return allSamples;
- };
- /**
- * Parses out caption nals from an FMP4 segment's video tracks.
- *
- * @param {Uint8Array} segment - The bytes of a single segment
- * @param {Number} videoTrackId - The trackId of a video track in the segment
- * @return {Object.<Number, Object[]>} A mapping of video trackId to
- * a list of seiNals found in that track
- **/
- var parseCaptionNals = function parseCaptionNals(segment, videoTrackId) {
- // To get the samples
- var trafs = findBox(segment, ['moof', 'traf']); // To get SEI NAL units
- var mdats = findBox(segment, ['mdat']);
- var captionNals = {};
- var mdatTrafPairs = []; // Pair up each traf with a mdat as moofs and mdats are in pairs
- mdats.forEach(function (mdat, index) {
- var matchingTraf = trafs[index];
- mdatTrafPairs.push({
- mdat: mdat,
- traf: matchingTraf
- });
- });
- mdatTrafPairs.forEach(function (pair) {
- var mdat = pair.mdat;
- var traf = pair.traf;
- var tfhd = findBox(traf, ['tfhd']); // Exactly 1 tfhd per traf
- var headerInfo = parseTfhd(tfhd[0]);
- var trackId = headerInfo.trackId;
- var tfdt = findBox(traf, ['tfdt']); // Either 0 or 1 tfdt per traf
- var baseMediaDecodeTime = tfdt.length > 0 ? parseTfdt(tfdt[0]).baseMediaDecodeTime : 0;
- var truns = findBox(traf, ['trun']);
- var samples;
- var result; // Only parse video data for the chosen video track
- if (videoTrackId === trackId && truns.length > 0) {
- samples = parseSamples(truns, baseMediaDecodeTime, headerInfo);
- result = findSeiNals(mdat, samples, trackId);
- if (!captionNals[trackId]) {
- captionNals[trackId] = {
- seiNals: [],
- logs: []
- };
- }
- captionNals[trackId].seiNals = captionNals[trackId].seiNals.concat(result.seiNals);
- captionNals[trackId].logs = captionNals[trackId].logs.concat(result.logs);
- }
- });
- return captionNals;
- };
- /**
- * Parses out inband captions from an MP4 container and returns
- * caption objects that can be used by WebVTT and the TextTrack API.
- * @see https://developer.mozilla.org/en-US/docs/Web/API/VTTCue
- * @see https://developer.mozilla.org/en-US/docs/Web/API/TextTrack
- * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
- *
- * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
- * @param {Number} trackId - The id of the video track to parse
- * @param {Number} timescale - The timescale for the video track from the init segment
- *
- * @return {?Object[]} parsedCaptions - A list of captions or null if no video tracks
- * @return {Number} parsedCaptions[].startTime - The time to show the caption in seconds
- * @return {Number} parsedCaptions[].endTime - The time to stop showing the caption in seconds
- * @return {String} parsedCaptions[].text - The visible content of the caption
- **/
- var parseEmbeddedCaptions = function parseEmbeddedCaptions(segment, trackId, timescale) {
- var captionNals; // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
- if (trackId === null) {
- return null;
- }
- captionNals = parseCaptionNals(segment, trackId);
- var trackNals = captionNals[trackId] || {};
- return {
- seiNals: trackNals.seiNals,
- logs: trackNals.logs,
- timescale: timescale
- };
- };
- /**
- * Converts SEI NALUs into captions that can be used by video.js
- **/
- var CaptionParser = function CaptionParser() {
- var isInitialized = false;
- var captionStream; // Stores segments seen before trackId and timescale are set
- var segmentCache; // Stores video track ID of the track being parsed
- var trackId; // Stores the timescale of the track being parsed
- var timescale; // Stores captions parsed so far
- var parsedCaptions; // Stores whether we are receiving partial data or not
- var parsingPartial;
- /**
- * A method to indicate whether a CaptionParser has been initalized
- * @returns {Boolean}
- **/
- this.isInitialized = function () {
- return isInitialized;
- };
- /**
- * Initializes the underlying CaptionStream, SEI NAL parsing
- * and management, and caption collection
- **/
- this.init = function (options) {
- captionStream = new CaptionStream();
- isInitialized = true;
- parsingPartial = options ? options.isPartial : false; // Collect dispatched captions
- captionStream.on('data', function (event) {
- // Convert to seconds in the source's timescale
- event.startTime = event.startPts / timescale;
- event.endTime = event.endPts / timescale;
- parsedCaptions.captions.push(event);
- parsedCaptions.captionStreams[event.stream] = true;
- });
- captionStream.on('log', function (log) {
- parsedCaptions.logs.push(log);
- });
- };
- /**
- * Determines if a new video track will be selected
- * or if the timescale changed
- * @return {Boolean}
- **/
- this.isNewInit = function (videoTrackIds, timescales) {
- if (videoTrackIds && videoTrackIds.length === 0 || timescales && typeof timescales === 'object' && Object.keys(timescales).length === 0) {
- return false;
- }
- return trackId !== videoTrackIds[0] || timescale !== timescales[trackId];
- };
- /**
- * Parses out SEI captions and interacts with underlying
- * CaptionStream to return dispatched captions
- *
- * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
- * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
- * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
- * @see parseEmbeddedCaptions
- * @see m2ts/caption-stream.js
- **/
- this.parse = function (segment, videoTrackIds, timescales) {
- var parsedData;
- if (!this.isInitialized()) {
- return null; // This is not likely to be a video segment
- } else if (!videoTrackIds || !timescales) {
- return null;
- } else if (this.isNewInit(videoTrackIds, timescales)) {
- // Use the first video track only as there is no
- // mechanism to switch to other video tracks
- trackId = videoTrackIds[0];
- timescale = timescales[trackId]; // If an init segment has not been seen yet, hold onto segment
- // data until we have one.
- // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
- } else if (trackId === null || !timescale) {
- segmentCache.push(segment);
- return null;
- } // Now that a timescale and trackId is set, parse cached segments
- while (segmentCache.length > 0) {
- var cachedSegment = segmentCache.shift();
- this.parse(cachedSegment, videoTrackIds, timescales);
- }
- parsedData = parseEmbeddedCaptions(segment, trackId, timescale);
- if (parsedData && parsedData.logs) {
- parsedCaptions.logs = parsedCaptions.logs.concat(parsedData.logs);
- }
- if (parsedData === null || !parsedData.seiNals) {
- if (parsedCaptions.logs.length) {
- return {
- logs: parsedCaptions.logs,
- captions: [],
- captionStreams: []
- };
- }
- return null;
- }
- this.pushNals(parsedData.seiNals); // Force the parsed captions to be dispatched
- this.flushStream();
- return parsedCaptions;
- };
- /**
- * Pushes SEI NALUs onto CaptionStream
- * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
- * Assumes that `parseCaptionNals` has been called first
- * @see m2ts/caption-stream.js
- **/
- this.pushNals = function (nals) {
- if (!this.isInitialized() || !nals || nals.length === 0) {
- return null;
- }
- nals.forEach(function (nal) {
- captionStream.push(nal);
- });
- };
- /**
- * Flushes underlying CaptionStream to dispatch processed, displayable captions
- * @see m2ts/caption-stream.js
- **/
- this.flushStream = function () {
- if (!this.isInitialized()) {
- return null;
- }
- if (!parsingPartial) {
- captionStream.flush();
- } else {
- captionStream.partialFlush();
- }
- };
- /**
- * Reset caption buckets for new data
- **/
- this.clearParsedCaptions = function () {
- parsedCaptions.captions = [];
- parsedCaptions.captionStreams = {};
- parsedCaptions.logs = [];
- };
- /**
- * Resets underlying CaptionStream
- * @see m2ts/caption-stream.js
- **/
- this.resetCaptionStream = function () {
- if (!this.isInitialized()) {
- return null;
- }
- captionStream.reset();
- };
- /**
- * Convenience method to clear all captions flushed from the
- * CaptionStream and still being parsed
- * @see m2ts/caption-stream.js
- **/
- this.clearAllCaptions = function () {
- this.clearParsedCaptions();
- this.resetCaptionStream();
- };
- /**
- * Reset caption parser
- **/
- this.reset = function () {
- segmentCache = [];
- trackId = null;
- timescale = null;
- if (!parsedCaptions) {
- parsedCaptions = {
- captions: [],
- // CC1, CC2, CC3, CC4
- captionStreams: {},
- logs: []
- };
- } else {
- this.clearParsedCaptions();
- }
- this.resetCaptionStream();
- };
- this.reset();
- };
- module.exports = CaptionParser;
|