123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393 |
- /**
- * mux.js
- *
- * Copyright (c) Brightcove
- * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
- *
- * Utilities to detect basic properties and metadata about MP4s.
- */
- 'use strict';
- var toUnsigned = require('../utils/bin').toUnsigned;
- var toHexString = require('../utils/bin').toHexString;
- var findBox = require('../mp4/find-box.js');
- var parseType = require('../mp4/parse-type.js');
- var emsg = require('../mp4/emsg.js');
- var parseTfhd = require('../tools/parse-tfhd.js');
- var parseTrun = require('../tools/parse-trun.js');
- var parseTfdt = require('../tools/parse-tfdt.js');
- var getUint64 = require('../utils/numbers.js').getUint64;
- var timescale, startTime, compositionStartTime, getVideoTrackIds, getTracks, getTimescaleFromMediaHeader, getEmsgID3;
- var window = require('global/window');
- var parseId3Frames = require('../tools/parse-id3.js').parseId3Frames;
/**
 * Parses an MP4 initialization segment and extracts the timescale
 * values for any declared tracks. Timescale values indicate the
 * number of clock ticks per second to assume for time-based values
 * elsewhere in the MP4.
 *
 * To determine the start time of an MP4, you need two pieces of
 * information: the timescale unit and the earliest base media decode
 * time. Multiple timescales can be specified within an MP4 but the
 * base media decode time is always expressed in the timescale from
 * the media header box for the track:
 * ```
 * moov > trak > mdia > mdhd.timescale
 * ```
 * @param init {Uint8Array} the bytes of the init segment
 * @return {object} a hash of track ids to timescale values, or null if
 * any track is missing its tkhd or mdhd box (malformed init segment).
 */
timescale = function timescale(init) {
  var traks = findBox(init, ['moov', 'trak']);
  var result = {};
  var i, trak, tkhd, mdhd, id;

  // big-endian unsigned 32-bit read at a byte offset
  var readUint32 = function (bytes, at) {
    return toUnsigned(bytes[at] << 24 | bytes[at + 1] << 16 | bytes[at + 2] << 8 | bytes[at + 3]);
  };

  // tkhd and mdhd both start with a version byte; the field read here
  // sits at byte 12 for version 0 and at byte 20 otherwise
  var fieldOffset = function (box) {
    return box[0] === 0 ? 12 : 20;
  };

  // A plain loop is used so that a malformed track ends the scan at once.
  // With reduce(), a null accumulator would be handed to the next track and
  // writing into it would throw instead of yielding the documented null.
  for (i = 0; i < traks.length; i++) {
    trak = traks[i];

    tkhd = findBox(trak, ['tkhd'])[0];
    if (!tkhd) {
      return null;
    }

    mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
    if (!mdhd) {
      return null;
    }

    id = readUint32(tkhd, fieldOffset(tkhd));
    // mdhd timescale
    result[id] = readUint32(mdhd, fieldOffset(mdhd));
  }

  return result;
};
/**
 * Determine the base media decode start time, in seconds, for an MP4
 * fragment. If multiple fragments are specified, the earliest time is
 * returned.
 *
 * The base media decode time can be parsed from track fragment
 * metadata:
 * ```
 * moof > traf > tfdt.baseMediaDecodeTime
 * ```
 * It requires the timescale value from the mdhd to interpret.
 *
 * Track fragments that lack a tfhd or a tfdt box carry no usable decode
 * time and are ignored.
 *
 * @param timescale {object} a hash of track ids to timescale values.
 * @param fragment {Uint8Array} the bytes of a media segment
 * @return {number|bigint} the earliest base media decode start time for the
 * fragment, in seconds, or 0 when no track fragment provides one. A bigint
 * is only returned when the value is not below Number.MAX_SAFE_INTEGER.
 */
startTime = function startTime(timescale, fragment) {
  // we need info from two children of each track fragment box
  var trafs = findBox(fragment, ['moof', 'traf']);

  // determine the start time of each track and keep the lowest one
  var lowestTime = trafs.reduce(function (acc, traf) {
    var tfhd = findBox(traf, ['tfhd'])[0];
    var tfdt = findBox(traf, ['tfdt'])[0];
    var id, scale, dv, baseTime, seconds;

    // without both boxes there is nothing to compare; leave the running
    // minimum untouched instead of dereferencing a missing box
    if (!tfhd || !tfdt) {
      return acc;
    }

    // get the track id from the tfhd
    id = toUnsigned(tfhd[4] << 24 | tfhd[5] << 16 | tfhd[6] << 8 | tfhd[7]);

    // assume a 90kHz clock if no timescale was specified
    scale = timescale[id] || 90e3;

    // get the base media decode time from the tfdt; version 1 is 64 bit
    if (tfdt[0] === 1) {
      baseTime = getUint64(tfdt.subarray(4, 12));
    } else {
      dv = new DataView(tfdt.buffer, tfdt.byteOffset, tfdt.byteLength);
      baseTime = dv.getUint32(4);
    }

    // convert base time to seconds if it is a valid number
    if (typeof baseTime === 'bigint') {
      // BigInt division truncates, so this path yields whole seconds
      seconds = baseTime / window.BigInt(scale);
    } else if (typeof baseTime === 'number' && !isNaN(baseTime)) {
      seconds = baseTime / scale;
    }

    // fall back to a plain number whenever it can represent the value
    if (seconds < Number.MAX_SAFE_INTEGER) {
      seconds = Number(seconds);
    }

    // an undefined `seconds` never compares lower, so invalid times are skipped
    if (seconds < acc) {
      acc = seconds;
    }
    return acc;
  }, Infinity);

  // Infinity means no track fragment contributed a time
  return typeof lowestTime === 'bigint' || isFinite(lowestTime) ? lowestTime : 0;
};
/**
 * Determine the composition start, in seconds, for an MP4 fragment.
 *
 * The composition start time of a fragment is derived from the base media
 * decode time, the composition time offset of the first sample, and the
 * track's timescale:
 *
 *   compositionStartTime = (baseMediaDecodeTime + compositionTimeOffset) / timescale
 *
 * Everything except the timescale lives in the fragment's `traf` box. The
 * timescale comes from the initialization segment, so the track id (also
 * found in the `traf`) is used to look it up.
 *
 * @param timescales {object} - a hash of track ids to timescale values.
 * @param fragment {Uint8Array} - the bytes of a media segment
 * @return {number} the composition start time for the fragment, in seconds
 **/
compositionStartTime = function compositionStartTime(timescales, fragment) {
  var trafs = findBox(fragment, ['moof', 'traf']);
  var decodeTime = 0;
  var ctsOffset = 0;
  var trackId, firstTraf, tfhdBox, trunBox, tfdtBox, samples, trackTimescale, compositionStart;

  if (trafs && trafs.length) {
    // The spec states that track run samples contained within a `traf` box are
    // contiguous, but it does not explicitly state whether the `traf` boxes
    // themselves are contiguous. We assume they are, so only the first one is
    // needed to calculate the start time.
    firstTraf = trafs[0];
    tfhdBox = findBox(firstTraf, ['tfhd'])[0];
    trunBox = findBox(firstTraf, ['trun'])[0];
    tfdtBox = findBox(firstTraf, ['tfdt'])[0];

    if (tfhdBox) {
      trackId = parseTfhd(tfhdBox).trackId;
    }

    if (tfdtBox) {
      decodeTime = parseTfdt(tfdtBox).baseMediaDecodeTime;
    }

    if (trunBox) {
      samples = parseTrun(trunBox).samples;
      if (samples && samples.length) {
        ctsOffset = samples[0].compositionTimeOffset || 0;
      }
    }
  }

  // Timescale for this specific track; assume a 90kHz clock if none was specified.
  trackTimescale = timescales[trackId] || 90e3;

  // BigInt and Number cannot be mixed in arithmetic, so promote the other
  // operands when the decode time arrived as a bigint.
  if (typeof decodeTime === 'bigint') {
    ctsOffset = window.BigInt(ctsOffset);
    trackTimescale = window.BigInt(trackTimescale);
  }

  // the composition start time, in seconds
  compositionStart = (decodeTime + ctsOffset) / trackTimescale;

  // hand back a plain number whenever it can represent the value
  if (typeof compositionStart === 'bigint' && compositionStart < Number.MAX_SAFE_INTEGER) {
    compositionStart = Number(compositionStart);
  }

  return compositionStart;
};
/**
 * Find the trackIds of the video tracks in this source.
 * Found by parsing the Handler Reference and Track Header Boxes:
 *   moov > trak > mdia > hdlr
 *   moov > trak > tkhd
 *
 * A video handler whose trak has no matching tkhd box is skipped, since
 * there is no header to read a track id from.
 *
 * @param {Uint8Array} init - The bytes of the init segment for this source
 * @return {Number[]} A list of trackIds
 *
 * @see ISO-BMFF-12/2015, Section 8.4.3
 **/
getVideoTrackIds = function getVideoTrackIds(init) {
  var traks = findBox(init, ['moov', 'trak']);
  var videoTrackIds = [];

  traks.forEach(function (trak) {
    var hdlrs = findBox(trak, ['mdia', 'hdlr']);
    var tkhds = findBox(trak, ['tkhd']);

    hdlrs.forEach(function (hdlr, index) {
      // the handler type is read from bytes 8-11 of the hdlr payload
      var handlerType = parseType(hdlr.subarray(8, 12));
      // hdlr and tkhd boxes are paired up by position within the trak
      var tkhd = tkhds[index];
      var view, version;

      if (handlerType !== 'vide' || !tkhd) {
        return;
      }

      view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
      version = view.getUint8(0);
      // the track id sits at byte 12 for version 0 and at byte 20 otherwise
      videoTrackIds.push(version === 0 ? view.getUint32(12) : view.getUint32(20));
    });
  });

  return videoTrackIds;
};
/**
 * Reads the timescale field out of a media header (mdhd) box.
 *
 * @param {Uint8Array} mdhd - the bytes of the mdhd box, beginning with its
 * version byte
 * @return {number} the four timescale bytes combined big-endian and passed
 * through toUnsigned
 **/
getTimescaleFromMediaHeader = function getTimescaleFromMediaHeader(mdhd) {
  // mdhd is a FullBox, so its first byte is the box version; the timescale
  // sits at byte 12 for version 0 and at byte 20 for any other version
  var offset = 20;
  var packed;

  if (mdhd[0] === 0) {
    offset = 12;
  }

  packed = mdhd[offset] << 24 |
    mdhd[offset + 1] << 16 |
    mdhd[offset + 2] << 8 |
    mdhd[offset + 3];

  return toUnsigned(packed);
};
/**
 * Get all the video, audio, and hint tracks from a non fragmented
 * mp4 segment.
 *
 * Each returned track object may carry the following properties, set only
 * when the corresponding box is present in the trak:
 *   id        - track id read from the tkhd
 *   type      - 'video' for a 'vide' handler, 'audio' for 'soun', otherwise
 *               the raw handler type
 *   codec     - codec string built from the first stsd sample entry
 *   timescale - timescale read from the mdhd
 *
 * @param {Uint8Array} init - the bytes containing the moov box
 * @return {Object[]} one entry per trak box, in file order
 */
getTracks = function getTracks(init) {
  var traks = findBox(init, ['moov', 'trak']);
  var tracks = [];

  traks.forEach(function (trak) {
    var track = {};
    var tkhd = findBox(trak, ['tkhd'])[0];
    var view, tkhdVersion;
    var hdlr, type;
    var stsd, sampleDescriptions, codecBox, codecConfig, codecConfigType;
    var mdhd;

    // id
    if (tkhd) {
      view = new DataView(tkhd.buffer, tkhd.byteOffset, tkhd.byteLength);
      tkhdVersion = view.getUint8(0);
      track.id = tkhdVersion === 0 ? view.getUint32(12) : view.getUint32(20);
    }

    // type
    hdlr = findBox(trak, ['mdia', 'hdlr'])[0];
    if (hdlr) {
      type = parseType(hdlr.subarray(8, 12));
      if (type === 'vide') {
        track.type = 'video';
      } else if (type === 'soun') {
        track.type = 'audio';
      } else {
        track.type = type;
      }
    }

    // codec
    stsd = findBox(trak, ['mdia', 'minf', 'stbl', 'stsd'])[0];
    if (stsd) {
      // skip the first 8 bytes of the stsd payload to reach the sample entries
      sampleDescriptions = stsd.subarray(8);
      // gives the codec type string
      track.codec = parseType(sampleDescriptions.subarray(4, 8));
      codecBox = findBox(sampleDescriptions, [track.codec])[0];

      if (codecBox) {
        // https://tools.ietf.org/html/rfc6381#section-3.3
        if (/^[asm]vc[1-9]$/i.test(track.codec)) {
          // we don't need anything but the "config" parameter of the
          // avc1 codecBox
          codecConfig = codecBox.subarray(78);
          codecConfigType = parseType(codecConfig.subarray(4, 8));
          if (codecConfigType === 'avcC' && codecConfig.length > 11) {
            track.codec += '.';
            // left padded with zeroes for single digit hex
            // profile idc
            track.codec += toHexString(codecConfig[9]);
            // the byte containing the constraint_set flags
            track.codec += toHexString(codecConfig[10]);
            // level idc
            track.codec += toHexString(codecConfig[11]);
          } else {
            // TODO: show a warning that we couldn't parse the codec
            // and are using the default
            track.codec = 'avc1.4d400d';
          }
        } else if (/^mp4[av]$/i.test(track.codec)) {
          // The character class is exactly "a" or "v"; a comma inside it
          // would also accept the bogus four-character code "mp4,".
          //
          // we do not need anything but the streamDescriptor of the mp4a codecBox
          // NOTE(review): this branch also matches "mp4v", yet the 28 byte
          // skip and the fallback below are the mp4a ones - confirm intent.
          codecConfig = codecBox.subarray(28);
          codecConfigType = parseType(codecConfig.subarray(4, 8));
          // NOTE(review): bytes 19 and 20 are fixed offsets into the esds
          // box; presumably this assumes single-byte descriptor lengths -
          // verify against real streams.
          if (codecConfigType === 'esds' && codecConfig.length > 20 && codecConfig[19] !== 0) {
            track.codec += '.' + toHexString(codecConfig[19]);
            // this value is only a single digit
            track.codec += '.' + toHexString(codecConfig[20] >>> 2 & 0x3f).replace(/^0/, '');
          } else {
            // TODO: show a warning that we couldn't parse the codec
            // and are using the default
            track.codec = 'mp4a.40.2';
          }
        } else {
          // flac, opus, etc
          track.codec = track.codec.toLowerCase();
        }
      }
    }

    // timescale
    mdhd = findBox(trak, ['mdia', 'mdhd'])[0];
    if (mdhd) {
      track.timescale = getTimescaleFromMediaHeader(mdhd);
    }

    tracks.push(track);
  });

  return tracks;
};
/**
 * Collects the ID3 data carried by the emsg boxes of a segment.
 * An offset can be supplied as the Latest Arrival Time, which is used to
 * calculate the Event Start Time of v0 EMSG boxes.
 * See: https://dashif-documents.azurewebsites.net/Events/master/event.html#Inband-event-timing
 *
 * @param {Uint8Array} segmentData the segment byte array.
 * @param {number} offset the segment start time or Latest Arrival Time;
 * defaults to 0 when omitted
 * @return {Object[]} one `{ cueTime, duration, frames }` object per emsg box
 */
getEmsgID3 = function getEmsgID3(segmentData, offset) {
  var latestArrivalTime = offset === void 0 ? 0 : offset;

  return findBox(segmentData, ['emsg']).map(function (boxBytes) {
    // each box is parsed from its own copy of the bytes
    var box = emsg.parseEmsgBox(new Uint8Array(boxBytes));
    var frames = parseId3Frames(box.message_data);
    var cueTime = emsg.scaleTime(
      box.presentation_time,
      box.timescale,
      box.presentation_time_delta,
      latestArrivalTime
    );
    var duration = emsg.scaleTime(box.event_duration, box.timescale);

    return {
      cueTime: cueTime,
      duration: duration,
      frames: frames
    };
  });
};
// Public surface of the probe module. Some helpers are exposed under
// shorter names than their local identifiers (videoTrackIds, tracks).
var probeExports = {
  // export mp4 inspector's findBox and parseType for backwards compatibility
  findBox: findBox,
  parseType: parseType,
  timescale: timescale,
  startTime: startTime,
  compositionStartTime: compositionStartTime,
  videoTrackIds: getVideoTrackIds,
  tracks: getTracks,
  getTimescaleFromMediaHeader: getTimescaleFromMediaHeader,
  getEmsgID3: getEmsgID3
};

module.exports = probeExports;
|