// parse-id3.js
/**
 * mux.js
 *
 * Copyright (c) Brightcove
 * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
 *
 * Tools for parsing ID3 frame data
 * @see http://id3.org/id3v2.3.0
 */
  10. 'use strict';
  11. var typedArrayIndexOf = require('../utils/typed-array').typedArrayIndexOf,
  12. // Frames that allow different types of text encoding contain a text
  13. // encoding description byte [ID3v2.4.0 section 4.]
  14. textEncodingDescriptionByte = {
  15. Iso88591: 0x00,
  16. // ISO-8859-1, terminated with \0.
  17. Utf16: 0x01,
  18. // UTF-16 encoded Unicode BOM, terminated with \0\0
  19. Utf16be: 0x02,
  20. // UTF-16BE encoded Unicode, without BOM, terminated with \0\0
  21. Utf8: 0x03 // UTF-8 encoded Unicode, terminated with \0
  22. },
  23. // return a percent-encoded representation of the specified byte range
  24. // @see http://en.wikipedia.org/wiki/Percent-encoding
  25. percentEncode = function percentEncode(bytes, start, end) {
  26. var i,
  27. result = '';
  28. for (i = start; i < end; i++) {
  29. result += '%' + ('00' + bytes[i].toString(16)).slice(-2);
  30. }
  31. return result;
  32. },
  33. // return the string representation of the specified byte range,
  34. // interpreted as UTf-8.
  35. parseUtf8 = function parseUtf8(bytes, start, end) {
  36. return decodeURIComponent(percentEncode(bytes, start, end));
  37. },
  38. // return the string representation of the specified byte range,
  39. // interpreted as ISO-8859-1.
  40. parseIso88591 = function parseIso88591(bytes, start, end) {
  41. return unescape(percentEncode(bytes, start, end)); // jshint ignore:line
  42. },
  43. parseSyncSafeInteger = function parseSyncSafeInteger(data) {
  44. return data[0] << 21 | data[1] << 14 | data[2] << 7 | data[3];
  45. },
  // Parsers for individual ID3 frame types, keyed by frame id. Each parser
  // receives a frame object whose `data` property holds the frame body
  // (the bytes after the 10-byte frame header) and decorates that object in
  // place; nothing is returned. The 'T*' and 'W*' entries are the generic
  // parsers used for any other frame id starting with 'T' or 'W'.
  //
  // Frames with a text encoding byte are only parsed when that byte says
  // UTF-8; frames in any other encoding are left undecorated.
  frameParsers = {
    // Attached picture [ID3v2.4.0 section 4.14.]
    // Sets mimeType, pictureType, description and either url (when the MIME
    // type is '-->', i.e. the picture is a link) or pictureData.
    'APIC': function APIC(frame) {
      var i = 1,
          mimeTypeEndIndex,
          descriptionEndIndex,
          LINK_MIME_TYPE = '-->';

      if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
        // ignore frames with unrecognized character encodings
        return;
      }

      // parsing fields [ID3v2.4.0 section 4.14.]
      mimeTypeEndIndex = typedArrayIndexOf(frame.data, 0, i);
      if (mimeTypeEndIndex < 0) {
        // malformed frame
        return;
      }

      // parsing Mime type field (terminated with \0)
      frame.mimeType = parseIso88591(frame.data, i, mimeTypeEndIndex);
      i = mimeTypeEndIndex + 1;

      // parsing 1-byte Picture Type field
      frame.pictureType = frame.data[i];
      i++;

      descriptionEndIndex = typedArrayIndexOf(frame.data, 0, i);
      if (descriptionEndIndex < 0) {
        // malformed frame
        return;
      }

      // parsing Description field (terminated with \0)
      frame.description = parseUtf8(frame.data, i, descriptionEndIndex);
      i = descriptionEndIndex + 1;

      if (frame.mimeType === LINK_MIME_TYPE) {
        // parsing Picture Data field as URL (always represented as
        // ISO-8859-1 [ID3v2.4.0 section 4.])
        frame.url = parseIso88591(frame.data, i, frame.data.length);
      } else {
        // parsing Picture Data field as binary data
        frame.pictureData = frame.data.subarray(i, frame.data.length);
      }
    },

    // Generic text information frame [ID3v2.4.0 section 4.2.]
    // Sets value (the whole text without trailing terminators) and values
    // (the text split on the \0 separator).
    'T*': function T(frame) {
      if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
        // ignore frames with unrecognized character encodings
        return;
      }

      // parse text field, do not include null terminator in the frame value
      // frames that allow different types of encoding contain terminated
      // text [ID3v2.4.0 section 4.]
      frame.value = parseUtf8(frame.data, 1, frame.data.length).replace(/\0*$/, '');

      // text information frames supports multiple strings, stored as a
      // terminator separated list [ID3v2.4.0 section 4.2.]
      frame.values = frame.value.split('\0');
    },

    // User defined text information frame.
    // Sets description and value, and replaces frame.data with the value
    // string.
    'TXXX': function TXXX(frame) {
      var descriptionEndIndex;

      if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
        // ignore frames with unrecognized character encodings
        return;
      }

      descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);
      if (descriptionEndIndex === -1) {
        // no terminator after the description: malformed frame
        return;
      }

      // parse the text fields
      frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);

      // do not include the null terminator in the tag value
      // frames that allow different types of encoding contain terminated text
      // [ID3v2.4.0 section 4.]
      frame.value = parseUtf8(frame.data, descriptionEndIndex + 1, frame.data.length).replace(/\0*$/, '');
      frame.data = frame.value;
    },

    // Generic URL link frame [ID3v2.4.0 section 4.3]. Sets url.
    'W*': function W(frame) {
      // parse URL field; URL fields are always represented as ISO-8859-1
      // [ID3v2.4.0 section 4.]
      // if the value is followed by a string termination all the following
      // information should be ignored [ID3v2.4.0 section 4.3]
      // [\s\S] rather than "." so that ignored data containing line
      // terminators (\n, \r) is stripped as well
      frame.url = parseIso88591(frame.data, 0, frame.data.length).replace(/\0[\s\S]*$/, '');
    },

    // User defined URL link frame. Sets description and url.
    'WXXX': function WXXX(frame) {
      var descriptionEndIndex;

      if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
        // ignore frames with unrecognized character encodings
        return;
      }

      descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);
      if (descriptionEndIndex === -1) {
        // no terminator after the description: malformed frame
        return;
      }

      // parse the description and URL fields
      frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);

      // URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
      // if the value is followed by a string termination all the following
      // information should be ignored [ID3v2.4.0 section 4.3]
      // [\s\S] rather than "." so that ignored data containing line
      // terminators (\n, \r) is stripped as well
      frame.url = parseIso88591(frame.data, descriptionEndIndex + 1, frame.data.length).replace(/\0[\s\S]*$/, '');
    },

    // Private frame. Sets owner (the text before the first \0) and
    // privateData (the bytes after it), and replaces frame.data with
    // privateData. When the body contains no \0, owner is left unset and
    // privateData is empty.
    'PRIV': function PRIV(frame) {
      var i;

      for (i = 0; i < frame.data.length; i++) {
        if (frame.data[i] === 0) {
          // everything before the terminator is the owner identifier
          frame.owner = parseIso88591(frame.data, 0, i);
          break;
        }
      }

      // skip the terminator; the remainder is the private payload
      frame.privateData = frame.data.subarray(i + 1);
      frame.data = frame.privateData;
    }
  };
  /**
   * Parse the frames contained in an ID3v2 tag.
   *
   * Every frame is returned as an object with `id` and `key` (the four
   * character frame id) and `data` (the frame body). Frames with a known id,
   * or whose id starts with 'T' or 'W', are additionally passed through the
   * matching entry of `frameParsers`, which decorates the object in place.
   *
   * Parsing stops at the end of the declared tag, at the first frame with a
   * size below 1 (padding or a malformed frame), or at the first frame whose
   * declared body is not completely present in `data`; frames parsed up to
   * that point are still returned.
   *
   * @param {Uint8Array} data bytes starting at the 10-byte ID3 header; must
   * support `subarray`
   * @return {Array<Object>|undefined} the parsed frames, or undefined when
   * `data` does not start with an ID3 header
   */
  var parseId3Frames = function parseId3Frames(data) {
    var frameSize,
        frameEnd,
        frameHeader,
        frame,
        hasExtendedHeader,
        frameStart = 10,
        tagSize = 0,
        frames = [];

    // If we don't have enough data for a header, 10 bytes,
    // or 'ID3' in the first 3 bytes this is not a valid ID3 tag.
    if (data.length < 10 ||
        data[0] !== 'I'.charCodeAt(0) ||
        data[1] !== 'D'.charCodeAt(0) ||
        data[2] !== '3'.charCodeAt(0)) {
      return;
    }

    // the tag size is transmitted as a 28-bit integer in the
    // last four bytes of the ID3 header.
    tagSize = parseSyncSafeInteger(data.subarray(6, 10));

    // ID3 reports the tag size excluding the header but it's more
    // convenient for our comparisons to include it
    tagSize += 10;

    // check bit 6 of byte 5 for the extended header flag.
    hasExtendedHeader = data[5] & 0x40;
    if (hasExtendedHeader) {
      // advance the frame start past the extended header
      frameStart += 4; // header size field
      frameStart += parseSyncSafeInteger(data.subarray(10, 14));
      tagSize -= parseSyncSafeInteger(data.subarray(16, 20)); // clip any padding off the end
    }

    // parse zero or more ID3 frames
    // http://id3.org/id3v2.3.0#ID3v2_frame_overview
    // The bound is checked before the first frame too, so bytes that follow
    // an empty tag are never mistaken for a frame.
    while (frameStart < tagSize) {
      // determine the number of bytes in this frame
      frameSize = parseSyncSafeInteger(data.subarray(frameStart + 4, frameStart + 8));
      if (frameSize < 1) {
        // padding or a malformed frame
        break;
      }

      frameEnd = frameStart + 10 + frameSize;
      if (frameEnd > data.length) {
        // the frame claims more bytes than were supplied; handing a cut-off
        // body to the frame parsers would yield corrupt values
        break;
      }

      frameHeader = String.fromCharCode(data[frameStart],
                                        data[frameStart + 1],
                                        data[frameStart + 2],
                                        data[frameStart + 3]);

      frame = {
        id: frameHeader,
        data: data.subarray(frameStart + 10, frameEnd)
      };
      frame.key = frame.id;

      // parse frame values
      if (frameParsers[frame.id]) {
        // use frame specific parser
        frameParsers[frame.id](frame);
      } else if (frame.id[0] === 'T') {
        // use text frame generic parser
        frameParsers['T*'](frame);
      } else if (frame.id[0] === 'W') {
        // use URL link frame generic parser
        frameParsers['W*'](frame);
      }

      frames.push(frame);

      // advance past the frame header and the frame body
      frameStart = frameEnd;
    }

    return frames;
  };
  190. module.exports = {
  191. parseId3Frames: parseId3Frames,
  192. parseSyncSafeInteger: parseSyncSafeInteger,
  193. frameParsers: frameParsers
  194. };