// parse-id3.js
  /**
   * mux.js
   *
   * Copyright (c) Brightcove
   * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
   *
   * Tools for parsing ID3 frame data
   * @see http://id3.org/id3v2.3.0
   */
  10. 'use strict';
  11. var
  12. typedArrayIndexOf = require('../utils/typed-array').typedArrayIndexOf,
  13. // Frames that allow different types of text encoding contain a text
  14. // encoding description byte [ID3v2.4.0 section 4.]
  15. textEncodingDescriptionByte = {
  16. Iso88591: 0x00, // ISO-8859-1, terminated with \0.
  17. Utf16: 0x01, // UTF-16 encoded Unicode BOM, terminated with \0\0
  18. Utf16be: 0x02, // UTF-16BE encoded Unicode, without BOM, terminated with \0\0
  19. Utf8: 0x03 // UTF-8 encoded Unicode, terminated with \0
  20. },
  21. // return a percent-encoded representation of the specified byte range
  22. // @see http://en.wikipedia.org/wiki/Percent-encoding
  23. percentEncode = function(bytes, start, end) {
  24. var i, result = '';
  25. for (i = start; i < end; i++) {
  26. result += '%' + ('00' + bytes[i].toString(16)).slice(-2);
  27. }
  28. return result;
  29. },
  30. // return the string representation of the specified byte range,
  31. // interpreted as UTf-8.
  32. parseUtf8 = function(bytes, start, end) {
  33. return decodeURIComponent(percentEncode(bytes, start, end));
  34. },
  35. // return the string representation of the specified byte range,
  36. // interpreted as ISO-8859-1.
  37. parseIso88591 = function(bytes, start, end) {
  38. return unescape(percentEncode(bytes, start, end)); // jshint ignore:line
  39. },
  40. parseSyncSafeInteger = function(data) {
  41. return (data[0] << 21) |
  42. (data[1] << 14) |
  43. (data[2] << 7) |
  44. (data[3]);
  45. },
  46. frameParsers = {
  47. 'APIC': function(frame) {
  48. var
  49. i = 1,
  50. mimeTypeEndIndex,
  51. descriptionEndIndex,
  52. LINK_MIME_TYPE = '-->';
  53. if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
  54. // ignore frames with unrecognized character encodings
  55. return;
  56. }
  57. // parsing fields [ID3v2.4.0 section 4.14.]
  58. mimeTypeEndIndex = typedArrayIndexOf(frame.data, 0, i);
  59. if (mimeTypeEndIndex < 0) {
  60. // malformed frame
  61. return;
  62. }
  63. // parsing Mime type field (terminated with \0)
  64. frame.mimeType = parseIso88591(frame.data, i, mimeTypeEndIndex);
  65. i = mimeTypeEndIndex + 1;
  66. // parsing 1-byte Picture Type field
  67. frame.pictureType = frame.data[i];
  68. i++
  69. descriptionEndIndex = typedArrayIndexOf(frame.data, 0, i);
  70. if (descriptionEndIndex < 0) {
  71. // malformed frame
  72. return;
  73. }
  74. // parsing Description field (terminated with \0)
  75. frame.description = parseUtf8(frame.data, i, descriptionEndIndex);
  76. i = descriptionEndIndex + 1;
  77. if (frame.mimeType === LINK_MIME_TYPE) {
  78. // parsing Picture Data field as URL (always represented as ISO-8859-1 [ID3v2.4.0 section 4.])
  79. frame.url = parseIso88591(frame.data, i, frame.data.length)
  80. } else {
  81. // parsing Picture Data field as binary data
  82. frame.pictureData = frame.data.subarray(i, frame.data.length);
  83. }
  84. },
  85. 'T*': function(frame) {
  86. if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
  87. // ignore frames with unrecognized character encodings
  88. return;
  89. }
  90. // parse text field, do not include null terminator in the frame value
  91. // frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
  92. frame.value = parseUtf8(frame.data, 1, frame.data.length).replace(/\0*$/, '');
  93. // text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
  94. frame.values = frame.value.split('\0');
  95. },
  96. 'TXXX': function(frame) {
  97. var descriptionEndIndex;
  98. if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
  99. // ignore frames with unrecognized character encodings
  100. return;
  101. }
  102. descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);
  103. if (descriptionEndIndex === -1) {
  104. return;
  105. }
  106. // parse the text fields
  107. frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);
  108. // do not include the null terminator in the tag value
  109. // frames that allow different types of encoding contain terminated text
  110. // [ID3v2.4.0 section 4.]
  111. frame.value = parseUtf8(
  112. frame.data,
  113. descriptionEndIndex + 1,
  114. frame.data.length
  115. ).replace(/\0*$/, '');
  116. frame.data = frame.value;
  117. },
  118. 'W*': function(frame) {
  119. // parse URL field; URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
  120. // if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
  121. frame.url = parseIso88591(frame.data, 0, frame.data.length).replace(/\0.*$/, '');
  122. },
  123. 'WXXX': function(frame) {
  124. var descriptionEndIndex;
  125. if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
  126. // ignore frames with unrecognized character encodings
  127. return;
  128. }
  129. descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);
  130. if (descriptionEndIndex === -1) {
  131. return;
  132. }
  133. // parse the description and URL fields
  134. frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);
  135. // URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
  136. // if the value is followed by a string termination all the following information
  137. // should be ignored [ID3v2.4.0 section 4.3]
  138. frame.url = parseIso88591(
  139. frame.data,
  140. descriptionEndIndex + 1,
  141. frame.data.length
  142. ).replace(/\0.*$/, '');
  143. },
  144. 'PRIV': function(frame) {
  145. var i;
  146. for (i = 0; i < frame.data.length; i++) {
  147. if (frame.data[i] === 0) {
  148. // parse the description and URL fields
  149. frame.owner = parseIso88591(frame.data, 0, i);
  150. break;
  151. }
  152. }
  153. frame.privateData = frame.data.subarray(i + 1);
  154. frame.data = frame.privateData;
  155. }
  156. };
  /**
   * Parse the frames contained in an ID3 tag.
   *
   * Every frame found is returned as an object with `id` and `key` (the
   * four-character frame id) and `data` (the frame body). When a parser is
   * registered for the frame in `frameParsers` -- under its exact id, or the
   * generic 'T*' / 'W*' entries for ids starting with 'T' / 'W' -- it is run
   * on the frame and may add further fields. Frames without a parser are
   * still returned.
   *
   * @param {Uint8Array} data bytes beginning with the 10-byte ID3 tag header
   * @return {Array<Object>|undefined} the parsed frames, or undefined when
   *         `data` is missing, shorter than a tag header, or does not start
   *         with 'ID3'
   */
  var parseId3Frames = function(data) {
    var
      frameSize,
      frameHeader,
      frame,
      hasExtendedHeader,
      frameStart = 10,
      tagSize = 0,
      frames = [];

    // If we don't have any data, enough data for a header (10 bytes),
    // or 'ID3' in the first 3 bytes this is not a valid ID3 tag.
    if (!data ||
        data.length < 10 ||
        data[0] !== 'I'.charCodeAt(0) ||
        data[1] !== 'D'.charCodeAt(0) ||
        data[2] !== '3'.charCodeAt(0)) {
      return;
    }

    // the tag size is transmitted as a 28-bit integer in the
    // last four bytes of the ID3 header.
    // The most significant bit of each byte is dropped and the
    // results concatenated to recover the actual value.
    tagSize = parseSyncSafeInteger(data.subarray(6, 10));
    // ID3 reports the tag size excluding the header but it's more
    // convenient for our comparisons to include it
    tagSize += 10;

    // check bit 6 of byte 5 for the extended header flag.
    hasExtendedHeader = data[5] & 0x40;
    if (hasExtendedHeader) {
      // advance the frame start past the extended header
      frameStart += 4; // header size field
      frameStart += parseSyncSafeInteger(data.subarray(10, 14));
      tagSize -= parseSyncSafeInteger(data.subarray(16, 20)); // clip any padding off the end
    }

    // parse one or more ID3 frames
    // http://id3.org/id3v2.3.0#ID3v2_frame_overview
    do {
      // determine the number of bytes in this frame
      frameSize = parseSyncSafeInteger(data.subarray(frameStart + 4, frameStart + 8));
      if (frameSize < 1) {
        // a zero size means padding or the end of the available data
        break;
      }

      frameHeader = String.fromCharCode(data[frameStart],
                                        data[frameStart + 1],
                                        data[frameStart + 2],
                                        data[frameStart + 3]);

      frame = {
        id: frameHeader,
        // the body follows the 10-byte frame header
        data: data.subarray(frameStart + 10, frameStart + frameSize + 10)
      };
      frame.key = frame.id;

      // parse frame values
      if (frameParsers[frame.id]) {
        // use frame specific parser
        frameParsers[frame.id](frame);
      } else if (frame.id[0] === 'T') {
        // use text frame generic parser
        frameParsers['T*'](frame);
      } else if (frame.id[0] === 'W') {
        // use URL link frame generic parser
        frameParsers['W*'](frame);
      }

      frames.push(frame);

      frameStart += 10; // advance past the frame header
      frameStart += frameSize; // advance past the frame body
    } while (frameStart < tagSize);

    return frames;
  };
  220. module.exports = {
  221. parseId3Frames: parseId3Frames,
  222. parseSyncSafeInteger: parseSyncSafeInteger,
  223. frameParsers: frameParsers,
  224. };