// tokenize.js (13 KB)
  1. "use strict";
  2. module.exports = tokenize;
  // Matches any single character that ends a token (whitespace, brackets, quotes, punctuation).
  // It carries the global flag, so `lastIndex` is stateful between `test()` calls.
  var delimRe = /[\s{}=;:[\],'"()<>]/g;

  // Matches a complete double-quoted string literal; group 1 is the raw (still escaped) content.
  var stringDoubleRe = /(?:"([^"\\]*(?:\\.[^"\\]*)*)")/g;

  // Matches a complete single-quoted string literal; group 1 is the raw (still escaped) content.
  var stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g;

  // Strips leading spaces followed by a run of "*" or "/" (and the spaces after it) from a comment line.
  var setCommentRe = /^ *[*/]+ */;

  // Alternate comment mode: strips leading whitespace, one optional "*" and any following slashes.
  var setCommentAltRe = /^\s*\*?\/*/;

  // Splits comment text into its individual lines.
  var setCommentSplitRe = /\n/g;

  // Tests whether a single character is whitespace.
  var whitespaceRe = /\s/;

  // Matches a backslash plus the (optional) character that follows it; group 1 is that character.
  var unescapeRe = /\\(.?)/g;

  // Escape letters that translate to a control character.
  var unescapeMap = { "0": "\0", "r": "\r", "n": "\n", "t": "\t" };
  /**
   * Unescapes a string.
   *
   * Every backslash sequence matched by `unescapeRe` is rewritten as follows:
   * - `\\`, `\"` and `\'` yield the escaped character itself,
   * - a character listed in `unescapeMap` (`0`, `r`, `n`, `t`) yields the mapped control character,
   * - a backslash that is not followed by a matchable character (end of input or a line break) is removed,
   * - any other escape sequence is dropped entirely.
   * @param {string} str String to unescape
   * @returns {string} Unescaped string
   * @property {Object.<string,string>} map Special characters map
   * @memberof tokenize
   */
  function unescape(str) {
      return str.replace(unescapeRe, function($0, $1) {
          switch ($1) {
              case "\\":
              case "\"": // an escaped quote must survive, otherwise "say \"hi\"" loses its quotes
              case "'":
              case "":
                  return $1;
              default:
                  return unescapeMap[$1] || "";
          }
      });
  }

  tokenize.unescape = unescape;
  36. /**
  37. * Gets the next token and advances.
  38. * @typedef TokenizerHandleNext
  39. * @type {function}
  40. * @returns {string|null} Next token or `null` on eof
  41. */
  42. /**
  43. * Peeks for the next token.
  44. * @typedef TokenizerHandlePeek
  45. * @type {function}
  46. * @returns {string|null} Next token or `null` on eof
  47. */
  48. /**
  49. * Pushes a token back to the stack.
  50. * @typedef TokenizerHandlePush
  51. * @type {function}
  52. * @param {string} token Token
  53. * @returns {undefined}
  54. */
  55. /**
  56. * Skips the next token.
  57. * @typedef TokenizerHandleSkip
  58. * @type {function}
  59. * @param {string} expected Expected token
  60. * @param {boolean} [optional=false] If optional
  61. * @returns {boolean} Whether the token matched
  62. * @throws {Error} If the token didn't match and is not optional
  63. */
  64. /**
  65. * Gets the comment on the previous line or, alternatively, the line comment on the specified line.
  66. * @typedef TokenizerHandleCmnt
  67. * @type {function}
  68. * @param {number} [line] Line number
  69. * @returns {string|null} Comment text or `null` if none
  70. */
  71. /**
  72. * Handle object returned from {@link tokenize}.
  73. * @interface ITokenizerHandle
  74. * @property {TokenizerHandleNext} next Gets the next token and advances (`null` on eof)
  75. * @property {TokenizerHandlePeek} peek Peeks for the next token (`null` on eof)
  76. * @property {TokenizerHandlePush} push Pushes a token back to the stack
  77. * @property {TokenizerHandleSkip} skip Skips a token, returns its presence and advances or, if non-optional and not present, throws
  78. * @property {TokenizerHandleCmnt} cmnt Gets the comment on the previous line or the line comment on the specified line, if any
  79. * @property {number} line Current line number
  80. */
  /**
   * Tokenizes the given .proto source and returns an object with useful utility functions.
   *
   * Tokens are produced lazily by the returned handle. Comments are never returned as tokens;
   * documentation comments are recorded per line and can be queried through `cmnt`.
   * In regular mode only `///` and `/** ... *\/` comments are recorded, in alternate comment
   * mode every `//` and `/* ... *\/` comment is.
   * @param {string} source Source contents
   * @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode.
   * @returns {ITokenizerHandle} Tokenizer handle
   */
  function tokenize(source, alternateCommentMode) {
      /* eslint-disable callback-return */
      source = source.toString();

      var offset = 0,              // index of the next unread character
          length = source.length,
          line = 1,                // current line number, 1-based
          lastCommentLine = 0,     // line key of the most recently recorded comment
          comments = {};           // recorded comments keyed by line number

      var stack = [];              // tokens that were pushed back / peeked, served first by next()

      var stringDelim = null;      // opening quote of the string literal to read next, if any

      /* istanbul ignore next */
      /**
       * Creates an error for illegal syntax.
       * @param {string} subject Subject
       * @returns {Error} Error created
       * @inner
       */
      function illegal(subject) {
          return Error("illegal " + subject + " (line " + line + ")");
      }

      /**
       * Reads a string till its end.
       * Expects the opening delimiter to sit at `offset - 1` and queues the closing
       * delimiter so that it is returned as the following token.
       * @returns {string} String read
       * @inner
       */
      function readString() {
          var re = stringDelim === "'" ? stringSingleRe : stringDoubleRe;
          re.lastIndex = offset - 1;
          var match = re.exec(source);
          if (!match)
              throw illegal("string");
          offset = re.lastIndex;
          push(stringDelim);
          stringDelim = null;
          return unescape(match[1]);
      }

      /**
       * Gets the character at `pos` within the source.
       * @param {number} pos Position
       * @returns {string} Character
       * @inner
       */
      function charAt(pos) {
          return source.charAt(pos);
      }

      /**
       * Sets the current comment text.
       * The comment is stored under the current `line`.
       * @param {number} start Start offset
       * @param {number} end End offset
       * @param {boolean} isLeading set if a leading comment
       * @returns {undefined}
       * @inner
       */
      function setComment(start, end, isLeading) {
          var comment = {
              type: source.charAt(start++),
              lineEmpty: false,
              leading: isLeading
          };
          var lookback;
          if (alternateCommentMode) {
              lookback = 2;  // alternate comment parsing: "//" or "/*"
          } else {
              lookback = 3;  // "///" or "/**"
          }
          // Walk backwards from the comment opener: if only spaces or tabs precede it
          // on its line, the comment has the line to itself.
          var commentOffset = start - lookback,
              c;
          do {
              if (--commentOffset < 0 ||
                      (c = source.charAt(commentOffset)) === "\n") {
                  comment.lineEmpty = true;
                  break;
              }
          } while (c === " " || c === "\t");

          // Strip the comment decoration from every line and trim the result.
          var lines = source
              .substring(start, end)
              .split(setCommentSplitRe);
          for (var i = 0; i < lines.length; ++i)
              lines[i] = lines[i]
                  .replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "")
                  .trim();
          comment.text = lines
              .join("\n")
              .trim();

          comments[line] = comment;
          lastCommentLine = line;
      }

      /**
       * Tests whether the line beginning at `startOffset` is a double-slash comment line,
       * i.e. optional whitespace followed by `//`.
       * @param {number} startOffset Offset of the first character to inspect
       * @returns {boolean} `true` if the rest of the line starts a `//` comment
       * @inner
       */
      function isDoubleSlashCommentLine(startOffset) {
          var lineText = source.substring(startOffset, findEndOfLine(startOffset));
          // Both slashes are required: a lone "/" or a "/*" opener is not a continuation
          // of a double-slash comment and must be tokenized on its own.
          return /^\s*\/\//.test(lineText);
      }

      /**
       * Finds the end of the line that `cursor` is on.
       * @param {number} cursor Offset within the line
       * @returns {number} Offset of the next `"\n"`, or the source length if there is none
       * @inner
       */
      function findEndOfLine(cursor) {
          var endOffset = cursor;
          while (endOffset < length && charAt(endOffset) !== "\n") {
              endOffset++;
          }
          return endOffset;
      }

      /**
       * Obtains the next token.
       * @returns {string|null} Next token or `null` on eof
       * @throws {Error} On an unterminated comment or string
       * @inner
       */
      function next() {
          if (stack.length > 0)
              return stack.shift();
          if (stringDelim)
              return readString();
          var repeat,
              prev,
              curr,
              start,
              isDoc,
              isLeadingComment = offset === 0;
          do {
              if (offset === length)
                  return null;
              repeat = false;
              while (whitespaceRe.test(curr = charAt(offset))) {
                  if (curr === "\n") {
                      isLeadingComment = true;
                      ++line;
                  }
                  if (++offset === length)
                      return null;
              }

              if (charAt(offset) === "/") {
                  if (++offset === length) {
                      throw illegal("comment");
                  }
                  if (charAt(offset) === "/") { // Line
                      if (!alternateCommentMode) {
                          // check for triple-slash comment
                          start = offset + 1;
                          isDoc = charAt(start) === "/";

                          // Stops at the newline or at eof, so a comment on the very last
                          // line is still recorded when the source has no final newline.
                          offset = findEndOfLine(offset);
                          if (isDoc) {
                              setComment(start, offset, isLeadingComment);
                          }
                          if (offset < length) {
                              ++offset; // consume "\n"
                              ++line;
                          }
                          // Trailing comment cannot not be multi-line, and whatever follows
                          // a line comment starts on a fresh line: it is leading.
                          isLeadingComment = true;
                          repeat = true;
                      } else {
                          // double-slash comment, consolidating consecutive lines
                          start = offset;
                          for (;;) {
                              offset = findEndOfLine(offset);
                              if (offset === length) {
                                  break;
                              }
                              offset++; // consume "\n"
                              // Trailing comment cannot not be multi-line
                              if (!isLeadingComment || !isDoubleSlashCommentLine(offset)) {
                                  break;
                              }
                              // The comment continues: keep the line counter in sync so that
                              // it is recorded under its last line and `line` stays accurate.
                              ++line;
                          }
                          setComment(start, offset, isLeadingComment);
                          isLeadingComment = true;
                          line++;
                          repeat = true;
                      }
                  } else if ((curr = charAt(offset)) === "*") { /* Block */
                      // check for /** (regular comment mode) or /* (alternate comment mode)
                      start = offset + 1;
                      isDoc = alternateCommentMode || charAt(start) === "*";
                      do {
                          if (curr === "\n") {
                              ++line;
                          }
                          if (++offset === length) {
                              throw illegal("comment");
                          }
                          prev = curr;
                          curr = charAt(offset);
                      } while (prev !== "*" || curr !== "/");
                      ++offset;
                      if (isDoc) {
                          setComment(start, offset - 2, isLeadingComment);
                          isLeadingComment = true;
                      }
                      repeat = true;
                  } else {
                      return "/";
                  }
              }
          } while (repeat);

          // offset !== length if we got here

          // A delimiter is a token of its own; anything else runs up to the next delimiter.
          var end = offset;
          delimRe.lastIndex = 0;
          var delim = delimRe.test(charAt(end++));
          if (!delim)
              while (end < length && !delimRe.test(charAt(end)))
                  ++end;
          var token = source.substring(offset, offset = end);
          if (token === "\"" || token === "'")
              stringDelim = token;
          return token;
      }

      /**
       * Pushes a token back to the stack.
       * @param {string} token Token
       * @returns {undefined}
       * @inner
       */
      function push(token) {
          stack.push(token);
      }

      /**
       * Peeks for the next token.
       * @returns {string|null} Token or `null` on eof
       * @inner
       */
      function peek() {
          if (!stack.length) {
              var token = next();
              if (token === null)
                  return null;
              push(token);
          }
          return stack[0];
      }

      /**
       * Skips a token.
       * @param {string} expected Expected token
       * @param {boolean} [optional=false] Whether the token is optional
       * @returns {boolean} `true` when skipped, `false` if not
       * @throws {Error} When a required token is not present
       * @inner
       */
      function skip(expected, optional) {
          var actual = peek(),
              equals = actual === expected;
          if (equals) {
              next();
              return true;
          }
          if (!optional)
              throw illegal("token '" + actual + "', '" + expected + "' expected");
          return false;
      }

      /**
       * Gets a comment.
       * Without an argument, returns the leading comment recorded for the line before the
       * current one. With a line number, returns the trailing comment recorded on that line.
       * The looked-up comment is removed from the store either way.
       * @param {number} [trailingLine] Line number if looking for a trailing comment
       * @returns {string|null} Comment text
       * @inner
       */
      function cmnt(trailingLine) {
          var ret = null;
          var comment;
          if (trailingLine === undefined) {
              comment = comments[line - 1];
              delete comments[line - 1];
              if (comment && (alternateCommentMode || comment.type === "*" || comment.lineEmpty)) {
                  ret = comment.leading ? comment.text : null;
              }
          } else {
              /* istanbul ignore else */
              if (lastCommentLine < trailingLine) {
                  // The trailing comment may not have been scanned yet.
                  peek();
              }
              comment = comments[trailingLine];
              delete comments[trailingLine];
              if (comment && !comment.lineEmpty && (alternateCommentMode || comment.type === "/")) {
                  ret = comment.leading ? null : comment.text;
              }
          }
          return ret;
      }

      return Object.defineProperty({
          next: next,
          peek: peek,
          push: push,
          skip: skip,
          cmnt: cmnt
      }, "line", {
          get: function() { return line; }
      });
      /* eslint-enable callback-return */
  }