parse-html.js 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. import { __assign } from "tslib";
  2. import { letterRe, digitRe, whitespaceRe, quoteRe, controlCharsRe } from '../regex-lib';
  3. import { assertNever } from '../utils';
  4. // For debugging: search for other "For debugging" lines
  5. // import CliTable from 'cli-table';
  /**
   * Parses an HTML string, calling the callbacks to notify of tags and text.
   *
   * ## History
   *
   * This file previously used a regular expression to find html tags in the input
   * text. Unfortunately, we ran into a bunch of catastrophic backtracking issues
   * with certain input text, causing Autolinker to either hang or just take a
   * really long time to parse the string.
   *
   * The current code is intended to be a O(n) algorithm that walks through
   * the string in one pass, and tries to be as cheap as possible. We don't need
   * to implement the full HTML spec, but rather simply determine where the string
   * looks like an HTML tag, and where it looks like text (so that we can autolink
   * that).
   *
   * This state machine parser is intended just to be a simple but performant
   * parser of HTML for the subset of requirements we have. We simply need to:
   *
   * 1. Determine where HTML tags are
   * 2. Determine the tag name (Autolinker specifically only cares about <a>,
   *    <script>, and <style> tags, so as not to link any text within them)
   *
   * We don't need to:
   *
   * 1. Create a parse tree
   * 2. Auto-close tags with invalid markup
   * 3. etc.
   *
   * The other intention behind this is that we didn't want to add external
   * dependencies on the Autolinker utility which would increase its size. For
   * instance, adding htmlparser2 adds 125kb to the minified output file,
   * increasing its final size from 47kb to 172kb (at the time of writing). It
   * also doesn't work exactly correctly, treating the string "<3 blah blah blah"
   * as an HTML tag.
   *
   * Reference for HTML spec:
   *
   * https://www.w3.org/TR/html51/syntax.html#sec-tokenization
   *
   * @param {String} html The HTML to parse
   * @param {Object} callbacks
   * @param {Function} callbacks.onOpenTag Callback function to call when an open
   *   tag is parsed. Called with the lower-cased tag name as its first argument,
   *   and the index of the tag's '<' character as its second.
   * @param {Function} callbacks.onCloseTag Callback function to call when a close
   *   tag is parsed. Called with the lower-cased tag name as its first argument,
   *   and the index of the tag's '<' character as its second. If a self-closing
   *   tag is found, `onCloseTag` is called immediately after `onOpenTag`.
   * @param {Function} callbacks.onText Callback function to call when text (i.e
   *   not an HTML tag) is parsed. Called with the text (string) as its first
   *   argument, and offset (number) into the string as its second.
   * @param {Function} callbacks.onComment Callback function to call when an HTML
   *   comment is parsed. Called with the index of the comment's '<' character.
   * @param {Function} callbacks.onDoctype Callback function to call when a
   *   doctype declaration is parsed. Called with the index of its '<' character.
   */
  export function parseHtml(html, _a) {
      var onOpenTag = _a.onOpenTag;
      var onCloseTag = _a.onCloseTag;
      var onText = _a.onText;
      var onComment = _a.onComment;
      var onDoctype = _a.onDoctype;
      // Shared placeholder used whenever no tag is currently being read
      var noCurrentTag = new CurrentTag();
      var charIdx = 0;
      var len = html.length;
      var state = 0 /* Data */;
      var currentDataIdx = 0; // where the current data start index is
      var currentTag = noCurrentTag; // describes the current tag that is being read
      // For debugging: search for other "For debugging" lines
      // const table = new CliTable( {
      //     head: [ 'charIdx', 'char', 'state', 'currentDataIdx', 'currentOpenTagIdx', 'tag.type' ]
      // } );
      while (charIdx < len) {
          var char = html.charAt(charIdx);
          // For debugging: search for other "For debugging" lines
          // ALSO: Temporarily remove the 'const' keyword on the State enum
          // table.push(
          //     [ charIdx, char, State[ state ], currentDataIdx, currentTag.idx, currentTag.idx === -1 ? '' : currentTag.type ]
          // );
          switch (state) {
              case 0 /* Data */:
                  stateData(char);
                  break;
              case 1 /* TagOpen */:
                  stateTagOpen(char);
                  break;
              case 2 /* EndTagOpen */:
                  stateEndTagOpen(char);
                  break;
              case 3 /* TagName */:
                  stateTagName(char);
                  break;
              case 4 /* BeforeAttributeName */:
                  stateBeforeAttributeName(char);
                  break;
              case 5 /* AttributeName */:
                  stateAttributeName(char);
                  break;
              case 6 /* AfterAttributeName */:
                  stateAfterAttributeName(char);
                  break;
              case 7 /* BeforeAttributeValue */:
                  stateBeforeAttributeValue(char);
                  break;
              case 8 /* AttributeValueDoubleQuoted */:
                  stateAttributeValueDoubleQuoted(char);
                  break;
              case 9 /* AttributeValueSingleQuoted */:
                  stateAttributeValueSingleQuoted(char);
                  break;
              case 10 /* AttributeValueUnquoted */:
                  stateAttributeValueUnquoted(char);
                  break;
              case 11 /* AfterAttributeValueQuoted */:
                  stateAfterAttributeValueQuoted(char);
                  break;
              case 12 /* SelfClosingStartTag */:
                  stateSelfClosingStartTag(char);
                  break;
              case 13 /* MarkupDeclarationOpenState */:
                  stateMarkupDeclarationOpen(char);
                  break;
              case 14 /* CommentStart */:
                  stateCommentStart(char);
                  break;
              case 15 /* CommentStartDash */:
                  stateCommentStartDash(char);
                  break;
              case 16 /* Comment */:
                  stateComment(char);
                  break;
              case 17 /* CommentEndDash */:
                  stateCommentEndDash(char);
                  break;
              case 18 /* CommentEnd */:
                  stateCommentEnd(char);
                  break;
              case 19 /* CommentEndBang */:
                  stateCommentEndBang(char);
                  break;
              case 20 /* Doctype */:
                  stateDoctype(char);
                  break;
              default:
                  assertNever(state);
          }
          // For debugging: search for other "For debugging" lines
          // ALSO: Temporarily remove the 'const' keyword on the State enum
          // table.push(
          //     [ charIdx, char, State[ state ], currentDataIdx, currentTag.idx, currentTag.idx === -1 ? '' : currentTag.type ]
          // );
          // Note: state handlers may move `charIdx` themselves (to consume extra
          // characters, or to re-consume the current one) before this increment
          charIdx++;
      }
      // Flush whatever is left after the last emitted tag. This includes any
      // incomplete tag/comment that was still being read when the input ended.
      if (currentDataIdx < charIdx) {
          emitText();
      }
      // For debugging: search for other "For debugging" lines
      // console.log( '\n' + table.toString() );

      // Called when non-tags are being read (i.e. the text around HTML tags)
      // https://www.w3.org/TR/html51/syntax.html#data-state
      function stateData(char) {
          if (char === '<') {
              startNewTag();
          }
      }
      // Called after a '<' is read from the Data state
      // https://www.w3.org/TR/html51/syntax.html#tag-open-state
      function stateTagOpen(char) {
          if (char === '!') {
              state = 13 /* MarkupDeclarationOpenState */;
          }
          else if (char === '/') {
              state = 2 /* EndTagOpen */;
              updateCurrentTag({ isClosing: true });
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else if (letterRe.test(char)) {
              // tag name start (and no '/' read)
              state = 3 /* TagName */;
              updateCurrentTag({ isOpening: true });
          }
          else {
              // Any other character: the '<' was just text
              resetToDataState();
          }
      }
      // After a '<x', '</x' sequence is read (where 'x' is a letter character),
      // this is to continue reading the tag name
      // https://www.w3.org/TR/html51/syntax.html#tag-name-state
      function stateTagName(char) {
          if (whitespaceRe.test(char)) {
              updateCurrentTag({ name: captureTagName() });
              state = 4 /* BeforeAttributeName */;
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else if (char === '/') {
              updateCurrentTag({ name: captureTagName() });
              state = 12 /* SelfClosingStartTag */;
          }
          else if (char === '>') {
              updateCurrentTag({ name: captureTagName() });
              emitTagAndPreviousTextNode(); // resets to Data state as well
          }
          else if (!letterRe.test(char) && !digitRe.test(char) && char !== ':') {
              // Anything else that does not form an html tag. Note: the colon
              // character is accepted for XML namespaced tags
              resetToDataState();
          }
          // otherwise: continue reading the tag name
      }
      // Called after the '/' is read from a '</' sequence
      // https://www.w3.org/TR/html51/syntax.html#end-tag-open-state
      function stateEndTagOpen(char) {
          if (char === '>') {
              // parse error. Encountered "</>". Skip it without treating as a tag
              resetToDataState();
          }
          else if (letterRe.test(char)) {
              state = 3 /* TagName */;
          }
          else {
              // some other non-tag-like character, don't treat this as a tag
              resetToDataState();
          }
      }
      // https://www.w3.org/TR/html51/syntax.html#before-attribute-name-state
      function stateBeforeAttributeName(char) {
          if (whitespaceRe.test(char)) {
              // stay in BeforeAttributeName state - continue reading chars
          }
          else if (char === '/') {
              state = 12 /* SelfClosingStartTag */;
          }
          else if (char === '>') {
              emitTagAndPreviousTextNode(); // resets to Data state as well
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else if (char === "=" || quoteRe.test(char) || controlCharsRe.test(char)) {
              // "Parse error" characters that, according to the spec, should be
              // appended to the attribute name, but we'll treat these characters
              // as not forming a real HTML tag
              resetToDataState();
          }
          else {
              // Any other char, start of a new attribute name
              state = 5 /* AttributeName */;
          }
      }
      // https://www.w3.org/TR/html51/syntax.html#attribute-name-state
      function stateAttributeName(char) {
          if (whitespaceRe.test(char)) {
              state = 6 /* AfterAttributeName */;
          }
          else if (char === '/') {
              state = 12 /* SelfClosingStartTag */;
          }
          else if (char === '=') {
              state = 7 /* BeforeAttributeValue */;
          }
          else if (char === '>') {
              emitTagAndPreviousTextNode(); // resets to Data state as well
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else if (quoteRe.test(char)) {
              // "Parse error" characters that, according to the spec, should be
              // appended to the attribute name, but we'll treat these characters
              // as not forming a real HTML tag
              resetToDataState();
          }
          // anything else: continue reading attribute name
      }
      // https://www.w3.org/TR/html51/syntax.html#after-attribute-name-state
      function stateAfterAttributeName(char) {
          if (whitespaceRe.test(char)) {
              // ignore the character - continue reading
          }
          else if (char === '/') {
              state = 12 /* SelfClosingStartTag */;
          }
          else if (char === '=') {
              state = 7 /* BeforeAttributeValue */;
          }
          else if (char === '>') {
              emitTagAndPreviousTextNode();
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else if (quoteRe.test(char)) {
              // "Parse error" characters that, according to the spec, should be
              // appended to the attribute name, but we'll treat these characters
              // as not forming a real HTML tag
              resetToDataState();
          }
          else {
              // Any other character, start a new attribute in the current tag
              state = 5 /* AttributeName */;
          }
      }
      // https://www.w3.org/TR/html51/syntax.html#before-attribute-value-state
      function stateBeforeAttributeValue(char) {
          if (whitespaceRe.test(char)) {
              // ignore the character - continue reading
          }
          else if (char === "\"") {
              state = 8 /* AttributeValueDoubleQuoted */;
          }
          else if (char === "'") {
              state = 9 /* AttributeValueSingleQuoted */;
          }
          else if (/[>=`]/.test(char)) {
              // Invalid chars after an '=' for an attribute value, don't count
              // the current tag as an HTML tag
              resetToDataState();
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else {
              // Any other character, consider it an unquoted attribute value
              state = 10 /* AttributeValueUnquoted */;
          }
      }
      // https://www.w3.org/TR/html51/syntax.html#attribute-value-double-quoted-state
      function stateAttributeValueDoubleQuoted(char) {
          if (char === "\"") {
              // end the current double-quoted attribute
              state = 11 /* AfterAttributeValueQuoted */;
          }
          // otherwise: consume the character as part of the double-quoted
          // attribute value (this includes '<' and '>')
      }
      // https://www.w3.org/TR/html51/syntax.html#attribute-value-single-quoted-state
      function stateAttributeValueSingleQuoted(char) {
          if (char === "'") {
              // end the current single-quoted attribute
              state = 11 /* AfterAttributeValueQuoted */;
          }
          // otherwise: consume the character as part of the single-quoted
          // attribute value (this includes '<' and '>')
      }
      // https://www.w3.org/TR/html51/syntax.html#attribute-value-unquoted-state
      function stateAttributeValueUnquoted(char) {
          if (whitespaceRe.test(char)) {
              state = 4 /* BeforeAttributeName */;
          }
          else if (char === '>') {
              emitTagAndPreviousTextNode();
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          // Any other character, treat it as part of the attribute value
      }
      // https://www.w3.org/TR/html51/syntax.html#after-attribute-value-quoted-state
      function stateAfterAttributeValueQuoted(char) {
          if (whitespaceRe.test(char)) {
              state = 4 /* BeforeAttributeName */;
          }
          else if (char === '/') {
              state = 12 /* SelfClosingStartTag */;
          }
          else if (char === '>') {
              emitTagAndPreviousTextNode();
          }
          else if (char === '<') {
              // start of another tag (ignore the previous, incomplete one)
              startNewTag();
          }
          else {
              // Any other character, "parse error". Spec says to switch to the
              // BeforeAttributeState and re-consume the character, as it may be
              // the start of a new attribute name
              state = 4 /* BeforeAttributeName */;
              reconsumeCurrentCharacter();
          }
      }
      // A '/' has just been read in the current tag (presumably for '/>'), and
      // this handles the next character
      // https://www.w3.org/TR/html51/syntax.html#self-closing-start-tag-state
      function stateSelfClosingStartTag(char) {
          if (char === '>') {
              updateCurrentTag({ isClosing: true });
              emitTagAndPreviousTextNode(); // resets to Data state as well
          }
          else {
              state = 4 /* BeforeAttributeName */;
          }
      }
      // https://www.w3.org/TR/html51/syntax.html#markup-declaration-open-state
      // (HTML Comments or !DOCTYPE)
      //
      // `char` (at `charIdx`) is the first character after the '<!' sequence.
      // The main loop's `charIdx++` accounts for that character, so only the
      // REMAINING characters of the matched keyword are skipped here. Skipping
      // the keyword's full length would silently drop the character that
      // follows it (e.g. the '>' of '<!-->' or '<!DOCTYPE>').
      function stateMarkupDeclarationOpen(char) {
          if (html.slice(charIdx, charIdx + 2) === '--') {
              // html comment
              charIdx += 1; // "consume" the second '-' of '<!--'
              updateCurrentTag({ type: 'comment' });
              state = 14 /* CommentStart */;
          }
          else if (html.slice(charIdx, charIdx + 7).toUpperCase() === 'DOCTYPE') {
              charIdx += 6; // "consume" 'OCTYPE' (the 'D' is the current char)
              updateCurrentTag({ type: 'doctype' });
              state = 20 /* Doctype */;
          }
          else {
              // At this point, the spec specifies that the state machine should
              // enter the "bogus comment" state, in which case any character(s)
              // after the '<!' that were read should become an HTML comment up
              // until the first '>' that is read (or EOF). Instead, we'll assume
              // that a user just typed '<!' as part of text data
              resetToDataState();
          }
      }
      // Handles after the sequence '<!--' has been read
      // https://www.w3.org/TR/html51/syntax.html#comment-start-state
      function stateCommentStart(char) {
          if (char === '-') {
              // We've read the sequence '<!---' at this point (3 dashes)
              state = 15 /* CommentStartDash */;
          }
          else if (char === '>') {
              // At this point, we'll assume the comment wasn't a real comment
              // so we'll just emit it as data. We basically read the sequence
              // '<!-->'
              resetToDataState();
          }
          else {
              // Any other char, take it as part of the comment
              state = 16 /* Comment */;
          }
      }
      // We've read the sequence '<!---' at this point (3 dashes)
      // https://www.w3.org/TR/html51/syntax.html#comment-start-dash-state
      function stateCommentStartDash(char) {
          if (char === '-') {
              // We've read '<!----' (4 dashes) at this point
              state = 18 /* CommentEnd */;
          }
          else if (char === '>') {
              // At this point, we'll assume the comment wasn't a real comment
              // so we'll just emit it as data. We basically read the sequence
              // '<!--->'
              resetToDataState();
          }
          else {
              // Anything else, take it as a valid comment
              state = 16 /* Comment */;
          }
      }
      // Currently reading the comment's text (data)
      // https://www.w3.org/TR/html51/syntax.html#comment-state
      function stateComment(char) {
          if (char === '-') {
              state = 17 /* CommentEndDash */;
          }
          // Any other character, stay in the Comment state
      }
      // When we've read the first dash inside a comment, it may signal the
      // end of the comment if we read another dash
      // https://www.w3.org/TR/html51/syntax.html#comment-end-dash-state
      function stateCommentEndDash(char) {
          if (char === '-') {
              state = 18 /* CommentEnd */;
          }
          else {
              // Wasn't a dash, must still be part of the comment
              state = 16 /* Comment */;
          }
      }
      // After we've read two dashes inside a comment, it may signal the end of
      // the comment if we then read a '>' char
      // https://www.w3.org/TR/html51/syntax.html#comment-end-state
      function stateCommentEnd(char) {
          if (char === '>') {
              emitTagAndPreviousTextNode();
          }
          else if (char === '!') {
              state = 19 /* CommentEndBang */;
          }
          else if (char === '-') {
              // A 3rd '-' has been read: stay in the CommentEnd state
          }
          else {
              // Anything else, switch back to the comment state since we didn't
              // read the full "end comment" sequence (i.e. '-->')
              state = 16 /* Comment */;
          }
      }
      // We've read the sequence '--!' inside of a comment
      // https://www.w3.org/TR/html51/syntax.html#comment-end-bang-state
      function stateCommentEndBang(char) {
          if (char === '-') {
              // We read the sequence '--!-' inside of a comment. The last dash
              // could signify that the comment is going to close
              state = 17 /* CommentEndDash */;
          }
          else if (char === '>') {
              // End of comment with the sequence '--!>'
              emitTagAndPreviousTextNode();
          }
          else {
              // The '--!' was not followed by a '>', continue reading the
              // comment's text
              state = 16 /* Comment */;
          }
      }
      /**
       * For DOCTYPES in particular, we don't care about the attributes. Just
       * advance to the '>' character and emit the tag, unless we find a '<'
       * character in which case we'll start a new tag.
       *
       * Example doctype tag:
       *    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
       *
       * Actual spec: https://www.w3.org/TR/html51/syntax.html#doctype-state
       */
      function stateDoctype(char) {
          if (char === '>') {
              emitTagAndPreviousTextNode();
          }
          else if (char === '<') {
              startNewTag();
          }
          // otherwise: stay in the Doctype state
      }
      /**
       * Resets the state back to the Data state, and removes the current tag.
       *
       * We'll generally run this function whenever a "parse error" is
       * encountered, where the current tag that is being read no longer looks
       * like a real HTML tag.
       */
      function resetToDataState() {
          state = 0 /* Data */;
          currentTag = noCurrentTag;
      }
      /**
       * Starts a new HTML tag at the current index, ignoring any previous HTML
       * tag that was being read.
       *
       * We'll generally run this function whenever we read a new '<' character,
       * including when we read a '<' character inside of an HTML tag that we were
       * previously reading.
       */
      function startNewTag() {
          state = 1 /* TagOpen */;
          currentTag = new CurrentTag({ idx: charIdx });
      }
      /**
       * Replaces `currentTag` with a copy of itself that has the given
       * properties overridden. A new object is created (rather than mutating in
       * place) so that the shared `noCurrentTag` placeholder is never modified.
       */
      function updateCurrentTag(changes) {
          currentTag = new CurrentTag(__assign(__assign({}, currentTag), changes));
      }
      /**
       * Once we've decided to emit a tag (open/close tag, comment, or doctype),
       * that means we can also emit the text node before it. Afterwards the
       * parser is back in the Data state, with the next text run starting right
       * after the current character.
       */
      function emitTagAndPreviousTextNode() {
          var textBeforeTag = html.slice(currentDataIdx, currentTag.idx);
          if (textBeforeTag) {
              // Only emit a text node when there is text. It is empty when the
              // tag is the first thing in the html string, or when two tags are
              // directly next to each other
              onText(textBeforeTag, currentDataIdx);
          }
          if (currentTag.type === 'comment') {
              onComment(currentTag.idx);
          }
          else if (currentTag.type === 'doctype') {
              onDoctype(currentTag.idx);
          }
          else {
              if (currentTag.isOpening) {
                  onOpenTag(currentTag.name, currentTag.idx);
              }
              if (currentTag.isClosing) {
                  // note: self-closing tags will emit both opening and closing
                  onCloseTag(currentTag.name, currentTag.idx);
              }
          }
          // Since we just emitted a tag, reset to the data state for the next char
          resetToDataState();
          currentDataIdx = charIdx + 1;
      }
      /**
       * Emits the text from the start of the current data run up to (but not
       * including) the current character index.
       */
      function emitText() {
          var text = html.slice(currentDataIdx, charIdx);
          onText(text, currentDataIdx);
          currentDataIdx = charIdx + 1;
      }
      /**
       * Captures the tag name from the start of the tag to the current character
       * index, and converts it to lower case
       */
      function captureTagName() {
          // Skip '<' for an open tag, or '</' for a close tag
          var startIdx = currentTag.idx + (currentTag.isClosing ? 2 : 1);
          return html.slice(startIdx, charIdx).toLowerCase();
      }
      /**
       * Causes the main loop to re-consume the current character, such as after
       * encountering a "parse error" that changed state and needs to reconsume
       * the same character in that new state.
       */
      function reconsumeCurrentCharacter() {
          // Cancels out the `charIdx++` at the end of the current loop iteration
          charIdx--;
      }
  }
  /**
   * Describes the tag that the parser is currently reading.
   *
   * Fields (all optional in `cfg`):
   * - `idx`: index of the tag's '<' character; -1 when not provided
   * - `type`: 'tag' unless another (truthy) type is provided
   * - `name`: the tag name; '' when not provided
   * - `isOpening` / `isClosing`: coerced to booleans
   */
  var CurrentTag = /** @class */ (function () {
      function CurrentTag(cfg) {
          var options = cfg === undefined ? {} : cfg;
          // An index of 0 is valid, so only `undefined` falls back to -1
          var hasIdx = options.idx !== undefined;
          this.idx = hasIdx ? options.idx : -1;
          this.type = options.type ? options.type : 'tag';
          this.name = options.name ? options.name : '';
          this.isOpening = Boolean(options.isOpening);
          this.isClosing = Boolean(options.isClosing);
      }
      return CurrentTag;
  }());
  633. //# sourceMappingURL=parse-html.js.map