parse-html.d.ts 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. /**
  2. * Parses an HTML string, calling the callbacks to notify of tags and text.
  3. *
  4. * ## History
  5. *
  6. * This file previously used a regular expression to find html tags in the input
  7. * text. Unfortunately, we ran into a bunch of catastrophic backtracking issues
  8. * with certain input text, causing Autolinker to either hang or just take a
  9. * really long time to parse the string.
  10. *
  11. * The current code is intended to be an O(n) algorithm that walks through
  12. * the string in one pass, and tries to be as cheap as possible. We don't need
  13. * to implement the full HTML spec, but rather simply determine where the string
  14. * looks like an HTML tag, and where it looks like text (so that we can autolink
  15. * that).
  16. *
  17. * This state machine parser is intended just to be a simple but performant
  18. * parser of HTML for the subset of requirements we have. We simply need to:
  19. *
  20. * 1. Determine where HTML tags are
  21. * 2. Determine the tag name (Autolinker specifically only cares about <a>,
  22. * <script>, and <style> tags, so as not to link any text within them)
  23. *
  24. * We don't need to:
  25. *
  26. * 1. Create a parse tree
  27. * 2. Auto-close tags with invalid markup
  28. * 3. etc.
  29. *
  30. * The other intention behind this is that we didn't want to add external
  31. * dependencies on the Autolinker utility which would increase its size. For
  32. * instance, adding htmlparser2 adds 125kb to the minified output file,
  33. * increasing its final size from 47kb to 172kb (at the time of writing). It
  34. * also doesn't work exactly correctly, treating the string "<3 blah blah blah"
  35. * as an HTML tag.
  36. *
  37. * Reference for HTML spec:
  38. *
  39. * https://www.w3.org/TR/html51/syntax.html#sec-tokenization
  40. *
  41. * @param {String} html The HTML to parse
  42. * @param {Object} callbacks
  43. * @param {Function} callbacks.onOpenTag Callback function to call when an open
  44. * tag is parsed. Called with the tagName as its first argument, and an offset (number) as its second.
  45. * @param {Function} callbacks.onCloseTag Callback function to call when a close
  46. * tag is parsed. Called with the tagName as its first argument, and an offset
  47. * (number) as its second. If a self-closing tag is found, `onCloseTag` is called immediately after `onOpenTag`.
  48. * @param {Function} callbacks.onText Callback function to call when text (i.e.
  49. * not an HTML tag) is parsed. Called with the text (string) as its first
  50. * argument, and offset (number) into the string as its second.
  51. */
  52. export declare function parseHtml(html: string, { onOpenTag, onCloseTag, onText, onComment, onDoctype, }: { // second argument: the callbacks object, destructured into its five handlers
  53. onOpenTag: (tagName: string, offset: number) => void; // receives the tag name and a numeric offset
  54. onCloseTag: (tagName: string, offset: number) => void; // receives the tag name and a numeric offset
  55. onText: (text: string, offset: number) => void; // receives the text and its offset into the string
  56. onComment: (offset: number) => void; // receives only a numeric offset; NOTE(review): presumably fired for HTML comments, not described in the doc comment, confirm against the implementation
  57. onDoctype: (offset: number) => void; // receives only a numeric offset; NOTE(review): presumably fired for a doctype declaration, not described in the doc comment, confirm against the implementation
  58. }): void; // returns nothing; parse results are reported through the callbacks