autolinker.js 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866
  1. import { version } from './version';
  2. import { defaults, isBoolean, removeWithPredicate } from './utils';
  3. import { AnchorTagBuilder } from './anchor-tag-builder';
  4. import { HtmlTag } from './html-tag';
  5. import { parseMatches } from './parser/parse-matches';
  6. import { parseHtml } from './htmlParser/parse-html';
  7. import { mentionServices } from './parser/mention-utils';
  8. import { hashtagServices } from './parser/hashtag-utils';
  9. /**
  10. * @class Autolinker
  11. * @extends Object
  12. *
  13. * Utility class used to process a given string of text, and wrap the matches in
  14. * the appropriate anchor (<a>) tags to turn them into links.
  15. *
  16. * Any of the configuration options may be provided in an Object provided
  17. * to the Autolinker constructor, which will configure how the {@link #link link()}
  18. * method will process the links.
  19. *
  20. * For example:
  21. *
  22. * var autolinker = new Autolinker( {
  23. * newWindow : false,
  24. * truncate : 30
  25. * } );
  26. *
  27. * var html = autolinker.link( "Joe went to www.yahoo.com" );
  28. * // produces: 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>'
  29. *
  30. *
  31. * The {@link #static-link static link()} method may also be used to inline
  32. * options into a single call, which may be more convenient for one-off uses.
  33. * For example:
  34. *
  35. * var html = Autolinker.link( "Joe went to www.yahoo.com", {
  36. * newWindow : false,
  37. * truncate : 30
  38. * } );
  39. * // produces: 'Joe went to <a href="http://www.yahoo.com">yahoo.com</a>'
  40. *
  41. *
  42. * ## Custom Replacements of Links
  43. *
  44. * If the configuration options do not provide enough flexibility, a {@link #replaceFn}
  45. * may be provided to fully customize the output of Autolinker. This function is
  46. * called once for each URL/Email/Phone#/Hashtag/Mention (Twitter, Instagram, Soundcloud)
  47. * match that is encountered.
  48. *
  49. * For example:
  50. *
  51. * var input = "..."; // string with URLs, Email Addresses, Phone #s, Hashtags, and Mentions (Twitter, Instagram, Soundcloud)
  52. *
  53. * var linkedText = Autolinker.link( input, {
  54. * replaceFn : function( match ) {
  55. * console.log( "href = ", match.getAnchorHref() );
  56. * console.log( "text = ", match.getAnchorText() );
  57. *
  58. * switch( match.getType() ) {
  59. * case 'url' :
  60. * console.log( "url: ", match.getUrl() );
  61. *
  62. * if( match.getUrl().indexOf( 'mysite.com' ) === -1 ) {
  63. * var tag = match.buildTag(); // returns an `Autolinker.HtmlTag` instance, which provides mutator methods for easy changes
  64. * tag.setAttr( 'rel', 'nofollow' );
  65. * tag.addClass( 'external-link' );
  66. *
  67. * return tag;
  68. *
  69. * } else {
  70. * return true; // let Autolinker perform its normal anchor tag replacement
  71. * }
  72. *
  73. * case 'email' :
  74. * var email = match.getEmail();
  75. * console.log( "email: ", email );
  76. *
  77. * if( email === "my@own.address" ) {
  78. * return false; // don't auto-link this particular email address; leave as-is
  79. * } else {
  80. * return; // no return value will have Autolinker perform its normal anchor tag replacement (same as returning `true`)
  81. * }
  82. *
  83. * case 'phone' :
  84. * var phoneNumber = match.getPhoneNumber();
  85. * console.log( phoneNumber );
  86. *
  87. * return '<a href="http://newplace.to.link.phone.numbers.to/">' + phoneNumber + '</a>';
  88. *
  89. * case 'hashtag' :
  90. * var hashtag = match.getHashtag();
  91. * console.log( hashtag );
  92. *
  93. * return '<a href="http://newplace.to.link.hashtag.handles.to/">' + hashtag + '</a>';
  94. *
  95. * case 'mention' :
  96. * var mention = match.getMention();
  97. * console.log( mention );
  98. *
  99. * return '<a href="http://newplace.to.link.mention.to/">' + mention + '</a>';
  100. * }
  101. * }
  102. * } );
  103. *
  104. *
  105. * The function may return the following values:
  106. *
  107. * - `true` (Boolean): Allow Autolinker to replace the match as it normally
  108. * would.
  109. * - `false` (Boolean): Do not replace the current match at all - leave as-is.
  110. * - Any String: If a string is returned from the function, the string will be
  111. * used directly as the replacement HTML for the match.
  112. * - An {@link Autolinker.HtmlTag} instance, which can be used to build/modify
  113. * an HTML tag before writing out its HTML text.
  114. */
  115. var Autolinker = /** @class */ (function () {
  116. /**
  117. * @method constructor
  118. * @param {Object} [cfg] The configuration options for the Autolinker instance,
  119. * specified in an Object (map).
  120. */
  function Autolinker(cfg) {
    // Treat both `undefined` and `null` as "no configuration" so that calls
    // such as `new Autolinker(null)` or `Autolinker.link(text, null)` fall
    // back to the defaults instead of throwing on the first property read.
    if (cfg === void 0 || cfg === null) {
      cfg = {};
    }
    /**
     * The Autolinker version number exposed on the instance itself.
     *
     * Ex: 0.25.1
     *
     * @property {String} version
     */
    this.version = Autolinker.version;
    /**
     * @cfg {Boolean/Object} [urls]
     *
     * `true` if URLs should be automatically linked, `false` if they should not
     * be. Defaults to `true`.
     *
     * Examples:
     *
     *     urls: true
     *
     *     // or
     *
     *     urls: {
     *         schemeMatches : true,
     *         tldMatches    : true,
     *         ipV4Matches   : true
     *     }
     *
     * As shown above, this option also accepts an Object form with 3 properties
     * to allow for more customization of what exactly gets linked. All default
     * to `true`:
     *
     * @cfg {Boolean} [urls.schemeMatches] `true` to match URLs found prefixed
     *   with a scheme, i.e. `http://google.com`, or `other+scheme://google.com`,
     *   `false` to prevent these types of matches.
     * @cfg {Boolean} [urls.tldMatches] `true` to match URLs with known top
     *   level domains (.com, .net, etc.) that are not prefixed with a scheme
     *   (such as 'http://'). This option attempts to match anything that looks
     *   like a URL in the given text. Ex: `google.com`, `asdf.org/?page=1`, etc.
     *   `false` to prevent these types of matches.
     * @cfg {Boolean} [urls.ipV4Matches] `true` to match IPv4 addresses in text
     *   that are not prefixed with a scheme (such as 'http://'). This option
     *   attempts to match anything that looks like an IPv4 address in text. Ex:
     *   `192.168.0.1`, `10.0.0.1/?page=1`, etc. `false` to prevent these types
     *   of matches.
     */
    this.urls = {}; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [email=true]
     *
     * `true` if email addresses should be automatically linked, `false` if they
     * should not be.
     */
    this.email = true; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [phone=true]
     *
     * `true` if Phone numbers ("(555)555-5555") should be automatically linked,
     * `false` if they should not be.
     */
    this.phone = true; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean/String} [hashtag=false]
     *
     * A string for the service name to have hashtags (ex: "#myHashtag")
     * auto-linked to. The currently-supported values are:
     *
     * - 'twitter'
     * - 'facebook'
     * - 'instagram'
     *
     * Pass `false` to skip auto-linking of hashtags.
     */
    this.hashtag = false; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {String/Boolean} [mention=false]
     *
     * A string for the service name to have mentions (ex: "@myuser")
     * auto-linked to. The currently supported values are:
     *
     * - 'twitter'
     * - 'instagram'
     * - 'soundcloud'
     * - 'tiktok'
     *
     * Defaults to `false` to skip auto-linking of mentions.
     */
    this.mention = false; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [newWindow=true]
     *
     * `true` if the links should open in a new window, `false` otherwise.
     */
    this.newWindow = true; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean/Object} [stripPrefix=true]
     *
     * `true` if 'http://' (or 'https://') and/or the 'www.' should be stripped
     * from the beginning of URL links' text, `false` otherwise. Defaults to
     * `true`.
     *
     * Examples:
     *
     *     stripPrefix: true
     *
     *     // or
     *
     *     stripPrefix: {
     *         scheme : true,
     *         www    : true
     *     }
     *
     * As shown above, this option also accepts an Object form with 2 properties
     * to allow for more customization of what exactly is prevented from being
     * displayed. Both default to `true`:
     *
     * @cfg {Boolean} [stripPrefix.scheme] `true` to prevent the scheme part of
     *   a URL match from being displayed to the user. Example:
     *   `'http://google.com'` will be displayed as `'google.com'`. `false` to
     *   not strip the scheme. NOTE: Only an `'http://'` or `'https://'` scheme
     *   will be removed, so as not to remove a potentially dangerous scheme
     *   (such as `'file://'` or `'javascript:'`)
     * @cfg {Boolean} [stripPrefix.www] `true` to prevent the `'www.'` part of
     *   a URL match from being displayed to the user. Ex: `'www.google.com'`
     *   will be displayed as `'google.com'`. `false` to not strip the `'www'`.
     */
    this.stripPrefix = {
      scheme: true,
      www: true,
    }; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [stripTrailingSlash=true]
     *
     * `true` to remove the trailing slash from URL matches, `false` to keep
     * the trailing slash.
     *
     * Example when `true`: `http://google.com/` will be displayed as
     * `http://google.com`.
     */
    this.stripTrailingSlash = true; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [decodePercentEncoding=true]
     *
     * `true` to decode percent-encoded characters in URL matches, `false` to keep
     * the percent-encoded characters.
     *
     * Example when `true`: `https://en.wikipedia.org/wiki/San_Jos%C3%A9` will
     * be displayed as `https://en.wikipedia.org/wiki/San_José`.
     */
    this.decodePercentEncoding = true; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Number/Object} [truncate=0]
     *
     * ## Number Form
     *
     * A number for how many characters matched text should be truncated to
     * inside the text of a link. If the matched text is over this number of
     * characters, it will be truncated to this length by adding a two period
     * ellipsis ('..') to the end of the string.
     *
     * For example: A url like 'http://www.yahoo.com/some/long/path/to/a/file'
     * truncated to 25 characters might look something like this:
     * 'yahoo.com/some/long/pat..'
     *
     * Example Usage:
     *
     *     truncate: 25
     *
     *
     * Defaults to `0` for "no truncation."
     *
     *
     * ## Object Form
     *
     * An Object may also be provided with two properties: `length` (Number) and
     * `location` (String). `location` may be one of the following: 'end'
     * (default), 'middle', or 'smart'.
     *
     * Example Usage:
     *
     *     truncate: { length: 25, location: 'middle' }
     *
     * @cfg {Number} [truncate.length=0] How many characters to allow before
     *   truncation will occur. Defaults to `0` for "no truncation."
     * @cfg {"end"/"middle"/"smart"} [truncate.location="end"]
     *
     * - 'end' (default): will truncate up to the number of characters, and then
     *   add an ellipsis at the end. Ex: 'yahoo.com/some/long/pat..'
     * - 'middle': will truncate and add the ellipsis in the middle. Ex:
     *   'yahoo.com/s..th/to/a/file'
     * - 'smart': for URLs where the algorithm attempts to strip out unnecessary
     *   parts first (such as the 'www.', then URL scheme, hash, etc.),
     *   attempting to make the URL human-readable before looking for a good
     *   point to insert the ellipsis if it is still too long. Ex:
     *   'yahoo.com/some..to/a/file'. For more details, see
     *   {@link Autolinker.truncate.TruncateSmart}.
     */
    this.truncate = {
      length: 0,
      location: 'end',
    }; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {String} className
     *
     * A CSS class name to add to the generated links. This class will be added
     * to all links, as well as this class plus match suffixes for styling
     * url/email/phone/hashtag/mention links differently.
     *
     * For example, if this config is provided as "myLink", then:
     *
     * - URL links will have the CSS classes: "myLink myLink-url"
     * - Email links will have the CSS classes: "myLink myLink-email", and
     * - Phone links will have the CSS classes: "myLink myLink-phone"
     * - Hashtag links will have the CSS classes: "myLink myLink-hashtag"
     * - Mention links will have the CSS classes: "myLink myLink-mention myLink-[type]"
     *   where [type] is the name of the mention service (e.g. "instagram",
     *   "twitter" or "soundcloud")
     */
    this.className = ''; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Function} replaceFn
     *
     * A function to individually process each match found in the input string.
     *
     * See the class's description for usage.
     *
     * The `replaceFn` can be called with a different context object (`this`
     * reference) using the {@link #context} cfg.
     *
     * This function is called with the following parameter:
     *
     * @cfg {Autolinker.match.Match} replaceFn.match The Match instance which
     *   can be used to retrieve information about the match that the `replaceFn`
     *   is currently processing. See {@link Autolinker.match.Match} subclasses
     *   for details.
     */
    this.replaceFn = null; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Object} context
     *
     * The context object (`this` reference) to call the `replaceFn` with.
     *
     * Defaults to this Autolinker instance.
     */
    this.context = undefined; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @cfg {Boolean} [sanitizeHtml=false]
     *
     * `true` to HTML-encode the start and end brackets of existing HTML tags found
     * in the input string. This will escape `<` and `>` characters to `&lt;` and
     * `&gt;`, respectively.
     *
     * Setting this to `true` will prevent XSS (Cross-site Scripting) attacks,
     * but will remove the significance of existing HTML tags in the input string. If
     * you would like to maintain the significance of existing HTML tags while also
     * making the output HTML string safe, leave this option as `false` and use a
     * tool like https://github.com/cure53/DOMPurify (or others) on the input string
     * before running Autolinker.
     */
    this.sanitizeHtml = false; // default value just to get the above doc comment in the ES5 output and documentation generator
    /**
     * @private
     * @property {Autolinker.AnchorTagBuilder} tagBuilder
     *
     * The AnchorTagBuilder instance used to build match replacement anchor tags.
     * Note: this is lazily instantiated in the {@link #getTagBuilder} method.
     */
    this.tagBuilder = null;

    // Apply the user-supplied configuration on top of the defaults.
    // Note: when `this.something` is used on the right-hand side of these
    // assignments, it refers to the default value assigned earlier in this
    // constructor.
    this.urls = normalizeUrlsCfg(cfg.urls);
    // Boolean options only override the default when an actual boolean was
    // provided, so an explicit `false` is honored.
    this.email = isBoolean(cfg.email) ? cfg.email : this.email;
    this.phone = isBoolean(cfg.phone) ? cfg.phone : this.phone;
    // Any falsy `hashtag`/`mention` value falls back to the default (`false`).
    this.hashtag = cfg.hashtag || this.hashtag;
    this.mention = cfg.mention || this.mention;
    this.newWindow = isBoolean(cfg.newWindow) ? cfg.newWindow : this.newWindow;
    this.stripPrefix = normalizeStripPrefixCfg(cfg.stripPrefix);
    this.stripTrailingSlash = isBoolean(cfg.stripTrailingSlash)
      ? cfg.stripTrailingSlash
      : this.stripTrailingSlash;
    this.decodePercentEncoding = isBoolean(cfg.decodePercentEncoding)
      ? cfg.decodePercentEncoding
      : this.decodePercentEncoding;
    this.sanitizeHtml = cfg.sanitizeHtml || false;

    // Validate the value of the `mention` cfg: it must be `false` or one of
    // the names listed in `mentionServices`.
    var mention = this.mention;
    if (mention !== false && mentionServices.indexOf(mention) === -1) {
      throw new Error("invalid `mention` cfg '".concat(mention, "' - see docs"));
    }
    // Validate the value of the `hashtag` cfg: it must be `false` or one of
    // the names listed in `hashtagServices`.
    var hashtag = this.hashtag;
    if (hashtag !== false && hashtagServices.indexOf(hashtag) === -1) {
      throw new Error("invalid `hashtag` cfg '".concat(hashtag, "' - see docs"));
    }

    this.truncate = normalizeTruncateCfg(cfg.truncate);
    this.className = cfg.className || this.className;
    this.replaceFn = cfg.replaceFn || this.replaceFn;
    // With no explicit context, `replaceFn` is called with this instance.
    this.context = cfg.context || this;
  }
  420. /**
  421. * Automatically links URLs, Email addresses, Phone Numbers, Twitter handles,
  422. * Hashtags, and Mentions found in the given chunk of HTML. Does not link URLs
  423. * found within HTML tags.
  424. *
  425. * For instance, if given the text: `You should go to http://www.yahoo.com`,
  426. * then the result will be `You should go to &lt;a href="http://www.yahoo.com"&gt;http://www.yahoo.com&lt;/a&gt;`
  427. *
  428. * Example:
  429. *
  430. * var linkedText = Autolinker.link( "Go to google.com", { newWindow: false } );
  431. * // Produces: "Go to <a href="http://google.com">google.com</a>"
  432. *
  433. * @static
  434. * @param {String} textOrHtml The HTML or text to find matches within (depending
  435. * on if the {@link #urls}, {@link #email}, {@link #phone},
  436. * {@link #hashtag}, and {@link #mention} options are enabled).
  437. * @param {Object} [options] Any of the configuration options for the Autolinker
  438. * class, specified in an Object (map). See the class description for an
  439. * example call.
  440. * @return {String} The HTML text, with matches automatically linked.
  441. */
  Autolinker.link = function (textOrHtml, options) {
    // One-off convenience: configure a throwaway instance from `options` and
    // delegate to its instance-level `link()` method.
    return new Autolinker(options).link(textOrHtml);
  };
  446. /**
  447. * Parses the input `textOrHtml` looking for URLs, email addresses, phone
  448. * numbers, username handles, and hashtags (depending on the configuration
  449. * of the Autolinker instance), and returns an array of {@link Autolinker.match.Match}
  450. * objects describing those matches (without making any replacements).
  451. *
  452. * Note that if parsing multiple pieces of text, it is slightly more efficient
  453. * to create an Autolinker instance, and use the instance-level {@link #parse}
  454. * method.
  455. *
  456. * Example:
  457. *
  458. * var matches = Autolinker.parse( "Hello google.com, I am asdf@asdf.com", {
  459. * urls: true,
  460. * email: true
  461. * } );
  462. *
  463. * console.log( matches.length ); // 2
  464. * console.log( matches[ 0 ].getType() ); // 'url'
  465. * console.log( matches[ 0 ].getUrl() ); // 'google.com'
  466. * console.log( matches[ 1 ].getType() ); // 'email'
  467. * console.log( matches[ 1 ].getEmail() ); // 'asdf@asdf.com'
  468. *
  469. * @static
  470. * @param {String} textOrHtml The HTML or text to find matches within
  471. * (depending on if the {@link #urls}, {@link #email}, {@link #phone},
  472. * {@link #hashtag}, and {@link #mention} options are enabled).
  473. * @param {Object} [options] Any of the configuration options for the Autolinker
  474. * class, specified in an Object (map). See the class description for an
  475. * example call.
  476. * @return {Autolinker.match.Match[]} The array of Matches found in the
  477. * given input `textOrHtml`.
  478. */
  Autolinker.parse = function (textOrHtml, options) {
    // One-off convenience: configure a throwaway instance from `options` and
    // delegate to its instance-level `parse()` method.
    return new Autolinker(options).parse(textOrHtml);
  };
  483. /**
  484. * Parses the input `textOrHtml` looking for URLs, email addresses, phone
  485. * numbers, username handles, and hashtags (depending on the configuration
  486. * of the Autolinker instance), and returns an array of {@link Autolinker.match.Match}
  487. * objects describing those matches (without making any replacements).
  488. *
  489. * This method is used by the {@link #link} method, but can also be used to
  490. * simply do parsing of the input in order to discover what kinds of links
  491. * there are and how many.
  492. *
  493. * Example usage:
  494. *
  495. * var autolinker = new Autolinker( {
  496. * urls: true,
  497. * email: true
  498. * } );
  499. *
  500. * var matches = autolinker.parse( "Hello google.com, I am asdf@asdf.com" );
  501. *
  502. * console.log( matches.length ); // 2
  503. * console.log( matches[ 0 ].getType() ); // 'url'
  504. * console.log( matches[ 0 ].getUrl() ); // 'google.com'
  505. * console.log( matches[ 1 ].getType() ); // 'email'
  506. * console.log( matches[ 1 ].getEmail() ); // 'asdf@asdf.com'
  507. *
  508. * @param {String} textOrHtml The HTML or text to find matches within
  509. * (depending on if the {@link #urls}, {@link #email}, {@link #phone},
  510. * {@link #hashtag}, and {@link #mention} options are enabled).
  511. * @return {Autolinker.match.Match[]} The array of Matches found in the
  512. * given input `textOrHtml`.
  513. */
  Autolinker.prototype.parse = function (textOrHtml) {
    var self = this;
    // Text inside these tags is never auto-linked: we don't want to link
    // something that is already inside an <a>, nor the contents of
    // <style>/<script> elements.
    var ignoredTagNames = ['a', 'style', 'script'];
    var ignoredDepth = 0; // how many ignored tags are currently open
    var found = [];
    // Common HTML entities to "walk around". An '&nbsp;' (for example) could
    // be at the end of a URL, but we don't want to include the trailing '&'
    // in the URL. See issue #76.
    // NOTE: the capturing group is significant: it makes .split() keep the
    // entities in its result, so offsets can still be accumulated below.
    // TODO: Handle HTML entities separately in parseHtml() and don't emit
    // them as "text" except for &amp; entities
    var entityPattern = /(&nbsp;|&#160;|&lt;|&#60;|&gt;|&#62;|&quot;|&#34;|&#39;)/gi;
    var isIgnoredTag = function (tagName) {
      return ignoredTagNames.indexOf(tagName) !== -1;
    };

    parseHtml(textOrHtml, {
      onOpenTag: function (tagName) {
        if (isIgnoredTag(tagName)) {
          ignoredDepth += 1;
        }
      },
      onText: function (text, offset) {
        // Only process text nodes that are not within an <a>, <style> or
        // <script> tag.
        if (ignoredDepth !== 0) {
          return;
        }
        var pieces = text.split(entityPattern);
        var pieceOffset = offset;
        for (var idx = 0; idx < pieces.length; idx++) {
          var piece = pieces[idx];
          // Even indexes are plain text, odd indexes are the HTML entities
          // captured by the split pattern (which are skipped).
          if (idx % 2 === 0) {
            found.push.apply(found, self.parseText(piece, pieceOffset));
          }
          pieceOffset += piece.length;
        }
      },
      onCloseTag: function (tagName) {
        // Guard against extraneous closing tags (e.g. a stray </a>) by never
        // letting the depth go below 0.
        if (isIgnoredTag(tagName) && ignoredDepth > 0) {
          ignoredDepth -= 1;
        }
      },
      onComment: function (_offset) { }, // nothing to link in comment nodes
      onDoctype: function (_offset) { }, // no need to process doctype nodes
    });

    // First drop matches that overlap an earlier match (for instance the
    // '#link' hashtag inside the URL 'google.com/#link'), and only then drop
    // the match types that are turned off. All match types have to be
    // collected initially so that such nested matches can be detected.
    return this.removeUnwantedMatches(this.compactMatches(found));
  };
  566. /**
  567. * After we have found all matches, we need to remove matches that overlap
  568. * with a previous match. This can happen for instance with URLs, where the
  569. * url 'google.com/#link' would match '#link' as a hashtag. Because the
  570. * '#link' part is contained in a larger match that comes before the HashTag
  571. * match, we'll remove the HashTag match.
  572. *
  573. * @private
  574. * @param {Autolinker.match.Match[]} matches
  575. * @return {Autolinker.match.Match[]}
  576. */
  Autolinker.prototype.compactMatches = function (matches) {
    // Removes, in place, every match that starts at the same offset as, or
    // overlaps with, an earlier (kept) match, and returns the same array.
    // The array is compacted in a single pass after sorting instead of
    // calling splice() once per removed match.

    // First, the matches need to be sorted in order of offset so that
    // conflicting matches end up next to each other.
    matches.sort(function (a, b) {
      return a.getOffset() - b.getOffset();
    });
    var total = matches.length;
    if (total < 2) {
      return matches; // nothing can overlap
    }
    // matches[0..keptIdx] holds the matches kept so far; matches[keptIdx] is
    // the match that each following candidate is compared against.
    var keptIdx = 0;
    for (var i = 1; i < total; i++) {
      var kept = matches[keptIdx];
      var candidate = matches[i];
      var keptOffset = kept.getOffset();
      var keptLength = kept.getMatchedText().length;
      var candidateOffset = candidate.getOffset();
      if (candidateOffset === keptOffset) {
        // Same start offset: keep only the longer of the two (the earlier
        // one wins a tie).
        if (candidate.getMatchedText().length > keptLength) {
          matches[keptIdx] = candidate;
        }
      }
      else if (candidateOffset >= keptOffset + keptLength) {
        // Starts at or after the end of the kept match: no overlap, keep it.
        keptIdx++;
        matches[keptIdx] = candidate;
      }
      // Otherwise the candidate starts inside the kept match and is dropped.
    }
    // Truncate away the slots left over from dropped matches.
    matches.length = keptIdx + 1;
    return matches;
  };
  602. /**
  603. * Removes matches for matchers that were turned off in the options. For
  604. * example, if {@link #hashtag hashtags} were not to be matched, we'll
  605. * remove them from the `matches` array here.
  606. *
  607. * Note: we *must* use all Matchers on the input string, and then filter
  608. * them out later. For example, if the options were `{ urls: false, hashtag: 'twitter' }`,
  609. * we wouldn't want to match the text '#link' as a HashTag inside of the text
  610. * 'google.com/#link'. The way the algorithm works is that we match the full
  611. * URL first (which prevents the accidental HashTag match), and then we'll
  612. * simply throw away the URL match.
  613. *
  614. * @private
  615. * @param {Autolinker.match.Match[]} matches The array of matches to remove
  616. * the unwanted matches from. Note: this array is mutated for the
  617. * removals.
  618. * @return {Autolinker.match.Match[]} The mutated input `matches` array.
  619. */
  Autolinker.prototype.removeUnwantedMatches = function (matches) {
    // Drops every match whose getType() equals `type`.
    var dropType = function (type) {
      removeWithPredicate(matches, function (match) {
        return match.getType() === type;
      });
    };
    // Drops every 'url' match whose getUrlMatchType() equals `urlMatchType`.
    var dropUrlMatchType = function (urlMatchType) {
      removeWithPredicate(matches, function (match) {
        return match.getType() === 'url' && match.getUrlMatchType() === urlMatchType;
      });
    };

    // Each disabled option removes its own kind of match from `matches`.
    if (!this.hashtag) {
      dropType('hashtag');
    }
    if (!this.email) {
      dropType('email');
    }
    if (!this.phone) {
      dropType('phone');
    }
    if (!this.mention) {
      dropType('mention');
    }
    // URL matches are filtered per sub-kind, as configured by the `urls` cfg.
    if (!this.urls.schemeMatches) {
      dropUrlMatchType('scheme');
    }
    if (!this.urls.tldMatches) {
      dropUrlMatchType('tld');
    }
    if (!this.urls.ipV4Matches) {
      dropUrlMatchType('ipV4');
    }
    return matches;
  };
  /**
   * Scans the plain (non-HTML) input `text` for URLs, email addresses, phone
   * numbers, username handles, and hashtags (subject to how this Autolinker
   * instance is configured), and returns the {@link Autolinker.match.Match}
   * objects that describe what was found.
   *
   * The input must be a **non-HTML string**; this method is the one used
   * internally by {@link #parse} for the text nodes of an HTML string.
   *
   * @private
   * @param {String} text The non-HTML text to search (which kinds of matches
   *   are of interest depends on the {@link #urls}, {@link #email},
   *   {@link #phone}, {@link #hashtag}, and {@link #mention} options).
   * @param {Number} [offset=0] Position of this text node within the original
   *   string. It is added to the offset of every match so that the offsets are
   *   relative to the string given to {@link #parse}. Any falsy value
   *   (omitted, `null`, `NaN`) is treated as `0`.
   * @return {Autolinker.match.Match[]} The matches found in `text`, with their
   *   offsets already shifted by `offset`.
   */
  Autolinker.prototype.parseText = function (text, offset) {
      var baseOffset = offset || 0;
      var parserCfg = {
          tagBuilder: this.getTagBuilder(),
          stripPrefix: this.stripPrefix,
          stripTrailingSlash: this.stripTrailingSlash,
          decodePercentEncoding: this.decodePercentEncoding,
          hashtagServiceName: this.hashtag,
          // A falsy `mention` option still hands the parser a service name.
          mentionServiceName: this.mention || 'twitter',
      };
      var found = parseMatches(text, parserCfg);
      // The parser reports offsets relative to `text`; shift each of them so
      // they become relative to the original input string instead.
      var total = found.length;
      var idx = 0;
      while (idx < total) {
          found[idx].setOffset(baseOffset + found[idx].getOffset());
          idx++;
      }
      return found;
  };
  /**
   * Automatically links the URLs, Email addresses, Phone numbers, Hashtags,
   * and Mentions (Twitter, Instagram, Soundcloud) found in the given chunk of
   * HTML or text.
   *
   * For instance, given the text `You should go to http://www.yahoo.com`, the
   * result will be `You should go to
   * &lt;a href="http://www.yahoo.com"&gt;http://www.yahoo.com&lt;/a&gt;`
   *
   * The matches themselves come from {@link #parse}; this method stitches the
   * output together from the untouched text between the matches and the
   * replacement produced for each match by {@link #createMatchReturnVal}.
   *
   * When the `sanitizeHtml` option is set, every `<` and `>` of the input is
   * first escaped to `&lt;` / `&gt;` in order to avoid an XSS scenario.
   *
   * @param {String} textOrHtml The HTML or text to autolink matches within
   *   (depending on if the {@link #urls}, {@link #email}, {@link #phone},
   *   {@link #hashtag}, and {@link #mention} options are enabled).
   * @return {String} The HTML, with matches automatically linked. An empty
   *   string is returned for any falsy input (e.g. `null`, `undefined`, `''`).
   */
  Autolinker.prototype.link = function (textOrHtml) {
      // Guards against `null` / `undefined` coming from plain JavaScript callers.
      if (!textOrHtml) {
          return '';
      }
      var input = this.sanitizeHtml
          ? textOrHtml.replace(/</g, '&lt;').replace(/>/g, '&gt;')
          : textOrHtml;
      var found = this.parse(input);
      var pieces = [];
      var cursor = 0; // index in `input` just past the previously handled match
      for (var idx = 0, total = found.length; idx < total; idx++) {
          var current = found[idx];
          // Untouched text preceding the match, followed by its replacement.
          pieces.push(input.substring(cursor, current.getOffset()), this.createMatchReturnVal(current));
          cursor = current.getOffset() + current.getMatchedText().length;
      }
      // Whatever text remains after the final match.
      pieces.push(input.substring(cursor));
      return pieces.join('');
  };
  /**
   * Produces the string that should take the place of a given match in the
   * input string.
   *
   * If a {@link #replaceFn} was provided it is consulted first, and its return
   * value decides the outcome:
   *
   * - a string: that string is used as-is
   * - `false`: the match is left alone (its matched text is returned)
   * - an `HtmlTag` instance: that tag's anchor string is used
   * - anything else (including `true` and no return value): Autolinker's
   *   default anchor tag for the match is generated
   *
   * @private
   * @param {Autolinker.match.Match} match The Match object that represents
   *   the match.
   * @return {String} The string that the `match` should be replaced with.
   *   This is usually the anchor tag string, but may be the matched text
   *   itself if the match is not to be replaced.
   */
  Autolinker.prototype.createMatchReturnVal = function (match) {
      // The custom `replaceFn`, when present, runs with `this.context` as its `this`.
      var outcome = this.replaceFn ? this.replaceFn.call(this.context, match) : void 0;
      if (typeof outcome === 'string') {
          return outcome;
      }
      if (outcome === false) {
          return match.getMatchedText();
      }
      if (outcome instanceof HtmlTag) {
          return outcome.toAnchorString();
      }
      // `true`, or no/unknown return value: fall back to the default tag
      // (`buildTag()` returns an Autolinker.HtmlTag instance).
      return match.buildTag().toAnchorString();
  };
  /**
   * Hands out the {@link #tagBuilder} instance of this Autolinker instance.
   * The builder is created on first use (from the `newWindow`, `truncate` and
   * `className` settings of this instance) and cached on the instance, so
   * subsequent calls return the same object.
   *
   * @private
   * @return {Autolinker.AnchorTagBuilder}
   */
  Autolinker.prototype.getTagBuilder = function () {
      var existing = this.tagBuilder;
      if (existing) {
          return existing;
      }
      var created = new AnchorTagBuilder({
          newWindow: this.newWindow,
          truncate: this.truncate,
          className: this.className,
      });
      this.tagBuilder = created;
      return created;
  };
  782. // NOTE: must be 'export default' here for UMD module
  783. /**
  784. * @static
  785. * @property {String} version
  786. *
  787. * The Autolinker version number in the form major.minor.patch
  788. *
  789. * Ex: 3.15.0
  790. */
  791. Autolinker.version = version;
  792. return Autolinker;
  793. }());
  794. export default Autolinker;
  /**
   * Normalizes the {@link #urls} config into an Object with 3 Boolean
   * properties: `schemeMatches`, `tldMatches`, and `ipV4Matches`.
   *
   * - `null` / `undefined` counts as `true`.
   * - A Boolean is applied to all three properties.
   * - For any other value (the object form), each property keeps its value
   *   when that value is a Boolean, and is `true` otherwise.
   *
   * See {@link #urls} config for details.
   *
   * @private
   * @param {Boolean/Object} urls
   * @return {Object}
   */
  function normalizeUrlsCfg(urls) {
      var cfg = urls == null ? true : urls; // default to `true`
      if (isBoolean(cfg)) {
          return { schemeMatches: cfg, tldMatches: cfg, ipV4Matches: cfg };
      }
      // Object form: every flag that is missing or not a Boolean is enabled.
      return {
          schemeMatches: isBoolean(cfg.schemeMatches) ? cfg.schemeMatches : true,
          tldMatches: isBoolean(cfg.tldMatches) ? cfg.tldMatches : true,
          ipV4Matches: isBoolean(cfg.ipV4Matches) ? cfg.ipV4Matches : true,
      };
  }
  /**
   * Normalizes the {@link #stripPrefix} config into an Object with 2 Boolean
   * properties: `scheme`, and `www`.
   *
   * - `null` / `undefined` counts as `true`.
   * - A Boolean is applied to both properties.
   * - For any other value (the object form), each property keeps its value
   *   when that value is a Boolean, and is `true` otherwise.
   *
   * See {@link #stripPrefix} config for details.
   *
   * @private
   * @param {Boolean/Object} stripPrefix
   * @return {Object}
   */
  function normalizeStripPrefixCfg(stripPrefix) {
      var cfg = stripPrefix == null ? true : stripPrefix; // default to `true`
      if (isBoolean(cfg)) {
          return { scheme: cfg, www: cfg };
      }
      // Object form: every flag that is missing or not a Boolean is enabled.
      return {
          scheme: isBoolean(cfg.scheme) ? cfg.scheme : true,
          www: isBoolean(cfg.www) ? cfg.www : true,
      };
  }
  /**
   * Normalizes the {@link #truncate} config into an Object with 2 properties:
   * `length` (Number), and `location` (String).
   *
   * A Number is taken as the `length`, paired with the location `'end'`. Any
   * other value (the object form, or `undefined`/`null`, which is replaced by
   * a new empty object) is passed through `defaults` together with the
   * fallback values `length: Number.POSITIVE_INFINITY` and `location: 'end'`.
   *
   * See {@link #truncate} config for details.
   *
   * @private
   * @param {Number/Object} truncate
   * @return {Object}
   */
  function normalizeTruncateCfg(truncate) {
      if (typeof truncate !== 'number') {
          // NOTE(review): `defaults` is defined outside this section; it
          // presumably fills in only the properties missing from its first
          // argument - confirm against its definition.
          var fallbacks = {
              length: Number.POSITIVE_INFINITY,
              location: 'end',
          };
          return defaults(truncate || {}, fallbacks);
      }
      return { length: truncate, location: 'end' };
  }
  866. //# sourceMappingURL=autolinker.js.map