url-match.js 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.UrlMatch = void 0;
  4. var tslib_1 = require("tslib");
  5. var abstract_match_1 = require("./abstract-match");
  6. var uri_utils_1 = require("../parser/uri-utils");
  /**
   * Matches a leading protocol-relative '//' (as in "//yahoo.com"). Used to drop
   * that prefix when formatting the anchor text of a protocol-relative URL.
   */
  var protocolRelativeRegex = /^\/\//;
  /**
   * Matches an optional 'http://' or 'https://' scheme (capture group 1)
   * followed by an optional 'www.' (capture group 2) at the start of a URL,
   * case-insensitively. Used to remove the 'www.' from URLs while allowing the
   * scheme to be kept via the '$1' replacement.
   */
  var wwwPrefixRegex = /^(https?:\/\/)?(www\.)?/i;
  /**
   * @class Autolinker.match.Url
   * @extends Autolinker.match.AbstractMatch
   *
   * Represents a Url match found in an input string which should be Autolinked.
   *
   * See this class's superclass ({@link Autolinker.match.AbstractMatch}) for more details.
   */
  var UrlMatch = /** @class */ (function (_super) {
      (0, tslib_1.__extends)(UrlMatch, _super);
      /**
       * @method constructor
       * @param {Object} cfg The configuration properties for the Match
       *   instance, specified in an Object (map). The constructor reads
       *   `urlMatchType`, `url`, `protocolRelativeMatch`, `stripPrefix`,
       *   `stripTrailingSlash` and `decodePercentEncoding` from it, and also
       *   forwards the whole object to the superclass constructor.
       */
      function UrlMatch(cfg) {
          var _this = _super.call(this, cfg) || this;
          /**
           * @public
           * @property {'url'} type
           *
           * A string name for the type of match that this class represents. Can be
           * used in a TypeScript discriminating union to type-narrow from the
           * `Match` type.
           */
          _this.type = 'url';
          // NOTE: the literal defaults assigned below exist only so that the doc
          // comments survive into the ES5 output / documentation generator. Each
          // of them is overwritten from `cfg` at the end of the constructor.
          /**
           * @cfg {String} url (required)
           *
           * The url that was matched.
           */
          _this.url = '';
          /**
           * @cfg {"scheme"/"www"/"tld"} urlMatchType (required)
           *
           * The type of URL match that this class represents. This helps to determine
           * if the match was made in the original text with a prefixed scheme (ex:
           * 'http://www.google.com'), a prefixed 'www' (ex: 'www.google.com'), or
           * was matched by a known top-level domain (ex: 'google.com').
           */
          _this.urlMatchType = 'scheme';
          /**
           * @cfg {Boolean} protocolRelativeMatch (required)
           *
           * `true` if the URL is a protocol-relative match. A protocol-relative match
           * is a URL that starts with '//', and will be either http:// or https://
           * based on the protocol that the site is loaded under.
           */
          _this.protocolRelativeMatch = false;
          /**
           * @cfg {Object} stripPrefix (required)
           *
           * The Object form of {@link Autolinker#cfg-stripPrefix}. Its `scheme`
           * and `www` flags are consulted by {@link #getAnchorText}.
           */
          _this.stripPrefix = {
              scheme: true,
              www: true,
          };
          /**
           * @cfg {Boolean} stripTrailingSlash (required)
           * @inheritdoc Autolinker#cfg-stripTrailingSlash
           */
          _this.stripTrailingSlash = true;
          /**
           * @cfg {Boolean} decodePercentEncoding (required)
           * @inheritdoc Autolinker#cfg-decodePercentEncoding
           */
          _this.decodePercentEncoding = true;
          /**
           * @private
           * @property {Boolean} protocolPrepended
           *
           * Will be set to `true` if the 'http://' protocol has been prepended to the {@link #url} (because the
           * {@link #url} did not have a protocol)
           */
          _this.protocolPrepended = false;
          // Apply the actual configuration values.
          _this.urlMatchType = cfg.urlMatchType;
          _this.url = cfg.url;
          _this.protocolRelativeMatch = cfg.protocolRelativeMatch;
          _this.stripPrefix = cfg.stripPrefix;
          _this.stripTrailingSlash = cfg.stripTrailingSlash;
          _this.decodePercentEncoding = cfg.decodePercentEncoding;
          return _this;
      }
      /**
       * Returns a string name for the type of match that this class represents.
       * For the case of UrlMatch, returns 'url'.
       *
       * @return {String}
       */
      UrlMatch.prototype.getType = function () {
          return 'url';
      };
      /**
       * Returns a string name for the type of URL match that this class
       * represents.
       *
       * This helps to determine if the match was made in the original text with a
       * prefixed scheme (ex: 'http://www.google.com'), a prefixed 'www' (ex:
       * 'www.google.com'), or was matched by a known top-level domain (ex:
       * 'google.com').
       *
       * @return {"scheme"/"www"/"tld"}
       */
      UrlMatch.prototype.getUrlMatchType = function () {
          return this.urlMatchType;
      };
      /**
       * Returns the url that was matched, assuming the protocol to be 'http://' if the original
       * match was missing a protocol.
       *
       * The first call that needs to prepend 'http://' also stores the prefixed
       * value back into {@link #url} and sets {@link #protocolPrepended}, so
       * later calls return the same string without prepending again.
       *
       * @return {String}
       */
      UrlMatch.prototype.getUrl = function () {
          var url = this.url;
          // if the url string doesn't begin with a scheme, assume 'http://'
          // (protocol-relative '//' matches are left untouched)
          if (!this.protocolRelativeMatch &&
              this.urlMatchType !== 'scheme' &&
              !this.protocolPrepended) {
              url = this.url = 'http://' + url;
              this.protocolPrepended = true;
          }
          return url;
      };
      /**
       * Returns the anchor href that should be generated for the match.
       *
       * @return {String} The result of {@link #getUrl} with every '&amp;'
       *   entity replaced by a literal '&'.
       */
      UrlMatch.prototype.getAnchorHref = function () {
          var url = this.getUrl();
          // Any '&amp;' entities in the URL are converted back to '&', since the
          // ampersand may have been written as '&amp;' in the source html.
          return url.replace(/&amp;/g, '&');
      };
      /**
       * Returns the anchor text that should be generated for the match.
       *
       * Starting from the matched text, the following steps are applied in
       * order, each only when its corresponding flag is set: strip the
       * protocol-relative '//', strip the scheme, strip 'www.', remove one
       * trailing slash, and decode percent-encoded characters.
       *
       * @return {String}
       */
      UrlMatch.prototype.getAnchorText = function () {
          var anchorText = this.getMatchedText();
          if (this.protocolRelativeMatch) {
              // Strip off any protocol-relative '//' from the anchor text
              anchorText = stripProtocolRelativePrefix(anchorText);
          }
          if (this.stripPrefix.scheme) {
              anchorText = stripSchemePrefix(anchorText);
          }
          if (this.stripPrefix.www) {
              anchorText = stripWwwPrefix(anchorText);
          }
          if (this.stripTrailingSlash) {
              anchorText = removeTrailingSlash(anchorText); // remove trailing slash, if there is one
          }
          if (this.decodePercentEncoding) {
              anchorText = removePercentEncoding(anchorText);
          }
          return anchorText;
      };
      return UrlMatch;
  }(abstract_match_1.AbstractMatch));
  177. exports.UrlMatch = UrlMatch;
  178. // Utility Functionality
  /**
   * Removes whatever `uri_utils_1.httpSchemePrefixRe` matches in the given
   * `url` (the scheme prefix, such as "http://" or "https://").
   *
   * @private
   * @param {String} url The anchor text being generated, from which the url
   *   scheme should be removed.
   * @return {String} The `url` without its scheme prefix.
   */
  function stripSchemePrefix(url) {
      var schemePrefixRe = uri_utils_1.httpSchemePrefixRe;
      var withoutScheme = url.replace(schemePrefixRe, '');
      return withoutScheme;
  }
  /**
   * Removes a leading 'www.' from the given `url`, keeping any scheme that
   * precedes it.
   *
   * @private
   * @param {String} url The anchor text being generated, from which the 'www'
   *   should be removed if it exists.
   * @return {String} The `url` without the 'www' prefix.
   */
  function stripWwwPrefix(url) {
      // '$1' re-inserts the scheme captured by the regex, if one exists
      var withoutWww = url.replace(wwwPrefixRegex, '$1');
      return withoutWww;
  }
  /**
   * Removes a leading protocol-relative '//' from the anchor text.
   *
   * @private
   * @param {String} text The anchor text being generated, from which the
   *   protocol-relative prefix ("//") should be removed.
   * @return {String} The `text` without the protocol-relative prefix.
   */
  function stripProtocolRelativePrefix(text) {
      var withoutPrefix = text.replace(protocolRelativeRegex, '');
      return withoutPrefix;
  }
  /**
   * Drops a single trailing slash from the given `anchorText`, so that the
   * text is ready to be displayed.
   *
   * @private
   * @param {String} anchorText The anchor text being generated, which may end
   *   in a slash ('/').
   * @return {String} The `anchorText` without its final slash; returned
   *   unchanged when it does not end in one.
   */
  function removeTrailingSlash(anchorText) {
      var lastCharIdx = anchorText.length - 1;
      var endsInSlash = anchorText.charAt(lastCharIdx) === '/';
      return endsInSlash ? anchorText.slice(0, -1) : anchorText;
  }
  /**
   * Decodes percent-encoded characters from the given `anchorText`, in
   * preparation for the text to be displayed.
   *
   * The percent-encodings of `"`, `&`, `'`, `<` and `>` are not decoded to the
   * raw characters; they are turned into the equivalent HTML entities so the
   * resulting text cannot accidentally be interpreted as HTML markup.
   *
   * @private
   * @param {String} anchorText The text of the anchor that is being
   *   generated, for which to decode any percent-encoded characters.
   * @return {String} The `anchorText`, with the percent-encoded characters
   *   decoded. If the text contains an invalid % escape sequence, only the
   *   HTML-entity substitutions are applied and the rest is returned as-is.
   */
  function removePercentEncoding(anchorText) {
      // First, convert a few of the known % encodings to the corresponding
      // HTML entities, since the decoded characters could accidentally be
      // interpreted as special HTML characters
      var preProcessedEntityAnchorText = anchorText
          .replace(/%22/gi, '&quot;') // " char
          .replace(/%26/gi, '&amp;') // & char
          .replace(/%27/gi, '&#39;') // ' char
          .replace(/%3C/gi, '&lt;') // < char
          .replace(/%3E/gi, '&gt;'); // > char
      try {
          // Now attempt to decode the rest of the anchor text
          return decodeURIComponent(preProcessedEntityAnchorText);
      }
      catch (e) {
          // Invalid % escape sequence in the anchor text: fall back to the
          // entity-substituted text rather than failing
          return preProcessedEntityAnchorText;
      }
  }
  256. //# sourceMappingURL=url-match.js.map