// url-match.js
import { __extends } from "tslib";
import { AbstractMatch } from './abstract-match';
import { httpSchemePrefixRe } from '../parser/uri-utils';
  4. /**
  5. * A regular expression used to remove the 'www.' from URLs.
  6. */
  7. var wwwPrefixRegex = /^(https?:\/\/)?(www\.)?/i;
  8. /**
  9. * The regular expression used to remove the protocol-relative '//' from a URL
  10. * string, for purposes of formatting the anchor text. A protocol-relative URL
  11. * is, for example, "//yahoo.com"
  12. */
  13. var protocolRelativeRegex = /^\/\//;
  14. /**
  15. * @class Autolinker.match.Url
  16. * @extends Autolinker.match.AbstractMatch
  17. *
  18. * Represents a Url match found in an input string which should be Autolinked.
  19. *
  20. * See this class's superclass ({@link Autolinker.match.Match}) for more details.
  21. */
  22. var UrlMatch = /** @class */ (function (_super) {
  23. __extends(UrlMatch, _super);
  24. /**
  25. * @method constructor
  26. * @param {Object} cfg The configuration properties for the Match
  27. * instance, specified in an Object (map).
  28. */
  29. function UrlMatch(cfg) {
  30. var _this = _super.call(this, cfg) || this;
  31. /**
  32. * @public
  33. * @property {'url'} type
  34. *
  35. * A string name for the type of match that this class represents. Can be
  36. * used in a TypeScript discriminating union to type-narrow from the
  37. * `Match` type.
  38. */
  39. _this.type = 'url';
  40. /**
  41. * @cfg {String} url (required)
  42. *
  43. * The url that was matched.
  44. */
  45. _this.url = ''; // default value just to get the above doc comment in the ES5 output and documentation generator
  46. /**
  47. * @cfg {"scheme"/"www"/"tld"} urlMatchType (required)
  48. *
  49. * The type of URL match that this class represents. This helps to determine
  50. * if the match was made in the original text with a prefixed scheme (ex:
  51. * 'http://www.google.com'), a prefixed 'www' (ex: 'www.google.com'), or
  52. * was matched by a known top-level domain (ex: 'google.com').
  53. */
  54. _this.urlMatchType = 'scheme'; // default value just to get the above doc comment in the ES5 output and documentation generator
  55. /**
  56. * @cfg {Boolean} protocolRelativeMatch (required)
  57. *
  58. * `true` if the URL is a protocol-relative match. A protocol-relative match
  59. * is a URL that starts with '//', and will be either http:// or https://
  60. * based on the protocol that the site is loaded under.
  61. */
  62. _this.protocolRelativeMatch = false; // default value just to get the above doc comment in the ES5 output and documentation generator
  63. /**
  64. * @cfg {Object} stripPrefix (required)
  65. *
  66. * The Object form of {@link Autolinker#cfg-stripPrefix}.
  67. */
  68. _this.stripPrefix = {
  69. scheme: true,
  70. www: true,
  71. }; // default value just to get the above doc comment in the ES5 output and documentation generator
  72. /**
  73. * @cfg {Boolean} stripTrailingSlash (required)
  74. * @inheritdoc Autolinker#cfg-stripTrailingSlash
  75. */
  76. _this.stripTrailingSlash = true; // default value just to get the above doc comment in the ES5 output and documentation generator
  77. /**
  78. * @cfg {Boolean} decodePercentEncoding (required)
  79. * @inheritdoc Autolinker#cfg-decodePercentEncoding
  80. */
  81. _this.decodePercentEncoding = true; // default value just to get the above doc comment in the ES5 output and documentation generator
  82. /**
  83. * @private
  84. * @property {Boolean} protocolPrepended
  85. *
  86. * Will be set to `true` if the 'http://' protocol has been prepended to the {@link #url} (because the
  87. * {@link #url} did not have a protocol)
  88. */
  89. _this.protocolPrepended = false;
  90. _this.urlMatchType = cfg.urlMatchType;
  91. _this.url = cfg.url;
  92. _this.protocolRelativeMatch = cfg.protocolRelativeMatch;
  93. _this.stripPrefix = cfg.stripPrefix;
  94. _this.stripTrailingSlash = cfg.stripTrailingSlash;
  95. _this.decodePercentEncoding = cfg.decodePercentEncoding;
  96. return _this;
  97. }
  98. /**
  99. * Returns a string name for the type of match that this class represents.
  100. * For the case of UrlMatch, returns 'url'.
  101. *
  102. * @return {String}
  103. */
  104. UrlMatch.prototype.getType = function () {
  105. return 'url';
  106. };
  107. /**
  108. * Returns a string name for the type of URL match that this class
  109. * represents.
  110. *
  111. * This helps to determine if the match was made in the original text with a
  112. * prefixed scheme (ex: 'http://www.google.com'), a prefixed 'www' (ex:
  113. * 'www.google.com'), or was matched by a known top-level domain (ex:
  114. * 'google.com').
  115. *
  116. * @return {"scheme"/"www"/"tld"}
  117. */
  118. UrlMatch.prototype.getUrlMatchType = function () {
  119. return this.urlMatchType;
  120. };
  121. /**
  122. * Returns the url that was matched, assuming the protocol to be 'http://' if the original
  123. * match was missing a protocol.
  124. *
  125. * @return {String}
  126. */
  127. UrlMatch.prototype.getUrl = function () {
  128. var url = this.url;
  129. // if the url string doesn't begin with a scheme, assume 'http://'
  130. if (!this.protocolRelativeMatch &&
  131. this.urlMatchType !== 'scheme' &&
  132. !this.protocolPrepended) {
  133. url = this.url = 'http://' + url;
  134. this.protocolPrepended = true;
  135. }
  136. return url;
  137. };
  138. /**
  139. * Returns the anchor href that should be generated for the match.
  140. *
  141. * @return {String}
  142. */
  143. UrlMatch.prototype.getAnchorHref = function () {
  144. var url = this.getUrl();
  145. return url.replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html
  146. };
  147. /**
  148. * Returns the anchor text that should be generated for the match.
  149. *
  150. * @return {String}
  151. */
  152. UrlMatch.prototype.getAnchorText = function () {
  153. var anchorText = this.getMatchedText();
  154. if (this.protocolRelativeMatch) {
  155. // Strip off any protocol-relative '//' from the anchor text
  156. anchorText = stripProtocolRelativePrefix(anchorText);
  157. }
  158. if (this.stripPrefix.scheme) {
  159. anchorText = stripSchemePrefix(anchorText);
  160. }
  161. if (this.stripPrefix.www) {
  162. anchorText = stripWwwPrefix(anchorText);
  163. }
  164. if (this.stripTrailingSlash) {
  165. anchorText = removeTrailingSlash(anchorText); // remove trailing slash, if there is one
  166. }
  167. if (this.decodePercentEncoding) {
  168. anchorText = removePercentEncoding(anchorText);
  169. }
  170. return anchorText;
  171. };
  172. return UrlMatch;
  173. }(AbstractMatch));
  174. export { UrlMatch };
// Utility Functionality
  176. /**
  177. * Strips the scheme prefix (such as "http://" or "https://") from the given
  178. * `url`.
  179. *
  180. * @private
  181. * @param {String} url The text of the anchor that is being generated, for
  182. * which to strip off the url scheme.
  183. * @return {String} The `url`, with the scheme stripped.
  184. */
  185. function stripSchemePrefix(url) {
  186. return url.replace(httpSchemePrefixRe, '');
  187. }
  188. /**
  189. * Strips the 'www' prefix from the given `url`.
  190. *
  191. * @private
  192. * @param {String} url The text of the anchor that is being generated, for
  193. * which to strip off the 'www' if it exists.
  194. * @return {String} The `url`, with the 'www' stripped.
  195. */
  196. function stripWwwPrefix(url) {
  197. return url.replace(wwwPrefixRegex, '$1'); // leave any scheme ($1), it one exists
  198. }
  199. /**
  200. * Strips any protocol-relative '//' from the anchor text.
  201. *
  202. * @private
  203. * @param {String} text The text of the anchor that is being generated, for which to strip off the
  204. * protocol-relative prefix (such as stripping off "//")
  205. * @return {String} The `anchorText`, with the protocol-relative prefix stripped.
  206. */
  207. function stripProtocolRelativePrefix(text) {
  208. return text.replace(protocolRelativeRegex, '');
  209. }
  210. /**
  211. * Removes any trailing slash from the given `anchorText`, in preparation for the text to be displayed.
  212. *
  213. * @private
  214. * @param {String} anchorText The text of the anchor that is being generated, for which to remove any trailing
  215. * slash ('/') that may exist.
  216. * @return {String} The `anchorText`, with the trailing slash removed.
  217. */
  218. function removeTrailingSlash(anchorText) {
  219. if (anchorText.charAt(anchorText.length - 1) === '/') {
  220. anchorText = anchorText.slice(0, -1);
  221. }
  222. return anchorText;
  223. }
  224. /**
  225. * Decodes percent-encoded characters from the given `anchorText`, in
  226. * preparation for the text to be displayed.
  227. *
  228. * @private
  229. * @param {String} anchorText The text of the anchor that is being
  230. * generated, for which to decode any percent-encoded characters.
  231. * @return {String} The `anchorText`, with the percent-encoded characters
  232. * decoded.
  233. */
  234. function removePercentEncoding(anchorText) {
  235. // First, convert a few of the known % encodings to the corresponding
  236. // HTML entities that could accidentally be interpretted as special
  237. // HTML characters
  238. var preProcessedEntityAnchorText = anchorText
  239. .replace(/%22/gi, '"') // " char
  240. .replace(/%26/gi, '&') // & char
  241. .replace(/%27/gi, ''') // ' char
  242. .replace(/%3C/gi, '&lt;') // < char
  243. .replace(/%3E/gi, '&gt;'); // > char
  244. try {
  245. // Now attempt to decode the rest of the anchor text
  246. return decodeURIComponent(preProcessedEntityAnchorText);
  247. }
  248. catch (e) {
  249. // Invalid % escape sequence in the anchor text
  250. return preProcessedEntityAnchorText;
  251. }
  252. }
//# sourceMappingURL=url-match.js.map