uri-utils.js 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. "use strict";
  2. Object.defineProperty(exports, "__esModule", { value: true });
  3. exports.isValidIpV4Address = exports.isValidTldMatch = exports.isValidSchemeUrl = exports.isKnownTld = exports.isUrlSuffixStartChar = exports.isPathChar = exports.isDomainLabelChar = exports.isDomainLabelStartChar = exports.isSchemeChar = exports.isSchemeStartChar = exports.tldUrlHostRe = exports.schemeUrlRe = exports.invalidSchemeRe = exports.urlSuffixedCharsNotAllowedAtEndRe = exports.httpSchemePrefixRe = exports.httpSchemeRe = exports.urlSuffixNotAllowedAsLastCharRe = exports.urlSuffixAllowedSpecialCharsRe = exports.urlSuffixStartCharsRe = exports.domainNameCharRegex = void 0;
  4. var regex_lib_1 = require("../regex-lib");
  5. var tld_regex_1 = require("./tld-regex");
  /**
   * Character class matching a single character that may appear in a domain
   * name, not counting the '-' and '.' characters. This is the regex library's
   * `alphaNumericAndMarksRe` re-exported under a domain-specific name.
   */
  exports.domainNameCharRegex = regex_lib_1.alphaNumericAndMarksRe;
  /**
   * Characters that begin a URL suffix, i.e. the path ('/'), query ('?'), or
   * hash ('#') part of a URL.
   */
  exports.urlSuffixStartCharsRe = /[\/?#]/;
  /**
   * Special (non-alphanumeric) characters that are accepted inside the URL
   * suffix (the path, query, and hash part of the URL) and that may also be the
   * final character of the URL.
   *
   * {@link #urlSuffixNotAllowedAsLastCharRe} lists further characters that are
   * accepted inside the URL suffix but (generally) should not end a URL.
   */
  exports.urlSuffixAllowedSpecialCharsRe = /[-+&@#/%=~_()|'$*\[\]{}\u2713]/;
  /**
   * URL suffix characters (i.e. path, query, and hash part of the URL) that are
   * not accepted as the *last character* of the URL suffix, because they would
   * normally form the end of a sentence instead.
   *
   * {@link #urlSuffixAllowedSpecialCharsRe} lists the special characters that
   * are accepted as the last character.
   */
  exports.urlSuffixNotAllowedAsLastCharRe = /[?!:,.;^]/;
  /**
   * Matches an 'http://' or 'https://' scheme anywhere in a string
   * (case-insensitive).
   */
  exports.httpSchemeRe = /https?:\/\//i;
  /**
   * Matches an 'http://' or 'https://' scheme only when it is the prefix of the
   * string (case-insensitive). Built from the source of {@link #httpSchemeRe}.
   */
  exports.httpSchemePrefixRe = new RegExp('^' + exports.httpSchemeRe.source, 'i');
  /**
   * Matches a string whose final character is one of the characters in
   * {@link #urlSuffixNotAllowedAsLastCharRe}.
   */
  exports.urlSuffixedCharsNotAllowedAtEndRe = new RegExp(exports.urlSuffixNotAllowedAsLastCharRe.source + '$');
  /**
   * Matches the schemes that should never be autolinked ('javascript:' and
   * 'vbscript:', case-insensitive, at the start of the string).
   */
  exports.invalidSchemeRe = /^(javascript|vbscript):/i;
  // Detects a "scheme match" (such as 'http://google.com', as opposed to a
  // "TLD match") and pulls out the host, along with whether the URL has an
  // authority component (i.e. '//').
  //
  // Capturing groups:
  //   1. '//' if the URL has an authority component; `undefined` otherwise
  //      (the group is optional and does not participate in the match)
  //   2. The characters following the scheme (and '//', if present) up to the
  //      next ':' or '/'. This is the host when one exists (ex: 'google.com')
  //      and may be an empty string.
  //
  // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
  exports.schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
  // Detects a "TLD match" (such as 'google.com', as opposed to a "scheme
  // match"). It is used to isolate the host so that its TLD (top-level domain)
  // can be read from it.
  //
  // Capturing groups:
  //   1. The host: every character up to the first '/', '#', '?' or ':'
  //
  // See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
  exports.tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars
  /**
   * Tells whether the given character may be the first character of a scheme
   * (ex: the 'h' of 'http'), by testing it against the regex library's
   * `letterRe`.
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` if `letterRe` matches the character.
   */
  function isSchemeStartChar(char) {
      var canStartScheme = regex_lib_1.letterRe.test(char);
      return canStartScheme;
  }
  exports.isSchemeStartChar = isSchemeStartChar;
  /**
   * Tells whether the given character may appear in a scheme (such as 'http' or
   * 'ssh+git') at any position after the first one. The first character is
   * checked separately by {@link isSchemeStartChar}.
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` for a letter, a digit, '+', '-' or '.'.
   */
  function isSchemeChar(char) {
      if (regex_lib_1.letterRe.test(char)) {
          return true;
      }
      if (regex_lib_1.digitRe.test(char)) {
          return true;
      }
      // The only punctuation accepted inside a scheme
      return char === '+' || char === '-' || char === '.';
  }
  exports.isSchemeChar = isSchemeChar;
  /**
   * Tells whether the given character may begin a domain label. A label must
   * begin with a character matched by the regex library's
   * `alphaNumericAndMarksRe`; an underscore or a dash is not accepted here.
   *
   * A domain label is one segment of a hostname, such as each of the three
   * segments of 'subdomain.google.com'.
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` if the character may start a domain label.
   */
  function isDomainLabelStartChar(char) {
      var canStartLabel = regex_lib_1.alphaNumericAndMarksRe.test(char);
      return canStartLabel;
  }
  exports.isDomainLabelStartChar = isDomainLabelStartChar;
  /**
   * Tells whether the given character may appear inside a domain label, after
   * its first character. On top of every character accepted by
   * {@link isDomainLabelStartChar}, the underscore is accepted here.
   *
   * A domain label is one segment of a hostname, such as each of the three
   * segments of 'subdomain.google.com'.
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` if the character may be part of a domain label.
   */
  function isDomainLabelChar(char) {
      if (char === '_') {
          return true;
      }
      return isDomainLabelStartChar(char);
  }
  exports.isDomainLabelChar = isDomainLabelChar;
  /**
   * Tells whether the given character is accepted as a path character
   * ("pchar"). For reference, https://tools.ietf.org/html/rfc3986#appendix-A
   * defines it as:
   *
   *     pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
   *
   *     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
   *     pct-encoded = "%" HEXDIG HEXDIG
   *     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
   *                 / "*" / "+" / "," / ";" / "="
   *
   * This implementation intentionally does not follow that grammar exactly. It
   * accepts the characters seen in URL paths in the wild: anything matched by
   * `alphaNumericAndMarksRe`, by `urlSuffixAllowedSpecialCharsRe`, or by
   * `urlSuffixNotAllowedAsLastCharRe`.
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` if the character is accepted inside a URL path.
   */
  function isPathChar(char) {
      if (regex_lib_1.alphaNumericAndMarksRe.test(char)) {
          return true;
      }
      if (exports.urlSuffixAllowedSpecialCharsRe.test(char)) {
          return true;
      }
      return exports.urlSuffixNotAllowedAsLastCharRe.test(char);
  }
  exports.isPathChar = isPathChar;
  /**
   * Tells whether the given character may begin the "URL suffix" section of a
   * URI, i.e. the path ('/'), query ('?'), or hash ('#') section.
   *
   * See https://tools.ietf.org/html/rfc3986#appendix-A
   *
   * @param {string} char The character to check.
   * @return {boolean} `true` if `urlSuffixStartCharsRe` matches the character.
   */
  function isUrlSuffixStartChar(char) {
      var startsSuffix = exports.urlSuffixStartCharsRe.test(char);
      return startsSuffix;
  }
  exports.isUrlSuffixStartChar = isUrlSuffixStartChar;
  /**
   * Tells whether the TLD (Top-Level Domain) read from a host is a known one,
   * by testing its lowercased form against `tldRegex` from './tld-regex'.
   *
   * Example: 'com' is expected to be a known TLD (for a host of 'google.com'),
   * whereas 'local' is not (for a domain name of 'my-computer.local').
   *
   * @param {string} tld The TLD to check, in any letter case.
   * @return {boolean} `true` if `tldRegex` matches the lowercased TLD.
   */
  function isKnownTld(tld) {
      // The TLD regex is written in lowercase, so normalize the input first
      var lowerCasedTld = tld.toLowerCase();
      return tld_regex_1.tldRegex.test(lowerCasedTld);
  }
  exports.isKnownTld = isKnownTld;
  /**
   * Tells whether the given `url` is a valid scheme-prefixed URL.
   *
   * The URL is rejected when it uses a scheme matched by `invalidSchemeRe` or
   * when it does not match `schemeUrlRe` at all. A URL with an authority
   * component ('//' after the scheme) is always accepted. Without one, the text
   * after the scheme must look like a hostname.
   *
   * @param {string} url The URL to check.
   * @return {boolean} `true` if the URL should be treated as a scheme URL.
   */
  function isValidSchemeUrl(url) {
      // 'javascript:' and 'vbscript:' links can be dangerous, so they are
      // never linked.
      if (exports.invalidSchemeRe.test(url)) {
          return false;
      }
      var parts = url.match(exports.schemeUrlRe);
      if (!parts) {
          return false;
      }
      // Group 1 holds the '//' of the authority component when it is present.
      // Anything with an authority after the scheme is valid, such as
      // 'http://anything'
      if (parts[1]) {
          return true;
      }
      // Without an authority, require a hostname that looks valid: it must
      // contain at least one '.' char and at least one letter.
      //
      // Accept:
      //   - git:domain.com (scheme followed by a host)
      // Do not accept:
      //   - git:something ('something' doesn't look like a host)
      //   - version:1.0 ('1.0' doesn't look like a host)
      var host = parts[2];
      return host.indexOf('.') !== -1 && regex_lib_1.letterRe.test(host);
  }
  exports.isValidSchemeUrl = isValidSchemeUrl;
  /**
   * Tells whether the given `url` is a "TLD match" (such as 'google.com') whose
   * host ends in a known top-level domain.
   *
   * @param {string} url The URL to check.
   * @return {boolean} `true` if the host has at least two labels and its last
   *   label is accepted by {@link isKnownTld}.
   */
  function isValidTldMatch(url) {
      var hostMatch = url.match(exports.tldUrlHostRe);
      if (!hostMatch) {
          // The URL didn't match the TLD regex, so it must be invalid
          // (highly unlikely to happen, but just in case)
          return false;
      }
      // The full match may carry a leading protocol-relative '//', which has
      // no effect on the text that follows the last '.'
      var host = hostMatch[0];
      var lastDotIdx = host.lastIndexOf('.');
      if (lastDotIdx === -1) {
          // Only a single host label, so there's no TLD. Ex: 'localhost'
          return false;
      }
      // TODO: Implement these conditions for TLD matcher:
      // (
      //     this.longestDomainLabelLength <= 63 &&
      //     this.domainNameLength <= 255
      // );
      return isKnownTld(host.slice(lastDotIdx + 1));
  }
  exports.isValidTldMatch = isValidTldMatch;
  // Matches a complete IPv4 address (ex: '192.168.0.1'): four dot-separated
  // octets, each in the range 0-255
  var ipV4Re = /^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$/;
  // Matches the first character that follows the IPv4 address itself, i.e. the
  // start of a port (':'), path ('/'), query ('?') or hash ('#')
  var ipV4PartRe = /[:/?#]/;
  /**
   * Tells whether the given URL is a valid IPv4-prefixed URL.
   *
   * Only the text before the first ':', '/', '?' or '#' is checked; whatever
   * follows (port, path, query, hash) is ignored.
   *
   * @param {string} url The URL to check. Ex: '192.168.0.1:8080/path'
   * @return {boolean} `true` if the leading part is a valid IPv4 address.
   */
  function isValidIpV4Address(url) {
      // Cut the URL at the first delimiter to isolate the IP address
      var delimiterIdx = url.search(ipV4PartRe);
      var ipV4Part = delimiterIdx === -1 ? url : url.slice(0, delimiterIdx);
      return ipV4Re.test(ipV4Part);
  }
  221. exports.isValidIpV4Address = isValidIpV4Address;
  222. //# sourceMappingURL=uri-utils.js.map