default.js 10 KB


  1. /**
  2. * default settings
  3. *
  4. * @author Zongmin Lei<leizongmin@gmail.com>
  5. */
  6. var FilterCSS = require("cssfilter").FilterCSS;
  7. var getDefaultCSSWhiteList = require("cssfilter").getDefaultWhiteList;
  8. var _ = require("./util");
  /**
   * Build the default whitelist of tags and, for each tag, the attribute names
   * that are allowed on it.
   *
   * A new object holding new arrays is created on every call, so the result of
   * one call can be modified without affecting the result of another.
   *
   * @return {Object} map of tag name to an array of allowed attribute names
   */
  function getDefaultWhiteList() {
    var whiteList = {
      a: ["target", "href", "title"],
      abbr: ["title"],
      address: [],
      area: ["shape", "coords", "href", "alt"],
      article: [],
      aside: [],
      audio: ["autoplay", "controls", "crossorigin", "loop", "muted", "preload", "src"],
      b: [],
      bdi: ["dir"],
      bdo: ["dir"],
      big: [],
      blockquote: ["cite"],
      br: [],
      caption: [],
      center: [],
      cite: [],
      code: [],
      col: ["align", "valign", "span", "width"],
      colgroup: ["align", "valign", "span", "width"],
      dd: [],
      del: ["datetime"],
      details: ["open"],
      div: [],
      dl: [],
      dt: [],
      em: [],
      figcaption: [],
      figure: [],
      font: ["color", "size", "face"],
      footer: [],
      h1: [],
      h2: [],
      h3: [],
      h4: [],
      h5: [],
      h6: [],
      header: [],
      hr: [],
      i: [],
      img: ["src", "alt", "title", "width", "height"],
      ins: ["datetime"],
      li: [],
      mark: [],
      nav: [],
      ol: [],
      p: [],
      pre: [],
      s: [],
      section: [],
      small: [],
      span: [],
      sub: [],
      summary: [],
      sup: [],
      strong: [],
      strike: [],
      table: ["width", "border", "align", "valign"],
      tbody: ["align", "valign"],
      td: ["width", "rowspan", "colspan", "align", "valign"],
      tfoot: ["align", "valign"],
      th: ["width", "rowspan", "colspan", "align", "valign"],
      thead: ["align", "valign"],
      tr: ["rowspan", "align", "valign"],
      tt: [],
      u: [],
      ul: [],
      video: [
        "autoplay", "controls", "crossorigin", "loop", "muted", "playsinline",
        "poster", "preload", "src", "height", "width",
      ],
    };
    return whiteList;
  }
  // Module-wide FilterCSS instance, constructed with no arguments. safeAttrValue()
  // falls back to it when the caller passes no `cssFilter`, and it is exported
  // from this module as `cssFilter`.
  100. var defaultCSSFilter = new FilterCSS();
  /**
   * Default `onTag` hook. It performs no work and hands back `undefined`.
   *
   * @param {String} tag
   * @param {String} html
   * @param {Object} options
   * @return {undefined} nothing is ever returned by the default hook
   */
  function onTag(tag, html, options) {
    // intentionally a no-op
    return undefined;
  }
  /**
   * Default `onIgnoreTag` hook. It performs no work and hands back `undefined`.
   *
   * @param {String} tag
   * @param {String} html
   * @param {Object} options
   * @return {undefined} nothing is ever returned by the default hook
   */
  function onIgnoreTag(tag, html, options) {
    // intentionally a no-op
    return undefined;
  }
  /**
   * Default `onTagAttr` hook. It performs no work and hands back `undefined`.
   *
   * @param {String} tag
   * @param {String} name
   * @param {String} value
   * @return {undefined} nothing is ever returned by the default hook
   */
  function onTagAttr(tag, name, value) {
    // intentionally a no-op
    return undefined;
  }
  /**
   * Default `onIgnoreTagAttr` hook. It performs no work and hands back
   * `undefined`.
   *
   * @param {String} tag
   * @param {String} name
   * @param {String} value
   * @return {undefined} nothing is ever returned by the default hook
   */
  function onIgnoreTagAttr(tag, name, value) {
    // intentionally a no-op
    return undefined;
  }
  /**
   * Default `escapeHtml` function: rewrites every `<` as `&lt;` and every `>`
   * as `&gt;`. No other character is touched (in particular `&` and quotes are
   * left as they are).
   *
   * @param {String} html
   * @return {String} the input with angle brackets replaced by entities
   */
  function escapeHtml(html) {
    var withoutLt = html.replace(REGEXP_LT, "&lt;");
    var withoutGt = withoutLt.replace(REGEXP_GT, "&gt;");
    return withoutGt;
  }
  /**
   * Default `safeAttrValue` function: decodes an attribute value, rejects
   * values that look dangerous for the given attribute name, and returns the
   * value re-escaped for output.
   *
   * - `href` / `src`: the trimmed value must be exactly `#` or start with one of
   *   `http://`, `https://`, `mailto:`, `tel:`, `data:image/`, `ftp://`, `./`,
   *   `../`, `#`, `/` (compared case-sensitively); otherwise `""` is returned.
   * - `background`: `""` is returned when the script-scheme pattern matches.
   * - `style`: `""` is returned for `expression(` or for `url(` combined with a
   *   script scheme; otherwise the value is passed through the CSS filter
   *   unless `cssFilter` is `false`.
   * - any other name: the decoded value is only re-escaped.
   *
   * @param {String} tag       tag name (not consulted by this implementation)
   * @param {String} name      attribute name
   * @param {String} value     raw attribute value
   * @param {Object} cssFilter object with a `process(css)` method; `false`
   *                           disables CSS filtering, any other falsy value
   *                           selects the module's default filter
   * @return {String} the safe, escaped value, or `""` when it was rejected
   */
  function safeAttrValue(tag, name, value, cssFilter) {
    // The shared patterns are global (`g`), so `lastIndex` has to be rewound
    // before every `test()` call.
    function matches(pattern, text) {
      pattern.lastIndex = 0;
      return pattern.test(text);
    }

    // decode the raw attribute value before inspecting it
    value = friendlyAttrValue(value);

    switch (name) {
      case "href":
      case "src": {
        value = _.trim(value);
        if (value === "#") return "#";

        var allowedPrefixes = [
          "http://",
          "https://",
          "mailto:",
          "tel:",
          "data:image/",
          "ftp://",
          "./",
          "../",
          "#",
          "/",
        ];
        var isAllowed = false;
        for (var k = 0; k < allowedPrefixes.length && !isAllowed; k++) {
          var prefix = allowedPrefixes[k];
          isAllowed = value.slice(0, prefix.length) === prefix;
        }
        if (!isAllowed) return "";
        break;
      }

      case "background":
        // (possibly obsolete) reject script schemes such as `javascript:`
        if (matches(REGEXP_DEFAULT_ON_TAG_ATTR_4, value)) return "";
        break;

      case "style": {
        // `expression()`
        if (matches(REGEXP_DEFAULT_ON_TAG_ATTR_7, value)) return "";
        // `url()` is rejected only when a script scheme is present as well
        if (
          matches(REGEXP_DEFAULT_ON_TAG_ATTR_8, value) &&
          matches(REGEXP_DEFAULT_ON_TAG_ATTR_4, value)
        ) {
          return "";
        }
        if (cssFilter !== false) {
          var activeFilter = cssFilter || defaultCSSFilter;
          value = activeFilter.process(value);
        }
        break;
      }

      default:
        break;
    }

    // escape `"`, `<` and `>` before handing the value back
    return escapeAttrValue(value);
  }
  216. // RegExp list
  // Every pattern below carries the `g` flag. Callers that use `.test()` on them
  // (see safeAttrValue) reset `lastIndex` to 0 first, because a global RegExp
  // keeps its match position between `.test()` calls.
  // `<` and `>`: used by escapeHtml.
  217. var REGEXP_LT = /</g;
  218. var REGEXP_GT = />/g;
  // Literal double quote (escapeQuote) and its entity form (unescapeQuote).
  219. var REGEXP_QUOTE = /"/g;
  220. var REGEXP_QUOTE_2 = /&quot;/g;
  // `&#` followed by any run of ASCII letters/digits and an optional `;`.
  // Capture group 1 is the code text; escapeHtmlEntities decodes it.
  221. var REGEXP_ATTR_VALUE_1 = /&#([a-zA-Z0-9]*);?/gim;
  // `&colon;` and `&newline;` (case-insensitive, `;` optional): used by
  // escapeDangerHtml5Entities.
  222. var REGEXP_ATTR_VALUE_COLON = /&colon;?/gim;
  223. var REGEXP_ATTR_VALUE_NEWLINE = /&newline;?/gim;
  224. // var REGEXP_DEFAULT_ON_TAG_ATTR_3 = /\/\*|\*\//gm;
  // Script-like schemes: `javascript:`, `vbscript:`, `livescript:` and `mocha:`,
  // matched case-insensitively with optional whitespace between the letters.
  225. var REGEXP_DEFAULT_ON_TAG_ATTR_4 =
  226. /((j\s*a\s*v\s*a|v\s*b|l\s*i\s*v\s*e)\s*s\s*c\s*r\s*i\s*p\s*t\s*|m\s*o\s*c\s*h\s*a):/gi;
  227. // var REGEXP_DEFAULT_ON_TAG_ATTR_5 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:/gi;
  228. // var REGEXP_DEFAULT_ON_TAG_ATTR_6 = /^[\s"'`]*(d\s*a\s*t\s*a\s*)\:\s*image\//gi;
  // `expression(` with optional whitespace between the letters, plus the rest of
  // the line; checked against `style` values in safeAttrValue.
  229. var REGEXP_DEFAULT_ON_TAG_ATTR_7 =
  230. /e\s*x\s*p\s*r\s*e\s*s\s*s\s*i\s*o\s*n\s*\(.*/gi;
  // `url(` with optional whitespace between the letters, plus the rest of the
  // line; checked against `style` values in safeAttrValue.
  231. var REGEXP_DEFAULT_ON_TAG_ATTR_8 = /u\s*r\s*l\s*\(.*/gi;
  /**
   * Replace every double quote (`"`) in a string with `&quot;`.
   *
   * @param {String} str
   * @return {String} the string with all double quotes turned into entities
   */
  function escapeQuote(str) {
    var quoted = str.replace(REGEXP_QUOTE, "&quot;");
    return quoted;
  }
  /**
   * Replace every `&quot;` entity in a string with a literal double quote.
   * The match is case-sensitive.
   *
   * @param {String} str
   * @return {String} the string with all `&quot;` entities turned into `"`
   */
  function unescapeQuote(str) {
    var unquoted = str.replace(REGEXP_QUOTE_2, '"');
    return unquoted;
  }
  /**
   * Decode numeric character references (`&#106;`, `&#x6A;`, trailing `;`
   * optional) into the characters they stand for. Despite its name, this
   * function decodes entities rather than escaping them.
   *
   * Code points in the supplementary planes (0x10000 - 0x10FFFF) are emitted as
   * a UTF-16 surrogate pair. `String.fromCharCode` alone keeps only the low 16
   * bits of its argument, which turned e.g. `&#128512;` into U+F600 and
   * `&#x1003A;` into ":". All other inputs keep their previous result: an empty
   * or non-numeric code still yields U+0000, and values above 0x10FFFF are
   * still reduced to 16 bits.
   *
   * @param {String} str
   * @return {String} the string with numeric character references decoded
   */
  function escapeHtmlEntities(str) {
    return str.replace(REGEXP_ATTR_VALUE_1, function replaceUnicode(match, code) {
      var isHex = code[0] === "x" || code[0] === "X";
      var codePoint = isHex ? parseInt(code.substr(1), 16) : parseInt(code, 10);

      if (codePoint > 0xffff && codePoint <= 0x10ffff) {
        // split the astral code point into its high/low surrogate halves
        var offset = codePoint - 0x10000;
        return String.fromCharCode(
          0xd800 + (offset >> 10),
          0xdc00 + (offset & 0x3ff)
        );
      }
      // NaN compares false above and lands here, producing U+0000 as before
      return String.fromCharCode(codePoint);
    });
  }
  /**
   * Decode the two HTML5 named entities handled here: `&colon;` becomes `:` and
   * `&newline;` becomes a single space. Both are matched case-insensitively and
   * the trailing `;` is optional.
   *
   * @param {String} str
   * @return {String} the string with both entities replaced
   */
  function escapeDangerHtml5Entities(str) {
    var withColons = str.replace(REGEXP_ATTR_VALUE_COLON, ":");
    var withSpaces = withColons.replace(REGEXP_ATTR_VALUE_NEWLINE, " ");
    return withSpaces;
  }
  /**
   * Replace every character whose UTF-16 code unit is below 32 with a space,
   * then trim the result with `_.trim`.
   *
   * @param {String} str
   * @return {String} the cleaned and trimmed string
   */
  function clearNonPrintableCharacter(str) {
    // \x00-\x1F covers exactly the code units 0..31
    var spaced = str.replace(/[\x00-\x1F]/g, " ");
    return _.trim(spaced);
  }
  /**
   * Turn a raw attribute value into its decoded form by applying, in this
   * order: unescapeQuote, escapeHtmlEntities, escapeDangerHtml5Entities and
   * clearNonPrintableCharacter.
   *
   * @param {String} str
   * @return {String} the decoded attribute value
   */
  function friendlyAttrValue(str) {
    return clearNonPrintableCharacter(
      escapeDangerHtml5Entities(
        escapeHtmlEntities(
          unescapeQuote(str)
        )
      )
    );
  }
  /**
   * Escape an attribute value for output: double quotes are escaped first
   * (escapeQuote), then `<` and `>` (escapeHtml).
   *
   * @param {String} str
   * @return {String} the escaped attribute value
   */
  function escapeAttrValue(str) {
    return escapeHtml(escapeQuote(str));
  }
  /**
   * `onIgnoreTag` handler that removes every tag that is not in the whitelist:
   * whatever it is called with, it answers with an empty string.
   *
   * @return {String} always `""`
   */
  function onIgnoreTagStripAll() {
    var nothing = "";
    return nothing;
  }
  /**
   * Create a helper that removes selected tags together with their bodies.
   *
   * The returned `onIgnoreTag` replaces each matching opening tag with
   * `[removed]` and each matching closing tag with `[/removed]`, and records
   * the range that runs from the first still-open opening tag to the end of the
   * closing marker. `remove(html)` then cuts every recorded range out of the
   * string it is given. The recorded offsets come from `options.position`, so
   * presumably that is the tag's offset in the filtered output that is later
   * passed to `remove` - confirm against the caller.
   *
   * @param {array} tags       tag names to remove; when this is not an array,
   *                           every tag handed to `onIgnoreTag` is removed
   * @param {function} next    optional handler `(tag, html, options)` used for
   *                           tags that are not in `tags`; defaults to a no-op
   * @return {Object} `{ onIgnoreTag: function, remove: function }`
   */
  function StripTagBody(tags, next) {
    if (typeof next !== "function") {
      next = function () {};
    }

    var isRemoveAllTag = !Array.isArray(tags);
    function isRemoveTag(tag) {
      if (isRemoveAllTag) return true;
      return _.indexOf(tags, tag) !== -1;
    }

    // [start, end) ranges to cut out, in the order they were closed
    var removeList = [];
    // offset of the first unclosed opening tag, or `false` when none is open
    var posStart = false;

    return {
      onIgnoreTag: function (tag, html, options) {
        if (!isRemoveTag(tag)) {
          return next(tag, html, options);
        }

        if (options.isClosing) {
          var ret = "[/removed]";
          var end = options.position + ret.length;
          removeList.push([
            posStart !== false ? posStart : options.position,
            end,
          ]);
          posStart = false;
          return ret;
        }

        // Compare against `false` explicitly: an offset of 0 is a valid start
        // and must not be overwritten by a later (nested) opening tag.
        if (posStart === false) {
          posStart = options.position;
        }
        return "[removed]";
      },

      remove: function (html) {
        var rethtml = "";
        var lastPos = 0;
        _.forEach(removeList, function (pos) {
          rethtml += html.slice(lastPos, pos[0]);
          lastPos = pos[1];
        });
        rethtml += html.slice(lastPos);
        return rethtml;
      },
    };
  }
  /**
   * Remove HTML comments (`<!-- ... -->`) from a string.
   *
   * The search for the closing `-->` starts at the position of the opening
   * `<!--`. A comment that is never closed discards everything from its
   * opening marker to the end of the input.
   *
   * @param {String} html
   * @return {String} the input without comments
   */
  function stripCommentTag(html) {
    var pieces = [];
    var cursor = 0;
    var total = html.length;

    while (cursor < total) {
      var openAt = html.indexOf("<!--", cursor);
      if (openAt === -1) {
        // no further comment: keep the remainder and stop
        pieces.push(html.slice(cursor));
        break;
      }
      pieces.push(html.slice(cursor, openAt));

      var closeAt = html.indexOf("-->", openAt);
      if (closeAt === -1) {
        // unterminated comment: the rest of the input is dropped
        break;
      }
      cursor = closeAt + 3;
    }

    return pieces.join("");
  }
  /**
   * Remove control characters from a string: every UTF-16 code unit from 0 to
   * 31 except line feed (10) and carriage return (13), plus DEL (127). Note
   * that the tab character (9) is among those removed.
   *
   * @param {String} html
   * @return {String} the input without those characters
   */
  function stripBlankChar(html) {
    var kept = "";
    for (var pos = 0; pos < html.length; pos++) {
      var unit = html.charCodeAt(pos);
      var isLineBreak = unit === 10 || unit === 13;
      var isControl = unit <= 31 || unit === 127;
      if (isLineBreak || !isControl) {
        kept += html.charAt(pos);
      }
    }
    return kept;
  }
  // Public interface of this module.
  // `whiteList` is one whitelist object built once, when this line runs;
  // `getDefaultWhiteList` is the factory itself for callers that want their own.
  412. exports.whiteList = getDefaultWhiteList();
  413. exports.getDefaultWhiteList = getDefaultWhiteList;
  // default hook functions
  414. exports.onTag = onTag;
  415. exports.onIgnoreTag = onIgnoreTag;
  416. exports.onTagAttr = onTagAttr;
  417. exports.onIgnoreTagAttr = onIgnoreTagAttr;
  418. exports.safeAttrValue = safeAttrValue;
  419. exports.escapeHtml = escapeHtml;
  // string helpers used by the functions above
  420. exports.escapeQuote = escapeQuote;
  421. exports.unescapeQuote = unescapeQuote;
  422. exports.escapeHtmlEntities = escapeHtmlEntities;
  423. exports.escapeDangerHtml5Entities = escapeDangerHtml5Entities;
  424. exports.clearNonPrintableCharacter = clearNonPrintableCharacter;
  425. exports.friendlyAttrValue = friendlyAttrValue;
  426. exports.escapeAttrValue = escapeAttrValue;
  // optional tag-stripping and clean-up utilities
  427. exports.onIgnoreTagStripAll = onIgnoreTagStripAll;
  428. exports.StripTagBody = StripTagBody;
  429. exports.stripCommentTag = stripCommentTag;
  430. exports.stripBlankChar = stripBlankChar;
  // the shared FilterCSS instance and the whitelist getter re-exported from `cssfilter`
  431. exports.cssFilter = defaultCSSFilter;
  432. exports.getDefaultCSSWhiteList = getDefaultCSSWhiteList;