/**
 * Sanitizes an HTML string against an allow-list of tags and attributes.
 *
 * The markup is parsed with `DOMParser`; every element whose tag name is not
 * allowed is removed together with its subtree, and every attribute that is
 * not accepted by at least one rule is stripped from the remaining elements.
 *
 * @param {string} html - Untrusted HTML source.
 * @param {string[]} [tags] - Allowed tag names, upper-case as reported by
 *   `Element.tagName`. Falls back to a built-in list when falsy.
 * @param {Array<string|{attribute: string, tags?: "*"|string[], regex?: RegExp}>} [attributes]
 *   Attribute rules. A bare string allows that attribute on every tag with any
 *   non-empty single-line value; a missing `tags` defaults to "*" and a
 *   missing `regex` to /^.+$/. Falls back to a built-in list when falsy.
 *   Rule objects passed in are copied, never modified.
 * @returns {string} `innerHTML` of the parsed document's root element (with
 *   the default tag list this includes the `<body>` wrapper, and `<head>` is
 *   dropped because HEAD is not listed), or "" when the root element itself
 *   was removed because "HTML" is not an allowed tag.
 */
const sanitize = (html, tags = undefined, attributes = undefined) => {
  // Browsers skip leading control/space characters and drop any tab/CR/LF
  // inside a URL before resolving its scheme, so "JavaScript:",
  // " javascript:" and "java\tscript:" are all script URLs. The pattern
  // below tolerates the same noise and is case-insensitive.
  const ignorable = "[\\t\\n\\r]*";
  const blockedSchemes = ["javascript", "vbscript", "data"]
    .map((scheme) => [...scheme].join(ignorable))
    .join("|");
  const safeLink = new RegExp(
    `^(?![\\u0000-\\u0020]*(?:${blockedSchemes})${ignorable}:)[\\s\\S]+`,
    "i"
  );

  const defaultAttributes = [
    // Absolute http(s) URLs or protocol-relative ("//host/...") URLs.
    { attribute: "src", tags: "*", regex: /^(?:https?:|\/\/)/i },
    { attribute: "href", tags: "*", regex: safeLink },
    { attribute: "width", tags: "*", regex: /^[0-9]+$/ },
    { attribute: "height", tags: "*", regex: /^[0-9]+$/ },
    { attribute: "id", tags: "*", regex: /^[a-zA-Z]+$/ },
    { attribute: "class", tags: "*", regex: /^[a-zA-Z ]+$/ },
    { attribute: "value", tags: ["INPUT", "TEXTAREA"], regex: /^.+$/ },
    { attribute: "checked", tags: ["INPUT"], regex: /^(?:true|false)+$/ },
    {
      attribute: "placeholder",
      tags: ["INPUT", "TEXTAREA"],
      regex: /^.+$/,
    },
    {
      attribute: "alt",
      tags: ["IMG", "AREA", "INPUT"],
      // "^" and "$" anchor the match to the beginning and end of the value.
      regex: /^[0-9a-zA-Z]+$/,
    },
    { attribute: "autofocus", tags: ["INPUT"], regex: /^(?:true|false)+$/ },
    { attribute: "for", tags: ["LABEL", "OUTPUT"], regex: /^[a-zA-Z0-9]+$/ },
  ];

  const defaultTags = [
    "I",
    "P",
    "B",
    "BODY",
    "HTML",
    "DEL",
    "INS",
    "STRONG",
    "SMALL",
    "A",
    "IMG",
    "CITE",
    "FIGCAPTION",
    "ASIDE",
    "ARTICLE",
    "SUMMARY",
    "DETAILS",
    "NAV",
    "TD",
    "TH",
    "TABLE",
    "THEAD",
    "TBODY",
    "SPAN",
    "BR",
    "CODE",
    "PRE",
    "BLOCKQUOTE",
    "EM",
    "HR",
    "H1",
    "H2",
    "H3",
    "H4",
    "H5",
    "H6",
    "DIV",
    "MAIN",
    "HEADER",
    "FOOTER",
    "SELECT",
    "COL",
    "AREA",
    "ADDRESS",
    "ABBR",
    "BDI",
    "BDO",
  ];

  const allowedTags = tags || defaultTags;

  // Normalise every rule into a fresh { attribute, tags, regex } object so the
  // caller's rule objects are left untouched.
  const rules = (attributes || defaultAttributes).map((rule) =>
    typeof rule === "string"
      ? { attribute: rule, tags: "*", regex: /^.+$/ }
      : Object.assign({ tags: "*", regex: /^.+$/ }, rule)
  );

  // True when at least one rule accepts this attribute on this element.
  const isAllowedAttribute = (element, name) => {
    const value = element.getAttribute(name);
    return rules.some((rule) => {
      if (rule.attribute !== name) {
        return false;
      }
      if (rule.tags !== "*" && !rule.tags.includes(element.tagName)) {
        return false;
      }
      // A caller-supplied /g or /y regex keeps state between test() calls.
      rule.regex.lastIndex = 0;
      return rule.regex.test(value);
    });
  };

  const doc = new DOMParser().parseFromString(html, "text/html");
  for (const element of doc.querySelectorAll("*")) {
    if (!allowedTags.includes(element.tagName)) {
      // Drops the element and its whole subtree from the document.
      element.remove();
      continue;
    }
    // Snapshot the names first: the attribute collection is live and would
    // shift under the loop while attributes are being removed.
    const names = Array.from(element.attributes, (attr) => attr.name);
    for (const name of names) {
      if (!isAllowedAttribute(element, name)) {
        element.removeAttribute(name);
      }
    }
  }

  // The root element is gone when "HTML" is not in the allowed tag list.
  const root = doc.documentElement;
  return root ? root.innerHTML : "";
};
/* Use a sans-serif typeface for every element. */
* {
font-family: sans-serif;
}
/* Shared sizing for textareas: 49% width each (presumably so two fit on one
   row), a fixed 300px height, and no user resizing. */
textarea {
width: 49%;
height: 300px;
padding: 10px;
/* border-box keeps the 10px padding inside the 49% width. */
box-sizing: border-box;
resize: none;
}
<!-- Demo markup: an editable textarea pre-filled with sample unsanitized HTML, followed by a read-only textarea for the result. -->
<h1>Sanitize HTML client side</h1>
<textarea id='input' placeholder="Unsanitized HTML">
<!-- This removes both the src and onerror attributes because src is not a valid url. -->
<img src="error" onerror="alert('XSS')">
<div id="something_harmless" onload="alert('More XSS')">
<b>Bold text!</b> and <em>Italic text!</em>, some more text. <del>Deleted text!</del>
</div>
<script>
alert("This would be XSS");
</script>
</textarea>
<!-- Read-only output area; the script below writes the return value of sanitize() into it. -->
<textarea id='output' placeholder="Sanitized HTML will appear here" readonly></textarea>
<script>
// Re-sanitize whenever the text of #input changes and show the result in
// #output. The "input" event also fires for paste, cut, drag-and-drop and IME
// edits, which "keyup" misses; addEventListener leaves other handlers intact.
{
  const inputArea = document.querySelector("#input");
  const outputArea = document.querySelector("#output");
  inputArea.addEventListener("input", () => {
    outputArea.value = sanitize(inputArea.value);
  });
}
</script>