How to decode HTML entities
Asked Answered
M

3

13

I have a string variable with HTML entities:

// Encoded input: the ampersand is written as the HTML entity &amp;.
var str = 'Some text &amp; text';

I want to convert (decode) it to original characters:

Some text & text.

JavaScript doesn't have a built-in function to achieve the wanted result. I can't use jQuery or DOM objects because I need it to work in Google Apps Script.

How can I do that in a simple way?

Mortensen answered 6/7, 2012 at 16:17 Comment(0)
M
21

You can use the built-in XML Service (reference):

// The input has to be well-formed XML text: a bare "&" is not allowed there,
// so the ampersand is written as the entity &amp;.
var str = 'Some text &amp; text';
// Wrap the string in a dummy <d> root element so it parses as a complete document.
var decode = XmlService.parse('<d>' + str + '</d>');
// Read the root element's text content back out of the parsed document.
var strDecoded = decode.getRootElement().getText();

Or you can use the built-in E4X XML class:

// NOTE(review): this relies on a global E4X `XML` constructor; a commenter on this
// answer reports "ReferenceError: XML is not defined", so confirm it exists in your runtime.
var str = 'Some text &#x26; text';
// Wrap the string in a dummy <d> root element and pass it to the XML constructor.
var decode = new XML('<d>' + str + '</d>');
// Take the string form of the resulting XML object.
var strDecoded = decode.toString();
Mortensen answered 6/7, 2012 at 16:17 Comment(3)
Thanks for sharing this awesome hack! :) Is there a way to go the other way, i.e. convert special characters in a string to their corresponding HTML entities? Thanks! :)Undis
@Hafez Xml.parse has been replaced with XmlService.parse.Courtesan
for new XML: ReferenceError: XML is not definedPreponderate
S
2

In 2024 none of those solutions (XML, XmlService) worked 😟 for me, so I did it 💪 manually.

Following is my hard work solution.

To use it just call const fixedString = htmlEntitiesDecode(crazyEncodedString);

/**
 * Decodes HTML character references in a string.
 *
 * Handles the named entities listed in `entities`, plus any decimal (`&#38;`)
 * or hexadecimal (`&#x26;`) numeric reference. The whole string is decoded in
 * a single pass, so text produced by one replacement is never decoded again
 * (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} input - Text that may contain HTML character references.
 * @returns {string} The decoded text. Unknown named entities and numeric
 *   references that are not valid code points are left unchanged. `null` and
 *   `undefined` yield an empty string; other non-string values are
 *   stringified first.
 */
function htmlEntitiesDecode(input) {
  if (input === null || input === undefined) {
    return '';
  }

  return String(input).replace(
    entityReferencePattern,
    (reference, hexDigits, decimalDigits) => {
      // Named reference: neither numeric capture group participated.
      if (hexDigits === undefined && decimalDigits === undefined) {
        return entityLookup.has(reference) ? entityLookup.get(reference) : reference;
      }

      const codePoint =
        hexDigits !== undefined
          ? Number.parseInt(hexDigits, 16)
          : Number.parseInt(decimalDigits, 10);

      // Normalize to the decimal spelling used in the table so that `&#x99;`
      // and `&#0153;` pick up the same explicit mapping as `&#153;`.
      const canonical = `&#${codePoint};`;
      if (entityLookup.has(canonical)) {
        return entityLookup.get(canonical);
      }

      // String.fromCodePoint throws on out-of-range values; NUL and lone
      // surrogates are not meaningful characters either, so keep the text as is.
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
        return reference;
      }
      return String.fromCodePoint(codePoint);
    },
  );
}

/**
 * Entity-to-character table used by `htmlEntitiesDecode`.
 *
 * Numeric entries in the 128-159 range are listed explicitly because they map
 * to Windows-1252 characters rather than to the code point with that number.
 * `&nbsp;` and `&ndash;` are mapped to a plain space and an ASCII hyphen.
 */
const entities = [
  { entity: "&Agrave;", character: "À" },
  { entity: "&Aacute;", character: "Á" },
  { entity: "&Acirc;", character: "Â" },
  { entity: "&Atilde;", character: "Ã" },
  { entity: "&Auml;", character: "Ä" },
  { entity: "&Aring;", character: "Å" },
  { entity: "&agrave;", character: "à" },
  { entity: "&aacute;", character: "á" },
  { entity: "&acirc;", character: "â" },
  { entity: "&atilde;", character: "ã" },
  { entity: "&auml;", character: "ä" },
  { entity: "&aring;", character: "å" },
  { entity: "&AElig;", character: "Æ" },
  { entity: "&aelig;", character: "æ" },
  { entity: "&szlig;", character: "ß" },
  { entity: "&Ccedil;", character: "Ç" },
  { entity: "&ccedil;", character: "ç" },
  { entity: "&Egrave;", character: "È" },
  { entity: "&Eacute;", character: "É" },
  { entity: "&Ecirc;", character: "Ê" },
  { entity: "&Euml;", character: "Ë" },
  { entity: "&egrave;", character: "è" },
  { entity: "&eacute;", character: "é" },
  { entity: "&ecirc;", character: "ê" },
  { entity: "&euml;", character: "ë" },
  { entity: "&#131;", character: "ƒ" },
  { entity: "&Igrave;", character: "Ì" },
  { entity: "&Iacute;", character: "Í" },
  { entity: "&Icirc;", character: "Î" },
  { entity: "&Iuml;", character: "Ï" },
  { entity: "&igrave;", character: "ì" },
  { entity: "&iacute;", character: "í" },
  { entity: "&icirc;", character: "î" },
  { entity: "&iuml;", character: "ï" },
  { entity: "&Ntilde;", character: "Ñ" },
  { entity: "&ntilde;", character: "ñ" },
  { entity: "&Ograve;", character: "Ò" },
  { entity: "&Oacute;", character: "Ó" },
  { entity: "&Ocirc;", character: "Ô" },
  { entity: "&Otilde;", character: "Õ" },
  { entity: "&Ouml;", character: "Ö" },
  { entity: "&ograve;", character: "ò" },
  { entity: "&oacute;", character: "ó" },
  { entity: "&ocirc;", character: "ô" },
  { entity: "&otilde;", character: "õ" },
  { entity: "&ouml;", character: "ö" },
  { entity: "&Oslash;", character: "Ø" },
  { entity: "&oslash;", character: "ø" },
  { entity: "&#140;", character: "Œ" },
  { entity: "&#156;", character: "œ" },
  { entity: "&#138;", character: "Š" },
  { entity: "&#154;", character: "š" },
  { entity: "&Ugrave;", character: "Ù" },
  { entity: "&Uacute;", character: "Ú" },
  { entity: "&Ucirc;", character: "Û" },
  { entity: "&Uuml;", character: "Ü" },
  { entity: "&ugrave;", character: "ù" },
  { entity: "&uacute;", character: "ú" },
  { entity: "&ucirc;", character: "û" },
  { entity: "&uuml;", character: "ü" },
  { entity: "&#181;", character: "µ" },
  { entity: "&#215;", character: "×" },
  { entity: "&Yacute;", character: "Ý" },
  { entity: "&#159;", character: "Ÿ" },
  { entity: "&yacute;", character: "ý" },
  { entity: "&yuml;", character: "ÿ" },
  { entity: "&#176;", character: "°" },
  { entity: "&#134;", character: "†" },
  { entity: "&#135;", character: "‡" },
  { entity: "&lt;", character: "<" },
  { entity: "&gt;", character: ">" },
  { entity: "&#177;", character: "±" },
  { entity: "&#171;", character: "«" },
  { entity: "&#187;", character: "»" },
  { entity: "&#191;", character: "¿" },
  { entity: "&#161;", character: "¡" },
  { entity: "&#183;", character: "·" },
  { entity: "&#149;", character: "•" },
  { entity: "&#153;", character: "™" },
  { entity: "&copy;", character: "©" },
  { entity: "&reg;", character: "®" },
  { entity: "&#167;", character: "§" },
  { entity: "&#182;", character: "¶" },
  { entity: "&quot;", character: "\"" },
  { entity: "&nbsp;", character: " " },
  { entity: "&ndash;", character: "-" },
  { entity: "&amp;", character: "&" },
  { entity: "&ldquo;", character: "“" },
  { entity: "&bull;", character: "•" },
  { entity: "&rdquo;", character: "”" },
  { entity: "&ordf;", character: "ª" },
  { entity: "&ordm;", character: "º" },
  { entity: "&apos;", character: "'" },
  { entity: "&lsquo;", character: "‘" },
  { entity: "&rsquo;", character: "’" },
  { entity: "&mdash;", character: "—" },
];

// Keyed lookup so each reference is resolved in constant time.
const entityLookup = new Map(
  entities.map(({ entity, character }) => [entity, character]),
);

// Matches hexadecimal (group 1), decimal (group 2) and named character references.
const entityReferencePattern =
  /&(?:#[xX]([0-9a-fA-F]+)|#([0-9]+)|[A-Za-z][A-Za-z0-9]*);/g;

Here is the gist with the code.

If I missed some symbol, please, comment here or there.

Salsify answered 12/1 at 17:7 Comment(0)
P
0

You can use the Drive API Advanced Service for this. First you need to enable it. Then, when you insert (create) a new Google Doc file with data from an HTML blob, it automatically renders the HTML in your Doc. After that, you get the text of your Doc with the following code:

/**
 * Converts HTML to plain text by creating a temporary Google Doc from the HTML
 * and reading the document body's text back.
 *
 * Uses the `Drive` advanced service, `Utilities`, `MimeType` and `DocumentApp`.
 *
 * @param {string} html - HTML markup to convert.
 * @returns {string} Text of the temporary document's body.
 */
function htmltotext(html) {
  const id = Drive.Files.insert(
    { title: 'temp', mimeType: MimeType.GOOGLE_DOCS },
    Utilities.newBlob(html, MimeType.HTML),
  ).id;

  try {
    const doc = DocumentApp.openById(id);
    const text = doc.getBody().getText();
    doc.saveAndClose();
    return text;
  } finally {
    // Always delete the temporary file, even if opening or reading it failed,
    // so failed conversions do not leave "temp" documents behind.
    Drive.Files.remove(id); // to remove completely avoiding trash
  }
}

Thanks @tanaike for the suggestion.

Preponderate answered 26/8, 2023 at 13:40 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.