Douglas Crockford wrote:

Does anyone know a technique in javascript to transform from (for

example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XMLHTTP

that isn't XML, but might contain some XML char entities.

/**
 * Replace named character entities (for example "&lt;") in this string with
 * the characters they stand for.
 *
 * Entity names are looked up in String.prototype.deentityify.data, which
 * callers may extend with further name -> replacement pairs. A reference is
 * "&", then one or more characters other than "&" and ";", then ";".
 *
 * The string is decoded in a single left-to-right pass, so:
 *   - an unknown entity or a stray "&" is left untouched and does not stop
 *     the remaining entities from being decoded;
 *   - replacement text is never rescanned ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param o Ignored. Kept only so the function's signature is unchanged.
 * @returns {string} A primitive string with every known entity replaced.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data || {},
        hasOwn = Object.prototype.hasOwnProperty;

    // String(this): in non-strict code `this` is a String wrapper object, and
    // returning it unchanged would hand callers an object instead of a string.
    return String(this).replace(/&([^&;]+);/g, function (entity, name) {
        // Own-property check so inherited names such as "constructor" or
        // "toString" are not mistaken for entities.
        return hasOwn.call(table, name) ? table[name] : entity;
    });
};

/**
 * Entity name -> replacement text used by deentityify. Holds the five
 * entities predefined by XML; add more entries here to decode more names.
 */
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};

// Usage example: the entity-encoded input decodes to the string "<cool>".
var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com

Thanks for the response, but I needed something that can handle any char

reference. So I built up the hash with some js code generated from a quick

Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and

I had it done straight away before I saw your response. Once I'd resigned

myself to coding this in javascript (instead of tricking the browser into

decoding the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k

when served with Content-Encoding: gzip).

// Matches one character reference; capture group 1 is the text between
// "&" and ";":
//   "#x" + hex digits   hexadecimal numeric reference   (&#x2665;)
//   "#"  + digits       decimal numeric reference       (&#9829;)
//   letter + 1..7 alnum named entity, 2..8 characters   (&lt; ... &thetasym;)
var CHAR_REF_REGEX = /&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z][A-Za-z0-9]{1,7});/g;

// Resolve the body of a reference (group 1 of CHAR_REF_REGEX) to a code
// point, or return null when it does not name a usable character.
function char_ref_code_point( ref )
{
    var has_own = Object.prototype.hasOwnProperty;
    var code, lower;

    if( ref.charAt( 0 ) === "#" )
    {
        // Strip "#x"/"#X" or "#" explicitly: Number("#9829") is NaN.
        if( ref.charAt( 1 ) === "x" || ref.charAt( 1 ) === "X" )
        {
            code = parseInt( ref.substring( 2 ), 16 );
        }
        else
        {
            code = parseInt( ref.substring( 1 ), 10 );
        }

        // Reject NUL, lone surrogates and anything beyond U+10FFFF.
        if( isNaN( code ) || code < 1 || code > 0x10FFFF ||
            ( code >= 0xD800 && code <= 0xDFFF ) )
        {
            return null;
        }
        return code;
    }

    // Entity names are case-sensitive (Agrave and agrave differ), so try the
    // exact name first. The lowercase retry keeps working with a table that
    // only has lowercase keys.
    if( has_own.call( CHAR_ENTITIES, ref ) )
    {
        return CHAR_ENTITIES[ ref ];
    }
    lower = ref.toLowerCase();
    if( has_own.call( CHAR_ENTITIES, lower ) )
    {
        return CHAR_ENTITIES[ lower ];
    }
    return null;
}

// Build the string for one code point, using a UTF-16 surrogate pair for
// code points above U+FFFF (String.fromCharCode alone would truncate them).
function string_from_code_point( code )
{
    if( code <= 0xFFFF )
    {
        return String.fromCharCode( code );
    }
    code -= 0x10000;
    return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                0xDC00 + ( code & 0x3FF ) );
}

/**
 * Expand the character references in str: named entities found in
 * CHAR_ENTITIES, decimal references (&#9829;) and hexadecimal references
 * (&#x2665;).
 *
 * References that cannot be resolved are left exactly as written. The input
 * is decoded in one pass, so text produced by a replacement is not decoded
 * again ("&amp;lt;" becomes "&lt;").
 *
 * @param {string} str Text that may contain character references.
 * @returns {string} str with every resolvable reference replaced.
 */
function expand_char_references( str )
{
    // replace() resets lastIndex on the shared global regex before scanning.
    return str.replace( CHAR_REF_REGEX, function ( whole, ref )
    {
        var code = char_ref_code_point( ref );
        return code === null ? whole : string_from_code_point( code );
    } );
}

// Entity name -> Unicode code point for the XHTML 1.0 entity sets (Latin-1,
// special characters, symbols) plus the XML-only "apos".
// Names are case-sensitive: 'Agrave' (192) and 'agrave' (224) are different
// characters, as are 'Dagger'/'dagger', 'Prime'/'prime', 'lArr'/'larr', etc.
// Keys stay quoted because some names ('int') are reserved words in older
// engines.
var CHAR_ENTITIES = {
    // Latin-1 (160-255)
    'nbsp':160, 'iexcl':161, 'cent':162, 'pound':163, 'curren':164, 'yen':165,
    'brvbar':166, 'sect':167, 'uml':168, 'copy':169, 'ordf':170, 'laquo':171,
    'not':172, 'shy':173, 'reg':174, 'macr':175, 'deg':176, 'plusmn':177,
    'sup2':178, 'sup3':179, 'acute':180, 'micro':181, 'para':182, 'middot':183,
    'cedil':184, 'sup1':185, 'ordm':186, 'raquo':187, 'frac14':188, 'frac12':189,
    'frac34':190, 'iquest':191,
    'Agrave':192, 'Aacute':193, 'Acirc':194, 'Atilde':195, 'Auml':196, 'Aring':197,
    'AElig':198, 'Ccedil':199, 'Egrave':200, 'Eacute':201, 'Ecirc':202, 'Euml':203,
    'Igrave':204, 'Iacute':205, 'Icirc':206, 'Iuml':207, 'ETH':208, 'Ntilde':209,
    'Ograve':210, 'Oacute':211, 'Ocirc':212, 'Otilde':213, 'Ouml':214, 'times':215,
    'Oslash':216, 'Ugrave':217, 'Uacute':218, 'Ucirc':219, 'Uuml':220, 'Yacute':221,
    'THORN':222, 'szlig':223,
    'agrave':224, 'aacute':225, 'acirc':226, 'atilde':227, 'auml':228, 'aring':229,
    'aelig':230, 'ccedil':231, 'egrave':232, 'eacute':233, 'ecirc':234, 'euml':235,
    'igrave':236, 'iacute':237, 'icirc':238, 'iuml':239, 'eth':240, 'ntilde':241,
    'ograve':242, 'oacute':243, 'ocirc':244, 'otilde':245, 'ouml':246, 'divide':247,
    'oslash':248, 'ugrave':249, 'uacute':250, 'ucirc':251, 'uuml':252, 'yacute':253,
    'thorn':254, 'yuml':255,

    // Markup-significant and special characters
    'lt':60, 'gt':62, 'amp':38, 'apos':39, 'quot':34,
    'OElig':338, 'oelig':339, 'Scaron':352, 'scaron':353, 'Yuml':376,
    'circ':710, 'tilde':732,
    'ensp':8194, 'emsp':8195, 'thinsp':8201, 'zwnj':8204, 'zwj':8205,
    'lrm':8206, 'rlm':8207, 'ndash':8211, 'mdash':8212,
    'lsquo':8216, 'rsquo':8217, 'sbquo':8218, 'ldquo':8220, 'rdquo':8221,
    'bdquo':8222, 'dagger':8224, 'Dagger':8225, 'permil':8240,
    'lsaquo':8249, 'rsaquo':8250, 'euro':8364,

    // Symbols: Greek letters
    'fnof':402,
    'Alpha':913, 'Beta':914, 'Gamma':915, 'Delta':916, 'Epsilon':917, 'Zeta':918,
    'Eta':919, 'Theta':920, 'Iota':921, 'Kappa':922, 'Lambda':923, 'Mu':924,
    'Nu':925, 'Xi':926, 'Omicron':927, 'Pi':928, 'Rho':929, 'Sigma':931,
    'Tau':932, 'Upsilon':933, 'Phi':934, 'Chi':935, 'Psi':936, 'Omega':937,
    'alpha':945, 'beta':946, 'gamma':947, 'delta':948, 'epsilon':949, 'zeta':950,
    'eta':951, 'theta':952, 'iota':953, 'kappa':954, 'lambda':955, 'mu':956,
    'nu':957, 'xi':958, 'omicron':959, 'pi':960, 'rho':961, 'sigmaf':962,
    'sigma':963, 'tau':964, 'upsilon':965, 'phi':966, 'chi':967, 'psi':968,
    'omega':969, 'thetasym':977, 'upsih':978, 'piv':982,

    // Symbols: punctuation, letterlike, arrows
    'bull':8226, 'hellip':8230, 'prime':8242, 'Prime':8243, 'oline':8254,
    'frasl':8260, 'weierp':8472, 'image':8465, 'real':8476, 'trade':8482,
    'alefsym':8501,
    'larr':8592, 'uarr':8593, 'rarr':8594, 'darr':8595, 'harr':8596, 'crarr':8629,
    'lArr':8656, 'uArr':8657, 'rArr':8658, 'dArr':8659, 'hArr':8660,

    // Symbols: mathematical operators
    'forall':8704, 'part':8706, 'exist':8707, 'empty':8709, 'nabla':8711,
    'isin':8712, 'notin':8713, 'ni':8715, 'prod':8719, 'sum':8721, 'minus':8722,
    'lowast':8727, 'radic':8730, 'prop':8733, 'infin':8734, 'ang':8736,
    'and':8743, 'or':8744, 'cap':8745, 'cup':8746, 'int':8747, 'there4':8756,
    'sim':8764, 'cong':8773, 'asymp':8776, 'ne':8800, 'equiv':8801, 'le':8804,
    'ge':8805, 'sub':8834, 'sup':8835, 'nsub':8836, 'sube':8838, 'supe':8839,
    'oplus':8853, 'otimes':8855, 'perp':8869, 'sdot':8901,

    // Symbols: technical, geometric, card suits
    'lceil':8968, 'rceil':8969, 'lfloor':8970, 'rfloor':8971,
    'lang':9001, 'rang':9002, 'loz':9674,
    'spades':9824, 'clubs':9827, 'hearts':9829, 'diams':9830
};