Douglas Crockford wrote:
Does anyone know a technique in JavaScript to transform from (for
example) &hearts; to the char '♥'?
I'm doing this because I have to interpret some data I got over XMLHTTP
that isn't XML, but might contain some XML char entities.
/**
 * Replace named entity references (e.g. "&lt;") in this string with the
 * characters registered in String.prototype.deentityify.data.
 *
 * Every "&name;" sequence is examined in a single left-to-right pass:
 *  - names found in the table are replaced by the table value;
 *  - anything else (unknown names, a bare "&", "&;") is left untouched and
 *    does not stop later or earlier references from being decoded;
 *  - replaced text is never rescanned, so "&amp;lt;" yields "&lt;", not "<".
 *
 * @param {*} o  Accepted for backward compatibility only; it is ignored and
 *               the table is always read from String.prototype.deentityify.data.
 * @returns {string} A primitive string with all known entities decoded.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data,
        hasOwn = Object.prototype.hasOwnProperty;

    return this.replace(/&([^&;]+);/g, function (whole, name) {
        // Own-property check so that names such as "constructor" or
        // "toString" are not resolved through Object.prototype.
        return hasOwn.call(table, name) ? table[name] : whole;
    });
};

/**
 * Entity name -> replacement text. Holds the five entities predefined by
 * XML; add further properties to support more names.
 */
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};
// Usage example: the lt/gt references are decoded, so s is "<cool>".
var s = "&lt;cool&gt;".deentityify();
http://www.JSLint.com
Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some JS code generated from a quick
Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
I had it done straight away, before I saw your response. Once I'd resigned
myself to coding this in JavaScript (instead of tricking the browser into decoding
the references) it was actually pretty easy.
I'll post the output in case anyone wants to do this (it's about 3k, or 1k
with gzip content-encoding):
/**
 * Matches one character reference. Capture group 1 is either
 *   - an entity name of 2 to 8 word characters (8 covers "thetasym"),
 *   - "#" followed by decimal digits, or
 *   - "#x" / "#X" followed by hexadecimal digits.
 */
var CHAR_REF_REGEX = /&(\w{2,8}|#\d+|#[xX][0-9a-fA-F]+);/g;

/**
 * Expand the named and numeric character references in a string.
 *
 * Named references are looked up in the global CHAR_ENTITIES table
 * (entity name -> character code). The exact name is tried first so that
 * case-distinct entities (e.g. "Agrave" / "agrave") can coexist; if it is
 * absent the lowercased name is tried, which keeps lookups case-insensitive
 * for tables that only hold lowercase keys.
 *
 * References that cannot be resolved (unknown name, code above U+10FFFF)
 * are left in the output unchanged.
 *
 * @param {string} str  Text that may contain character references.
 * @returns {string} str with every resolvable reference replaced.
 */
function expand_char_references( str )
{
    var hasOwn = Object.prototype.hasOwnProperty;

    // String#replace resets lastIndex on a /g regex, so the shared
    // CHAR_REF_REGEX carries no state from one call to the next.
    return str.replace( CHAR_REF_REGEX, function( whole, ref ) {
        var code, lowered, offset;

        if( ref.charAt( 0 ) === "#" )
        {
            // Numeric reference: skip "#x" for hex, "#" for decimal.
            if( ref.charAt( 1 ) === "x" || ref.charAt( 1 ) === "X" )
                code = parseInt( ref.substring( 2 ), 16 );
            else
                code = parseInt( ref.substring( 1 ), 10 );
        }
        else
        {
            // Own-property checks keep names like "toString" from being
            // resolved through Object.prototype.
            lowered = ref.toLowerCase();
            if( hasOwn.call( CHAR_ENTITIES, ref ) )
                code = CHAR_ENTITIES[ ref ];
            else if( hasOwn.call( CHAR_ENTITIES, lowered ) )
                code = CHAR_ENTITIES[ lowered ];
            else
                return whole;
        }

        // Also rejects NaN, for which both comparisons are false.
        if( !( code >= 0 && code <= 0x10FFFF ) )
            return whole;

        if( code > 0xFFFF )
        {
            // Outside the BMP: encode as a UTF-16 surrogate pair.
            offset = code - 0x10000;
            return String.fromCharCode( 0xD800 + ( offset >> 10 ),
                                        0xDC00 + ( offset & 0x3FF ) );
        }
        return String.fromCharCode( code );
    } );
}
/**
 * Entity name -> character code for the XHTML 1.0 entity sets
 * (Latin-1, special characters, symbols).
 *
 * Names are case-sensitive: "Agrave" (192) and "agrave" (224) are different
 * entities, as are "Dagger"/"dagger", "Prime"/"prime", "lArr"/"larr", etc.
 */
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp':160,'iexcl':161,'cent':162,'pound':163,'curren':164,'yen':165,
    'brvbar':166,'sect':167,'uml':168,'copy':169,'ordf':170,'laquo':171,
    'not':172,'shy':173,'reg':174,'macr':175,'deg':176,'plusmn':177,
    'sup2':178,'sup3':179,'acute':180,'micro':181,'para':182,'middot':183,
    'cedil':184,'sup1':185,'ordm':186,'raquo':187,'frac14':188,'frac12':189,
    'frac34':190,'iquest':191,
    'Agrave':192,'Aacute':193,'Acirc':194,'Atilde':195,'Auml':196,'Aring':197,
    'AElig':198,'Ccedil':199,'Egrave':200,'Eacute':201,'Ecirc':202,'Euml':203,
    'Igrave':204,'Iacute':205,'Icirc':206,'Iuml':207,'ETH':208,'Ntilde':209,
    'Ograve':210,'Oacute':211,'Ocirc':212,'Otilde':213,'Ouml':214,'times':215,
    'Oslash':216,'Ugrave':217,'Uacute':218,'Ucirc':219,'Uuml':220,'Yacute':221,
    'THORN':222,'szlig':223,
    'agrave':224,'aacute':225,'acirc':226,'atilde':227,'auml':228,'aring':229,
    'aelig':230,'ccedil':231,'egrave':232,'eacute':233,'ecirc':234,'euml':235,
    'igrave':236,'iacute':237,'icirc':238,'iuml':239,'eth':240,'ntilde':241,
    'ograve':242,'oacute':243,'ocirc':244,'otilde':245,'ouml':246,'divide':247,
    'oslash':248,'ugrave':249,'uacute':250,'ucirc':251,'uuml':252,'yacute':253,
    'thorn':254,'yuml':255,

    // Special characters
    'lt':60,'gt':62,'amp':38,'apos':39,'quot':34,
    'OElig':338,'oelig':339,'Scaron':352,'scaron':353,'Yuml':376,
    'circ':710,'tilde':732,
    'ensp':8194,'emsp':8195,'thinsp':8201,'zwnj':8204,'zwj':8205,
    'lrm':8206,'rlm':8207,'ndash':8211,'mdash':8212,
    'lsquo':8216,'rsquo':8217,'sbquo':8218,
    'ldquo':8220,'rdquo':8221,'bdquo':8222,
    'dagger':8224,'Dagger':8225,'permil':8240,
    'lsaquo':8249,'rsaquo':8250,'euro':8364,

    // Symbols: Greek
    'fnof':402,
    'Alpha':913,'Beta':914,'Gamma':915,'Delta':916,'Epsilon':917,'Zeta':918,
    'Eta':919,'Theta':920,'Iota':921,'Kappa':922,'Lambda':923,'Mu':924,
    'Nu':925,'Xi':926,'Omicron':927,'Pi':928,'Rho':929,'Sigma':931,
    'Tau':932,'Upsilon':933,'Phi':934,'Chi':935,'Psi':936,'Omega':937,
    'alpha':945,'beta':946,'gamma':947,'delta':948,'epsilon':949,'zeta':950,
    'eta':951,'theta':952,'iota':953,'kappa':954,'lambda':955,'mu':956,
    'nu':957,'xi':958,'omicron':959,'pi':960,'rho':961,'sigmaf':962,
    'sigma':963,'tau':964,'upsilon':965,'phi':966,'chi':967,'psi':968,
    'omega':969,'thetasym':977,'upsih':978,'piv':982,

    // Symbols: punctuation, letterlike, arrows
    'bull':8226,'hellip':8230,'prime':8242,'Prime':8243,'oline':8254,
    'frasl':8260,
    'weierp':8472,'image':8465,'real':8476,'trade':8482,'alefsym':8501,
    'larr':8592,'uarr':8593,'rarr':8594,'darr':8595,'harr':8596,'crarr':8629,
    'lArr':8656,'uArr':8657,'rArr':8658,'dArr':8659,'hArr':8660,

    // Symbols: mathematical operators
    'forall':8704,'part':8706,'exist':8707,'empty':8709,'nabla':8711,
    'isin':8712,'notin':8713,'ni':8715,'prod':8719,'sum':8721,'minus':8722,
    'lowast':8727,'radic':8730,'prop':8733,'infin':8734,'ang':8736,
    'and':8743,'or':8744,'cap':8745,'cup':8746,'int':8747,'there4':8756,
    'sim':8764,'cong':8773,'asymp':8776,'ne':8800,'equiv':8801,'le':8804,
    'ge':8805,'sub':8834,'sup':8835,'nsub':8836,'sube':8838,'supe':8839,
    'oplus':8853,'otimes':8855,'perp':8869,'sdot':8901,

    // Symbols: technical, geometric, miscellaneous
    'lceil':8968,'rceil':8969,'lfloor':8970,'rfloor':8971,
    'lang':9001,'rang':9002,'loz':9674,
    'spades':9824,'clubs':9827,'hearts':9829,'diams':9830
};