By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
449,208 Members | 1,859 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 449,208 IT Pros & Developers. It's quick & easy.

expanding character entity references in javascript

P: n/a
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

Thanks,
Jim
Jul 23 '05 #1
Share this Question
Share on Google+
3 Replies


P: n/a
Jim Higson wrote:
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
the contents isn't really an option. Under moz this gives an error because
innerHTML is read only.

Jim
Jul 23 '05 #2

P: n/a
> Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Expand the named character entities found in this string.
 *
 * Every "&name;" whose name is an own key of
 * String.prototype.deentityify.data is replaced by the mapped string.
 * Anything else (unknown names, a bare '&', an unterminated reference)
 * is left exactly as written, and scanning continues past it so that one
 * unrecognised reference does not prevent the others from being expanded.
 *
 * The text is scanned once, left to right; replacement text is never
 * re-scanned, so an expansion cannot itself be expanded again.
 *
 * @param {*} o Ignored. Kept only so the function signature is unchanged.
 * @returns {string} A primitive string with known entities expanded.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data;

    // String(this) yields a primitive even when `this` is a boxed String.
    return String(this).replace(/&([^&;]+);/g, function (whole, name) {
        // Own-property check: inherited keys such as "constructor" or
        // "toString" must not be treated as entity names.
        if (Object.prototype.hasOwnProperty.call(table, name) &&
                typeof table[name] === 'string') {
            return table[name];
        }
        return whole;
    });
};

// Entity name -> replacement text. Add entries here to recognise more names.
String.prototype.deentityify.data = {
    apos: "'",
    lt: '<',
    gt: '>'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com
Jul 23 '05 #3

P: n/a
Douglas Crockford wrote:
Does anyone know a technique in javascript to transform from (for
example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Expand the named character entities found in this string.
 *
 * Every "&name;" whose name is an own key of
 * String.prototype.deentityify.data is replaced by the mapped string.
 * Anything else (unknown names, a bare '&', an unterminated reference)
 * is left exactly as written, and scanning continues past it so that one
 * unrecognised reference does not prevent the others from being expanded.
 *
 * The text is scanned once, left to right; replacement text is never
 * re-scanned, so an expansion cannot itself be expanded again.
 *
 * @param {*} o Ignored. Kept only so the function signature is unchanged.
 * @returns {string} A primitive string with known entities expanded.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data;

    // String(this) yields a primitive even when `this` is a boxed String.
    return String(this).replace(/&([^&;]+);/g, function (whole, name) {
        // Own-property check: inherited keys such as "constructor" or
        // "toString" must not be treated as entity names.
        if (Object.prototype.hasOwnProperty.call(table, name) &&
                typeof table[name] === 'string') {
            return table[name];
        }
        return whole;
    });
};

// Entity name -> replacement text. Add entries here to recognise more names.
String.prototype.deentityify.data = {
    apos: "'",
    lt: '<',
    gt: '>'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com

Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some JS code generated from a quick
Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
I had it done straight away before I saw your response. Once I'd resigned
myself to coding this in javascript (instead of tricking the browser into
decoding the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k
when served with gzip content-encoding):
// Matches one character reference and captures its body (without '&' / ';'):
//   &#xHHHH;  hexadecimal numeric reference
//   &#DDDD;   decimal numeric reference
//   &name;    named entity reference
// No length limits are imposed: names may be longer than seven characters
// (e.g. "thetasym") and numeric references may have any number of digits.
var CHAR_REF_REGEX = /&(#[xX][0-9a-fA-F]+|#\d+|\w+);/g;

/**
 * Replace the character references in `str` with the characters they denote.
 *
 * Decimal (&#9829;) and hexadecimal (&#x2665;) numeric references are
 * decoded directly; named references (&hearts;) are resolved through
 * CHAR_ENTITIES. References that cannot be resolved - unknown names, or
 * numbers outside 1..0x10FFFF - are left in the output unchanged.
 *
 * The input is scanned once, so text produced by an expansion is never
 * expanded again ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {string} str Text that may contain character references. It is
 *                     converted with String() before scanning.
 * @returns {string} The text with all resolvable references expanded.
 */
function expand_char_references( str )
{
    return String( str ).replace( CHAR_REF_REGEX, function( ref, body )
    {
        var code;

        if( body.charAt( 0 ) === '#' )
        {
            var marker = body.charAt( 1 );
            code = ( marker === 'x' || marker === 'X' )
                ? parseInt( body.substring( 2 ), 16 )
                : parseInt( body.substring( 1 ), 10 );
        }
        else
        {
            code = lookup_char_entity( body );
        }

        // Written so that NaN and undefined both fail the test.
        if( !( code >= 1 && code <= 0x10FFFF ) )
            return ref;

        return char_from_code_point( code );
    } );
}

// Resolve an entity name to its code point, or undefined if it is unknown.
// The exact (case-sensitive) name is tried first; the lowercased name is a
// fallback so that input such as "&NBSP;" is still accepted.
function lookup_char_entity( name )
{
    var has = Object.prototype.hasOwnProperty;

    if( has.call( CHAR_ENTITIES, name ) )
        return CHAR_ENTITIES[ name ];

    var lower = name.toLowerCase();
    if( has.call( CHAR_ENTITIES, lower ) )
        return CHAR_ENTITIES[ lower ];

    return undefined;
}

// Build the string for one code point, using a UTF-16 surrogate pair for
// code points above U+FFFF (String.fromCharCode only takes 16-bit units).
function char_from_code_point( code )
{
    if( code <= 0xFFFF )
        return String.fromCharCode( code );

    code -= 0x10000;
    return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                0xDC00 + ( code & 0x3FF ) );
}

// Entity name -> Unicode code point, following the entity sets of the
// W3C XHTML 1.0 DTD (Latin-1, Special, Symbol). Names are case-sensitive:
// "Agrave" and "agrave", "Dagger" and "dagger", "lArr" and "larr" are
// different characters.
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp':160,'iexcl':161,'cent':162,'pound':163,'curren':164,'yen':165,
    'brvbar':166,'sect':167,'uml':168,'copy':169,'ordf':170,'laquo':171,
    'not':172,'shy':173,'reg':174,'macr':175,'deg':176,'plusmn':177,
    'sup2':178,'sup3':179,'acute':180,'micro':181,'para':182,'middot':183,
    'cedil':184,'sup1':185,'ordm':186,'raquo':187,'frac14':188,'frac12':189,
    'frac34':190,'iquest':191,
    'Agrave':192,'Aacute':193,'Acirc':194,'Atilde':195,'Auml':196,'Aring':197,
    'AElig':198,'Ccedil':199,'Egrave':200,'Eacute':201,'Ecirc':202,'Euml':203,
    'Igrave':204,'Iacute':205,'Icirc':206,'Iuml':207,'ETH':208,'Ntilde':209,
    'Ograve':210,'Oacute':211,'Ocirc':212,'Otilde':213,'Ouml':214,'times':215,
    'Oslash':216,'Ugrave':217,'Uacute':218,'Ucirc':219,'Uuml':220,'Yacute':221,
    'THORN':222,'szlig':223,
    'agrave':224,'aacute':225,'acirc':226,'atilde':227,'auml':228,'aring':229,
    'aelig':230,'ccedil':231,'egrave':232,'eacute':233,'ecirc':234,'euml':235,
    'igrave':236,'iacute':237,'icirc':238,'iuml':239,'eth':240,'ntilde':241,
    'ograve':242,'oacute':243,'ocirc':244,'otilde':245,'ouml':246,'divide':247,
    'oslash':248,'ugrave':249,'uacute':250,'ucirc':251,'uuml':252,'yacute':253,
    'thorn':254,'yuml':255,

    // Special
    'quot':34,'amp':38,'lt':60,'gt':62,'apos':39,
    'OElig':338,'oelig':339,'Scaron':352,'scaron':353,'Yuml':376,
    'circ':710,'tilde':732,'ensp':8194,'emsp':8195,'thinsp':8201,
    'zwnj':8204,'zwj':8205,'lrm':8206,'rlm':8207,'ndash':8211,'mdash':8212,
    'lsquo':8216,'rsquo':8217,'sbquo':8218,'ldquo':8220,'rdquo':8221,
    'bdquo':8222,'dagger':8224,'Dagger':8225,'permil':8240,'lsaquo':8249,
    'rsaquo':8250,'euro':8364,

    // Symbols
    'fnof':402,
    'Alpha':913,'Beta':914,'Gamma':915,'Delta':916,'Epsilon':917,'Zeta':918,
    'Eta':919,'Theta':920,'Iota':921,'Kappa':922,'Lambda':923,'Mu':924,
    'Nu':925,'Xi':926,'Omicron':927,'Pi':928,'Rho':929,'Sigma':931,'Tau':932,
    'Upsilon':933,'Phi':934,'Chi':935,'Psi':936,'Omega':937,
    'alpha':945,'beta':946,'gamma':947,'delta':948,'epsilon':949,'zeta':950,
    'eta':951,'theta':952,'iota':953,'kappa':954,'lambda':955,'mu':956,
    'nu':957,'xi':958,'omicron':959,'pi':960,'rho':961,'sigmaf':962,
    'sigma':963,'tau':964,'upsilon':965,'phi':966,'chi':967,'psi':968,
    'omega':969,'thetasym':977,'upsih':978,'piv':982,
    'bull':8226,'hellip':8230,'prime':8242,'Prime':8243,'oline':8254,
    'frasl':8260,'weierp':8472,'image':8465,'real':8476,'trade':8482,
    'alefsym':8501,
    'larr':8592,'uarr':8593,'rarr':8594,'darr':8595,'harr':8596,'crarr':8629,
    'lArr':8656,'uArr':8657,'rArr':8658,'dArr':8659,'hArr':8660,
    'forall':8704,'part':8706,'exist':8707,'empty':8709,'nabla':8711,
    'isin':8712,'notin':8713,'ni':8715,'prod':8719,'sum':8721,'minus':8722,
    'lowast':8727,'radic':8730,'prop':8733,'infin':8734,'ang':8736,'and':8743,
    'or':8744,'cap':8745,'cup':8746,'int':8747,'there4':8756,'sim':8764,
    'cong':8773,'asymp':8776,'ne':8800,'equiv':8801,'le':8804,'ge':8805,
    'sub':8834,'sup':8835,'nsub':8836,'sube':8838,'supe':8839,'oplus':8853,
    'otimes':8855,'perp':8869,'sdot':8901,'lceil':8968,'rceil':8969,
    'lfloor':8970,'rfloor':8971,'lang':9001,'rang':9002,'loz':9674,
    'spades':9824,'clubs':9827,'hearts':9829,'diams':9830
};

Jul 23 '05 #4

This discussion thread is closed

Replies have been disabled for this discussion.