469,306 Members | 2,444 Online
Bytes | Developer Community
New Post

Home Posts Topics Members FAQ

Post your question to a community of 469,306 developers. It's quick & easy.

expanding character entity references in javascript

Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XMLHTTP
that isn't XML, but might contain some XML char entities.

Thanks,
Jim
Jul 23 '05 #1
3 2070
Jim Higson wrote:
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XMLHTTP
that isn't XML, but might contain some XML char entities.


Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
the contents isn't really an option. Under moz this gives an error because
innerHTML is read only.

Jim
Jul 23 '05 #2
> Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Returns a copy of this string with every entity reference that is listed
 * in String.prototype.deentityify.data replaced by its character.
 *
 * The string is scanned once, left to right, so:
 *   - an unknown reference (e.g. "&nbsp;") or a bare "&" is left as-is and
 *     does not stop other references from being decoded;
 *   - replaced text is never re-scanned, so "&amp;lt;" becomes "&lt;",
 *     not "<".
 *
 * @param o  Ignored; kept only so the signature matches the original.
 * @returns  A primitive string.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data,
        hasOwn = Object.prototype.hasOwnProperty;

    return String(this).replace(/&([^&;]+);/g, function (ref, name) {
        // Own properties only: "&constructor;" must not resolve to
        // something inherited from Object.prototype.
        return hasOwn.call(table, name) ? table[name] : ref;
    });
};

// The five entities predefined by XML. Add more names here as needed.
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com
Jul 23 '05 #3
Douglas Crockford wrote:
Does anyone know a technique in javascript to transform from (for
example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Returns a copy of this string with every entity reference that is listed
 * in String.prototype.deentityify.data replaced by its character.
 *
 * The string is scanned once, left to right, so:
 *   - an unknown reference (e.g. "&nbsp;") or a bare "&" is left as-is and
 *     does not stop other references from being decoded;
 *   - replaced text is never re-scanned, so "&amp;lt;" becomes "&lt;",
 *     not "<".
 *
 * @param o  Ignored; kept only so the signature matches the original.
 * @returns  A primitive string.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data,
        hasOwn = Object.prototype.hasOwnProperty;

    return String(this).replace(/&([^&;]+);/g, function (ref, name) {
        // Own properties only: "&constructor;" must not resolve to
        // something inherited from Object.prototype.
        return hasOwn.call(table, name) ? table[name] : ref;
    });
};

// The five entities predefined by XML. Add more names here as needed.
String.prototype.deentityify.data = {
    amp: '&',
    apos: "'",
    gt: '>',
    lt: '<',
    quot: '"'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com

Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some js code generated by a quick
Perl script that takes its values from the W3C's XHTML DTD. Add a bit of
regex and I had it done before I saw your response. Once I'd resigned myself
to coding this in javascript (instead of tricking the browser into decoding
the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k
when served with gzip content-encoding)
/**
 * Matches a single character reference. Exactly one group is filled:
 *   group 1 - entity name, 2 to 8 characters   (&hearts;  &thetasym;)
 *   group 2 - decimal code point               (&#9829;)
 *   group 3 - hexadecimal code point           (&#x2665;)
 * The "#" is deliberately kept outside the numeric groups so the captured
 * digits can be parsed directly.
 */
var CHAR_REF_REGEX = /&(?:([A-Za-z][A-Za-z0-9]{1,7})|#([0-9]{1,7})|#[xX]([0-9A-Fa-f]{1,6}));/g;

/**
 * Expands the character references in str.
 *
 * Named references are looked up in CHAR_ENTITIES (name -> code point),
 * first with the exact spelling, so that a case-correct table can tell
 * &Agrave; from &agrave;, and then lower-cased, which keeps sloppy input
 * such as &LT; working. Numeric references may be decimal or hexadecimal.
 *
 * Anything that cannot be resolved (unknown name, code point 0 or above
 * U+10FFFF) is copied to the output unchanged. Output is never re-scanned,
 * so "&#38;lt;" yields "&lt;".
 *
 * @param str  The string to expand.
 * @returns    str with every resolvable reference replaced by its character.
 */
function expand_char_references( str )
{
    var has_own = Object.prototype.hasOwnProperty;

    return str.replace( CHAR_REF_REGEX, function( ref, name, dec, hex )
    {
        var code;

        if( name )
        {
            if( has_own.call( CHAR_ENTITIES, name ) )
                code = CHAR_ENTITIES[ name ];
            else if( has_own.call( CHAR_ENTITIES, name.toLowerCase() ) )
                code = CHAR_ENTITIES[ name.toLowerCase() ];
            else
                return ref;
        }
        else
        {
            code = dec ? parseInt( dec, 10 ) : parseInt( hex, 16 );
        }

        // Written as a negated range test so NaN and non-numeric table
        // values are rejected as well.
        if( !( code >= 1 && code <= 0x10FFFF ) )
            return ref;

        if( code <= 0xFFFF )
            return String.fromCharCode( code );

        // Beyond the BMP: String.fromCharCode only takes UTF-16 code
        // units, so build the surrogate pair by hand.
        code -= 0x10000;
        return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                    0xDC00 + ( code & 0x3FF ) );
    } );
}
/**
 * Entity name -> Unicode code point for the XHTML 1.0 entity sets
 * (Latin-1, Special, Symbol).
 *
 * Names are case-sensitive: "Agrave" (U+00C0) and "agrave" (U+00E0),
 * "larr" (U+2190) and "lArr" (U+21D0), and so on are distinct entries.
 * Keys stay quoted because some of them ("int") are reserved words in
 * older engines.
 */
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp':160,'iexcl':161,'cent':162,'pound':163,'curren':164,'yen':165,
    'brvbar':166,'sect':167,'uml':168,'copy':169,'ordf':170,'laquo':171,
    'not':172,'shy':173,'reg':174,'macr':175,'deg':176,'plusmn':177,
    'sup2':178,'sup3':179,'acute':180,'micro':181,'para':182,'middot':183,
    'cedil':184,'sup1':185,'ordm':186,'raquo':187,'frac14':188,'frac12':189,
    'frac34':190,'iquest':191,
    'Agrave':192,'Aacute':193,'Acirc':194,'Atilde':195,'Auml':196,'Aring':197,
    'AElig':198,'Ccedil':199,'Egrave':200,'Eacute':201,'Ecirc':202,'Euml':203,
    'Igrave':204,'Iacute':205,'Icirc':206,'Iuml':207,'ETH':208,'Ntilde':209,
    'Ograve':210,'Oacute':211,'Ocirc':212,'Otilde':213,'Ouml':214,'times':215,
    'Oslash':216,'Ugrave':217,'Uacute':218,'Ucirc':219,'Uuml':220,'Yacute':221,
    'THORN':222,'szlig':223,
    'agrave':224,'aacute':225,'acirc':226,'atilde':227,'auml':228,'aring':229,
    'aelig':230,'ccedil':231,'egrave':232,'eacute':233,'ecirc':234,'euml':235,
    'igrave':236,'iacute':237,'icirc':238,'iuml':239,'eth':240,'ntilde':241,
    'ograve':242,'oacute':243,'ocirc':244,'otilde':245,'ouml':246,'divide':247,
    'oslash':248,'ugrave':249,'uacute':250,'ucirc':251,'uuml':252,'yacute':253,
    'thorn':254,'yuml':255,

    // Special
    'lt':60,'gt':62,'amp':38,'apos':39,'quot':34,
    'OElig':338,'oelig':339,'Scaron':352,'scaron':353,'Yuml':376,
    'circ':710,'tilde':732,
    'ensp':8194,'emsp':8195,'thinsp':8201,'zwnj':8204,'zwj':8205,
    'lrm':8206,'rlm':8207,'ndash':8211,'mdash':8212,
    'lsquo':8216,'rsquo':8217,'sbquo':8218,'ldquo':8220,'rdquo':8221,
    'bdquo':8222,'dagger':8224,'Dagger':8225,'permil':8240,
    'lsaquo':8249,'rsaquo':8250,'euro':8364,

    // Symbol: Greek
    'fnof':402,
    'Alpha':913,'Beta':914,'Gamma':915,'Delta':916,'Epsilon':917,'Zeta':918,
    'Eta':919,'Theta':920,'Iota':921,'Kappa':922,'Lambda':923,'Mu':924,
    'Nu':925,'Xi':926,'Omicron':927,'Pi':928,'Rho':929,'Sigma':931,
    'Tau':932,'Upsilon':933,'Phi':934,'Chi':935,'Psi':936,'Omega':937,
    'alpha':945,'beta':946,'gamma':947,'delta':948,'epsilon':949,'zeta':950,
    'eta':951,'theta':952,'iota':953,'kappa':954,'lambda':955,'mu':956,
    'nu':957,'xi':958,'omicron':959,'pi':960,'rho':961,'sigmaf':962,
    'sigma':963,'tau':964,'upsilon':965,'phi':966,'chi':967,'psi':968,
    'omega':969,'thetasym':977,'upsih':978,'piv':982,

    // Symbol: punctuation, letter-like, arrows
    'bull':8226,'hellip':8230,'prime':8242,'Prime':8243,'oline':8254,
    'frasl':8260,'weierp':8472,'image':8465,'real':8476,'trade':8482,
    'alefsym':8501,
    'larr':8592,'uarr':8593,'rarr':8594,'darr':8595,'harr':8596,'crarr':8629,
    'lArr':8656,'uArr':8657,'rArr':8658,'dArr':8659,'hArr':8660,

    // Symbol: mathematical operators
    'forall':8704,'part':8706,'exist':8707,'empty':8709,'nabla':8711,
    'isin':8712,'notin':8713,'ni':8715,'prod':8719,'sum':8721,'minus':8722,
    'lowast':8727,'radic':8730,'prop':8733,'infin':8734,'ang':8736,
    'and':8743,'or':8744,'cap':8745,'cup':8746,'int':8747,'there4':8756,
    'sim':8764,'cong':8773,'asymp':8776,'ne':8800,'equiv':8801,'le':8804,
    'ge':8805,'sub':8834,'sup':8835,'nsub':8836,'sube':8838,'supe':8839,
    'oplus':8853,'otimes':8855,'perp':8869,'sdot':8901,

    // Symbol: technical, geometric, card suits
    'lceil':8968,'rceil':8969,'lfloor':8970,'rfloor':8971,
    'lang':9001,'rang':9002,'loz':9674,
    'spades':9824,'clubs':9827,'hearts':9829,'diams':9830
};

Jul 23 '05 #4

This discussion thread is closed

Replies have been disabled for this discussion.

Similar topics

19 posts views Thread by Ian | last post: by
1 post views Thread by User Axes Dean Eyed | last post: by
50 posts views Thread by The Bicycling Guitarist | last post: by
40 posts views Thread by Shmuel (Seymour J.) Metz | last post: by
44 posts views Thread by Kulgan | last post: by
4 posts views Thread by Paul Rubin | last post: by
reply views Thread by zhoujie | last post: by
1 post views Thread by Geralt96 | last post: by
By using this site, you agree to our Privacy Policy and Terms of Use.