473,233 Members | 1,402 Online
Bytes | Software Development & Data Engineering Community
Post Job

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 473,233 software developers and data experts.

expanding character entity references in javascript

Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

Thanks,
Jim
Jul 23 '05 #1
3 2224
Jim Higson wrote:
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
the contents isn't really an option. Under moz this gives an error because
innerHTML is read only.

Jim
Jul 23 '05 #2
> Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Returns a copy of this string in which every "&name;" reference whose name
 * is an own property of String.prototype.deentityify.data has been replaced
 * by the corresponding table value.
 *
 * References are processed from right to left and each "&" is examined
 * exactly once, so:
 *   - an unknown or malformed reference (no ";", or an empty name) is left
 *     as-is and does not stop earlier references from being expanded;
 *   - text produced by a replacement is never scanned again, so "&amp;lt;"
 *     becomes "&lt;" rather than "<" when the table contains "amp".
 *
 * @param {*} [o] Accepted but not used; the lookup table is always
 *                String.prototype.deentityify.data.
 * @returns {string} The expanded text, always a primitive string.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data || {},
        owns = Object.prototype.hasOwnProperty,
        s = String(this),       // `this` may be a boxed String object
        from = s.length - 1,    // right-most index still to be examined
        i, j, name;

    while (from >= 0) {
        i = s.lastIndexOf('&', from);
        if (i < 0) {
            break;
        }
        j = s.indexOf(';', i);
        if (j > i + 1) {
            name = s.substring(i + 1, j);
            // Own properties only: names such as "toString" must not
            // resolve to members inherited from Object.prototype.
            if (owns.call(table, name)) {
                s = s.substring(0, i) + table[name] + s.substring(j + 1);
            }
        }
        // Continue strictly to the left of this "&" so that replacement
        // text is not re-interpreted.
        from = i - 1;
    }
    return s;
};

// Entity-name -> replacement-text table consulted by
// String.prototype.deentityify. It holds the five entities predefined by
// XML 1.0. Note that "amp" expands to a literal "&": a decoder that rescans
// its own output would decode text such as "&amp;lt;" twice.
String.prototype.deentityify.data = {
    apos: "'",
    lt: '<',
    gt: '>',
    quot: '"',
    amp: '&'
};

// Example: s is "<cool>".
var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com
Jul 23 '05 #3
Douglas Crockford wrote:
Does anyone know a technique in javascript to transform from (for
example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Returns a copy of this string in which every "&name;" reference whose name
 * is an own property of String.prototype.deentityify.data has been replaced
 * by the corresponding table value.
 *
 * References are processed from right to left and each "&" is examined
 * exactly once, so:
 *   - an unknown or malformed reference (no ";", or an empty name) is left
 *     as-is and does not stop earlier references from being expanded;
 *   - text produced by a replacement is never scanned again, so "&amp;lt;"
 *     becomes "&lt;" rather than "<" when the table contains "amp".
 *
 * @param {*} [o] Accepted but not used; the lookup table is always
 *                String.prototype.deentityify.data.
 * @returns {string} The expanded text, always a primitive string.
 */
String.prototype.deentityify = function (o) {
    var table = String.prototype.deentityify.data || {},
        owns = Object.prototype.hasOwnProperty,
        s = String(this),       // `this` may be a boxed String object
        from = s.length - 1,    // right-most index still to be examined
        i, j, name;

    while (from >= 0) {
        i = s.lastIndexOf('&', from);
        if (i < 0) {
            break;
        }
        j = s.indexOf(';', i);
        if (j > i + 1) {
            name = s.substring(i + 1, j);
            // Own properties only: names such as "toString" must not
            // resolve to members inherited from Object.prototype.
            if (owns.call(table, name)) {
                s = s.substring(0, i) + table[name] + s.substring(j + 1);
            }
        }
        // Continue strictly to the left of this "&" so that replacement
        // text is not re-interpreted.
        from = i - 1;
    }
    return s;
};

// Entity-name -> replacement-text table consulted by
// String.prototype.deentityify. It holds the five entities predefined by
// XML 1.0. Note that "amp" expands to a literal "&": a decoder that rescans
// its own output would decode text such as "&amp;lt;" twice.
String.prototype.deentityify.data = {
    apos: "'",
    lt: '<',
    gt: '>',
    quot: '"',
    amp: '&'
};

// Example: s is "<cool>".
var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com

Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some js code generated from a quick
Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
I had it done straight away before I saw your response. Once I'd resigned
to coding this in javascript (instead of tricking the browser into decoding
the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k
with content-type gzip)
/**
 * Matches a single character reference. Capture group 1 is the text between
 * "&" and ";": an entity name ("hearts"), a decimal reference ("#9829") or a
 * hexadecimal reference ("#x2665"). No length limits are imposed here;
 * references that cannot be resolved are simply left untouched by
 * expand_char_references.
 */
var CHAR_REF_REGEX = /&(#[xX][0-9A-Fa-f]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g;

/**
 * Converts a Unicode code point to a string, emitting a UTF-16 surrogate
 * pair for code points above 0xFFFF (String.fromCharCode alone would
 * truncate them to 16 bits).
 */
function code_point_to_string( code )
{
    var offset;

    if( code <= 0xFFFF )
        return String.fromCharCode( code );

    offset = code - 0x10000;
    return String.fromCharCode( 0xD800 + ( offset >> 10 ),
                                0xDC00 + ( offset & 0x3FF ) );
}

/**
 * Resolves the body of one reference (the text between "&" and ";").
 * Returns the decoded character(s), or null when the reference is unknown
 * or does not denote a usable code point.
 */
function decode_char_reference( body )
{
    var owns = Object.prototype.hasOwnProperty;
    var code = null;
    var lowered;

    if( body.charAt( 0 ) === "#" )
    {
        // The leading "#" (and "x" for hex) must be stripped before parsing.
        code = /^#[xX]/.test( body )
            ? parseInt( body.substring( 2 ), 16 )
            : parseInt( body.substring( 1 ), 10 );
    }
    else if( owns.call( CHAR_ENTITIES, body ) )
    {
        // Entity names are case-sensitive (e.g. "Agrave" vs "agrave"), so
        // the exact spelling is tried first.
        code = CHAR_ENTITIES[ body ];
    }
    else
    {
        // Fall back to the lower-cased name so that callers relying on the
        // earlier case-insensitive lookup keep working.
        lowered = body.toLowerCase();
        if( owns.call( CHAR_ENTITIES, lowered ) )
            code = CHAR_ENTITIES[ lowered ];
    }

    // Reject NaN, 0, values beyond the Unicode range and lone surrogates.
    if( typeof code !== "number" ||
        !( code >= 1 && code <= 0x10FFFF ) ||
        ( code >= 0xD800 && code <= 0xDFFF ) )
        return null;

    return code_point_to_string( code );
}

/**
 * Expands the character references in str: named entities found in
 * CHAR_ENTITIES, decimal references ("&#9829;") and hexadecimal references
 * ("&#x2665;"). Anything that cannot be resolved is copied to the result
 * unchanged. Each reference is decoded once; the output is not rescanned.
 *
 * @param {string} str Text that may contain character references.
 * @returns {string} str with all resolvable references replaced.
 */
function expand_char_references( str )
{
    // replace() with a global regex starts from index 0 on every call, so
    // no state leaks between calls through CHAR_REF_REGEX.lastIndex.
    return str.replace( CHAR_REF_REGEX, function( whole, body )
    {
        var expansion = decode_char_reference( body );
        return expansion === null ? whole : expansion;
    } );
}
/**
 * Entity name -> Unicode code point, for the character entities of the
 * XHTML 1.0 DTD (Latin-1, special and symbol sets).
 *
 * Names are case-sensitive: "Agrave" (192) and "agrave" (224), "Dagger" and
 * "dagger", "lArr" and "larr" etc. are distinct entities, so each keeps its
 * own key instead of being collapsed to lower case.
 * Keys stay quoted because several names ("int", "not", "and", ...) look
 * like keywords.
 */
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp': 160, 'iexcl': 161, 'cent': 162, 'pound': 163, 'curren': 164,
    'yen': 165, 'brvbar': 166, 'sect': 167, 'uml': 168, 'copy': 169,
    'ordf': 170, 'laquo': 171, 'not': 172, 'shy': 173, 'reg': 174,
    'macr': 175, 'deg': 176, 'plusmn': 177, 'sup2': 178, 'sup3': 179,
    'acute': 180, 'micro': 181, 'para': 182, 'middot': 183, 'cedil': 184,
    'sup1': 185, 'ordm': 186, 'raquo': 187, 'frac14': 188, 'frac12': 189,
    'frac34': 190, 'iquest': 191,
    'Agrave': 192, 'Aacute': 193, 'Acirc': 194, 'Atilde': 195, 'Auml': 196,
    'Aring': 197, 'AElig': 198, 'Ccedil': 199, 'Egrave': 200, 'Eacute': 201,
    'Ecirc': 202, 'Euml': 203, 'Igrave': 204, 'Iacute': 205, 'Icirc': 206,
    'Iuml': 207, 'ETH': 208, 'Ntilde': 209, 'Ograve': 210, 'Oacute': 211,
    'Ocirc': 212, 'Otilde': 213, 'Ouml': 214, 'times': 215, 'Oslash': 216,
    'Ugrave': 217, 'Uacute': 218, 'Ucirc': 219, 'Uuml': 220, 'Yacute': 221,
    'THORN': 222, 'szlig': 223,
    'agrave': 224, 'aacute': 225, 'acirc': 226, 'atilde': 227, 'auml': 228,
    'aring': 229, 'aelig': 230, 'ccedil': 231, 'egrave': 232, 'eacute': 233,
    'ecirc': 234, 'euml': 235, 'igrave': 236, 'iacute': 237, 'icirc': 238,
    'iuml': 239, 'eth': 240, 'ntilde': 241, 'ograve': 242, 'oacute': 243,
    'ocirc': 244, 'otilde': 245, 'ouml': 246, 'divide': 247, 'oslash': 248,
    'ugrave': 249, 'uacute': 250, 'ucirc': 251, 'uuml': 252, 'yacute': 253,
    'thorn': 254, 'yuml': 255,

    // Special characters ("lt" is U+003C = 60, not 38)
    'lt': 60, 'gt': 62, 'amp': 38, 'apos': 39, 'quot': 34,
    'OElig': 338, 'oelig': 339, 'Scaron': 352, 'scaron': 353, 'Yuml': 376,
    'circ': 710, 'tilde': 732, 'ensp': 8194, 'emsp': 8195, 'thinsp': 8201,
    'zwnj': 8204, 'zwj': 8205, 'lrm': 8206, 'rlm': 8207, 'ndash': 8211,
    'mdash': 8212, 'lsquo': 8216, 'rsquo': 8217, 'sbquo': 8218, 'ldquo': 8220,
    'rdquo': 8221, 'bdquo': 8222, 'dagger': 8224, 'Dagger': 8225,
    'permil': 8240, 'lsaquo': 8249, 'rsaquo': 8250, 'euro': 8364,

    // Symbols and Greek
    'fnof': 402,
    'Alpha': 913, 'Beta': 914, 'Gamma': 915, 'Delta': 916, 'Epsilon': 917,
    'Zeta': 918, 'Eta': 919, 'Theta': 920, 'Iota': 921, 'Kappa': 922,
    'Lambda': 923, 'Mu': 924, 'Nu': 925, 'Xi': 926, 'Omicron': 927,
    'Pi': 928, 'Rho': 929, 'Sigma': 931, 'Tau': 932, 'Upsilon': 933,
    'Phi': 934, 'Chi': 935, 'Psi': 936, 'Omega': 937,
    'alpha': 945, 'beta': 946, 'gamma': 947, 'delta': 948, 'epsilon': 949,
    'zeta': 950, 'eta': 951, 'theta': 952, 'iota': 953, 'kappa': 954,
    'lambda': 955, 'mu': 956, 'nu': 957, 'xi': 958, 'omicron': 959,
    'pi': 960, 'rho': 961, 'sigmaf': 962, 'sigma': 963, 'tau': 964,
    'upsilon': 965, 'phi': 966, 'chi': 967, 'psi': 968, 'omega': 969,
    'thetasym': 977, 'upsih': 978, 'piv': 982,
    'bull': 8226, 'hellip': 8230, 'prime': 8242, 'Prime': 8243,
    'oline': 8254, 'frasl': 8260, 'weierp': 8472, 'image': 8465,
    'real': 8476, 'trade': 8482, 'alefsym': 8501,
    'larr': 8592, 'uarr': 8593, 'rarr': 8594, 'darr': 8595, 'harr': 8596,
    'crarr': 8629,
    'lArr': 8656, 'uArr': 8657, 'rArr': 8658, 'dArr': 8659, 'hArr': 8660,
    'forall': 8704, 'part': 8706, 'exist': 8707, 'empty': 8709,
    'nabla': 8711, 'isin': 8712, 'notin': 8713, 'ni': 8715, 'prod': 8719,
    'sum': 8721, 'minus': 8722, 'lowast': 8727, 'radic': 8730, 'prop': 8733,
    'infin': 8734, 'ang': 8736, 'and': 8743, 'or': 8744, 'cap': 8745,
    'cup': 8746, 'int': 8747, 'there4': 8756, 'sim': 8764, 'cong': 8773,
    'asymp': 8776, 'ne': 8800, 'equiv': 8801, 'le': 8804, 'ge': 8805,
    'sub': 8834, 'sup': 8835, 'nsub': 8836, 'sube': 8838, 'supe': 8839,
    'oplus': 8853, 'otimes': 8855, 'perp': 8869, 'sdot': 8901,
    'lceil': 8968, 'rceil': 8969, 'lfloor': 8970, 'rfloor': 8971,
    'lang': 9001, 'rang': 9002, 'loz': 9674,
    'spades': 9824, 'clubs': 9827, 'hearts': 9829, 'diams': 9830
};

Jul 23 '05 #4

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

76
by: Zenobia | last post by:
How do I display character 151 (long hyphen) in XHTML (utf-8) ? Is there another character that will substitute? The W3C validation parser, http://validator.w3.org, tells me that this character...
19
by: Ian | last post by:
I'm using the following meta tag with my documents: <meta http-equiv="Content-Type" content= "text/html; charset=us-ascii" /> and yet using character entities like &rsquo; and &mdash; It...
1
by: User Axes Dean Eyed | last post by:
The following statement does not yield what I expect it to: var txtNode = document.createTextNode("ESI&minus;"); txtNode holds a value of "ESI&minus;" instead of "ESI-" where '-' is the...
50
by: The Bicycling Guitarist | last post by:
A browser conforming to HTML 4.0 is required to recognize &#number; notations. If I use XHTML 1.0 and charset UTF-8 though, does &eacute; have as much support as é ? Sometimes when I run...
2
by: jesl | last post by:
Group, I have created a User Control with the property "Html" of type string. If I declare this control on an ASPX page with the value "<b>This is an entity: &lt;</b>" for the property "Html", the...
40
by: Shmuel (Seymour J.) Metz | last post by:
I'd like to include some Hebrew names in a web page. HTML 4 doesn't appear to include character attributes for ISO-8859-8. I'd prefer avoiding numeric references, e.g.,...
44
by: Kulgan | last post by:
Hi I am struggling to find definitive information on how IE 5.5, 6 and 7 handle character input (I am happy with the display of text). I have two main questions: 1. Does IE automaticall...
4
by: Paul Rubin | last post by:
I'm new to xml mongering so forgive me if there's an obvious well-known answer to this. It's not real obvious from the library documentation I've looked at so far. Basically I have to munch of a...
18
by: R. P. | last post by:
I wonder how to indicate in a stylesheet that character entities in an element are not to be transformed as would be the case in XML-to-XML transforms. I want to keep those &amp; &quot; and other character...
3
isladogs
by: isladogs | last post by:
The next Access Europe meeting will be on Wednesday 3 Jan 2024 starting at 18:00 UK time (6PM UTC) and finishing at about 19:15 (7.15PM). For other local times, please check World Time Buddy In...
0
by: jianzs | last post by:
Introduction Cloud-native applications are conventionally identified as those designed and nurtured on cloud infrastructure. Such applications, rooted in cloud technologies, skillfully benefit from...
0
by: abbasky | last post by:
### Vandf component communication method one: data sharing ​ Vandf components can achieve data exchange through data sharing, state sharing, events, and other methods. Vandf's data exchange method...
2
isladogs
by: isladogs | last post by:
The next Access Europe meeting will be on Wednesday 7 Feb 2024 starting at 18:00 UK time (6PM UTC) and finishing at about 19:30 (7.30PM). In this month's session, the creator of the excellent VBE...
0
by: stefan129 | last post by:
Hey forum members, I'm exploring options for SSL certificates for multiple domains. Has anyone had experience with multi-domain SSL certificates? Any recommendations on reliable providers or specific...
0
by: MeoLessi9 | last post by:
I have VirtualBox installed on Windows 11 and now I would like to install Kali on a virtual machine. However, on the official website, I see two options: "Installer images" and "Virtual machines"....
0
by: DolphinDB | last post by:
The formulas of 101 quantitative trading alphas used by WorldQuant were presented in the paper 101 Formulaic Alphas. However, some formulas are complex, leading to challenges in calculation. Take...
0
by: Aftab Ahmad | last post by:
Hello Experts! I have written a code in MS Access for a cmd called "WhatsApp Message" to open WhatsApp using that very code but the problem is that it gives a popup message everytime I clicked on...
0
isladogs
by: isladogs | last post by:
The next Access Europe meeting will be on Wednesday 6 Mar 2024 starting at 18:00 UK time (6PM UTC) and finishing at about 19:15 (7.15PM). In this month's session, we are pleased to welcome back...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.