473,785 Members | 2,831 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

expanding character entity references in javascript

Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.

Thanks,
Jim
Jul 23 '05 #1
3 2266
Jim Higson wrote:
Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


Btw, I'm using XHTML so setting innerHTML on a temp element and then reading
the contents isn't really an option. Under moz this gives an error because
innerHTML is read only.

Jim
Jul 23 '05 #2
> Does anyone know a technique in javascript to transform from (for example)
&hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Expand named character entity references (e.g. "&lt;") in this string.
 *
 * Every "&name;" whose name is an own property of the lookup table is
 * replaced by the table's value. References with unknown names, and stray
 * ampersands, are left exactly as they are and do not stop the scan.
 * The string is processed in a single pass, so replacement text is never
 * re-expanded ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {Object} [o] Optional map of entity name -> replacement text.
 *     When omitted (or not an object) String.prototype.deentityify.data
 *     is used.
 * @returns {string} A primitive string with the known entities expanded.
 */
String.prototype.deentityify = function (o) {
    var table = (typeof o === 'object' && o !== null)
        ? o
        : String.prototype.deentityify.data;

    return String(this).replace(/&([^&;]+);/g, function (reference, name) {
        // Own-property check: a plain table[name] lookup would also find
        // inherited members such as "toString" or "constructor".
        return Object.prototype.hasOwnProperty.call(table, name)
            ? table[name]
            : reference;
    });
};

// Default table: the five entities predefined by XML.
String.prototype.deentityify.data = {
    quot: '"',
    amp: '&',
    apos: "'",
    lt: '<',
    gt: '>'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com
Jul 23 '05 #3
Douglas Crockford wrote:
Does anyone know a technique in javascript to transform from (for
example) &hearts; to the char '♥'?

I'm doing this because I have to interpret some data I got over XHTMLHTTP
that isn't XML, but might contain some XML char entities.


/**
 * Expand named character entity references (e.g. "&lt;") in this string.
 *
 * Every "&name;" whose name is an own property of the lookup table is
 * replaced by the table's value. References with unknown names, and stray
 * ampersands, are left exactly as they are and do not stop the scan.
 * The string is processed in a single pass, so replacement text is never
 * re-expanded ("&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {Object} [o] Optional map of entity name -> replacement text.
 *     When omitted (or not an object) String.prototype.deentityify.data
 *     is used.
 * @returns {string} A primitive string with the known entities expanded.
 */
String.prototype.deentityify = function (o) {
    var table = (typeof o === 'object' && o !== null)
        ? o
        : String.prototype.deentityify.data;

    return String(this).replace(/&([^&;]+);/g, function (reference, name) {
        // Own-property check: a plain table[name] lookup would also find
        // inherited members such as "toString" or "constructor".
        return Object.prototype.hasOwnProperty.call(table, name)
            ? table[name]
            : reference;
    });
};

// Default table: the five entities predefined by XML.
String.prototype.deentityify.data = {
    quot: '"',
    amp: '&',
    apos: "'",
    lt: '<',
    gt: '>'
};

var s = "&lt;cool&gt;".deentityify();

http://www.JSLint.com

Thanks for the response, but I needed something that can handle any char
reference. So I built up the hash with some js code generated from a quick
Perl script taking values from the W3C's XHTML DTD. Add a bit of regex and
I had it done straight away before I saw your response. Once I'd resigned
to coding this in javascript (instead of tricking the browser into decoding
the references) it was actually pretty easy.

I'll post the output in case anyone wants to do this (it's about 3k, or 1k
with content-type gzip)
/**
 * Matches one character reference and captures its body (the text between
 * "&" and ";"): a hexadecimal reference ("#x2665"), a decimal reference
 * ("#9829") or an entity name ("hearts").
 */
var CHAR_REF_REGEX = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g;

/**
 * Resolve the body of a character reference to a code point.
 *
 * @param {string} ref_body Text captured by CHAR_REF_REGEX, e.g. "#9829",
 *     "#x2665" or "hearts".
 * @returns {number} The code point, or -1 when the reference is unknown or
 *     does not denote a usable character.
 */
function char_ref_code_point( ref_body )
{
    var has_own = Object.prototype.hasOwnProperty;
    var code, marker, lower_name;

    if( ref_body.charAt( 0 ) === '#' )
    {
        marker = ref_body.charAt( 1 );
        code = ( marker === 'x' || marker === 'X' )
            ? parseInt( ref_body.substring( 2 ), 16 )
            : parseInt( ref_body.substring( 1 ), 10 );

        // Reject NUL, values beyond the Unicode range and lone surrogates.
        if( !( code > 0 && code <= 0x10FFFF ) )
            return -1;
        if( code >= 0xD800 && code <= 0xDFFF )
            return -1;
        return code;
    }

    // Entity names are case-sensitive (Agrave vs. agrave), so try the exact
    // spelling first. The lowercase retry keeps working with a table whose
    // keys are all lowercase.
    if( has_own.call( CHAR_ENTITIES, ref_body ) )
        return CHAR_ENTITIES[ ref_body ];

    lower_name = ref_body.toLowerCase();
    if( has_own.call( CHAR_ENTITIES, lower_name ) )
        return CHAR_ENTITIES[ lower_name ];

    return -1;
}

/**
 * Expand the character references in str: named entities listed in
 * CHAR_ENTITIES, decimal references (&#9829;) and hexadecimal references
 * (&#x2665;). References that cannot be resolved are copied through
 * unchanged. The input is scanned once, so expanded text is never expanded
 * a second time.
 *
 * @param {string} str Text that may contain character references.
 * @returns {string} str with every resolvable reference replaced by its
 *     character.
 */
function expand_char_references( str )
{
    return str.replace( CHAR_REF_REGEX, function( reference, ref_body )
    {
        var code = char_ref_code_point( ref_body );

        if( code < 0 )
            return reference;

        if( code > 0xFFFF )
        {
            // Outside the BMP: String.fromCharCode needs a surrogate pair.
            code -= 0x10000;
            return String.fromCharCode( 0xD800 + ( code >> 10 ),
                                        0xDC00 + ( code & 0x3FF ) );
        }
        return String.fromCharCode( code );
    } );
}
/**
 * Named character entities of XHTML 1.0 (Latin-1, special and symbol sets),
 * mapping entity name -> Unicode code point.
 *
 * Names are case-sensitive: "Agrave" (192) and "agrave" (224) are different
 * characters, as are "Dagger"/"dagger", "Prime"/"prime", "lArr"/"larr", etc.
 * Every name that has a capitalised form also has an all-lowercase form, so
 * a lookup by lowercased name still finds an entry.
 */
var CHAR_ENTITIES = {
    // Latin-1
    'nbsp':160, 'iexcl':161, 'cent':162, 'pound':163, 'curren':164, 'yen':165,
    'brvbar':166, 'sect':167, 'uml':168, 'copy':169, 'ordf':170, 'laquo':171,
    'not':172, 'shy':173, 'reg':174, 'macr':175, 'deg':176, 'plusmn':177,
    'sup2':178, 'sup3':179, 'acute':180, 'micro':181, 'para':182, 'middot':183,
    'cedil':184, 'sup1':185, 'ordm':186, 'raquo':187, 'frac14':188, 'frac12':189,
    'frac34':190, 'iquest':191,
    'Agrave':192, 'Aacute':193, 'Acirc':194, 'Atilde':195, 'Auml':196, 'Aring':197,
    'AElig':198, 'Ccedil':199, 'Egrave':200, 'Eacute':201, 'Ecirc':202, 'Euml':203,
    'Igrave':204, 'Iacute':205, 'Icirc':206, 'Iuml':207, 'ETH':208, 'Ntilde':209,
    'Ograve':210, 'Oacute':211, 'Ocirc':212, 'Otilde':213, 'Ouml':214, 'times':215,
    'Oslash':216, 'Ugrave':217, 'Uacute':218, 'Ucirc':219, 'Uuml':220, 'Yacute':221,
    'THORN':222, 'szlig':223,
    'agrave':224, 'aacute':225, 'acirc':226, 'atilde':227, 'auml':228, 'aring':229,
    'aelig':230, 'ccedil':231, 'egrave':232, 'eacute':233, 'ecirc':234, 'euml':235,
    'igrave':236, 'iacute':237, 'icirc':238, 'iuml':239, 'eth':240, 'ntilde':241,
    'ograve':242, 'oacute':243, 'ocirc':244, 'otilde':245, 'ouml':246, 'divide':247,
    'oslash':248, 'ugrave':249, 'uacute':250, 'ucirc':251, 'uuml':252, 'yacute':253,
    'thorn':254, 'yuml':255,

    // Special characters ("lt" is 60; 38 is the ampersand)
    'quot':34, 'amp':38, 'lt':60, 'gt':62, 'apos':39,
    'OElig':338, 'oelig':339, 'Scaron':352, 'scaron':353, 'Yuml':376,
    'circ':710, 'tilde':732,
    'ensp':8194, 'emsp':8195, 'thinsp':8201, 'zwnj':8204, 'zwj':8205,
    'lrm':8206, 'rlm':8207, 'ndash':8211, 'mdash':8212,
    'lsquo':8216, 'rsquo':8217, 'sbquo':8218, 'ldquo':8220, 'rdquo':8221, 'bdquo':8222,
    'dagger':8224, 'Dagger':8225, 'permil':8240, 'lsaquo':8249, 'rsaquo':8250,
    'euro':8364,

    // Symbols and Greek
    'fnof':402,
    'Alpha':913, 'Beta':914, 'Gamma':915, 'Delta':916, 'Epsilon':917, 'Zeta':918,
    'Eta':919, 'Theta':920, 'Iota':921, 'Kappa':922, 'Lambda':923, 'Mu':924,
    'Nu':925, 'Xi':926, 'Omicron':927, 'Pi':928, 'Rho':929, 'Sigma':931,
    'Tau':932, 'Upsilon':933, 'Phi':934, 'Chi':935, 'Psi':936, 'Omega':937,
    'alpha':945, 'beta':946, 'gamma':947, 'delta':948, 'epsilon':949, 'zeta':950,
    'eta':951, 'theta':952, 'iota':953, 'kappa':954, 'lambda':955, 'mu':956,
    'nu':957, 'xi':958, 'omicron':959, 'pi':960, 'rho':961, 'sigmaf':962,
    'sigma':963, 'tau':964, 'upsilon':965, 'phi':966, 'chi':967, 'psi':968,
    'omega':969, 'thetasym':977, 'upsih':978, 'piv':982,
    'bull':8226, 'hellip':8230, 'prime':8242, 'Prime':8243, 'oline':8254, 'frasl':8260,
    'weierp':8472, 'image':8465, 'real':8476, 'trade':8482, 'alefsym':8501,
    'larr':8592, 'uarr':8593, 'rarr':8594, 'darr':8595, 'harr':8596, 'crarr':8629,
    'lArr':8656, 'uArr':8657, 'rArr':8658, 'dArr':8659, 'hArr':8660,
    'forall':8704, 'part':8706, 'exist':8707, 'empty':8709, 'nabla':8711,
    'isin':8712, 'notin':8713, 'ni':8715, 'prod':8719, 'sum':8721, 'minus':8722,
    'lowast':8727, 'radic':8730, 'prop':8733, 'infin':8734, 'ang':8736,
    'and':8743, 'or':8744, 'cap':8745, 'cup':8746, 'int':8747, 'there4':8756,
    'sim':8764, 'cong':8773, 'asymp':8776, 'ne':8800, 'equiv':8801, 'le':8804,
    'ge':8805, 'sub':8834, 'sup':8835, 'nsub':8836, 'sube':8838, 'supe':8839,
    'oplus':8853, 'otimes':8855, 'perp':8869, 'sdot':8901,
    'lceil':8968, 'rceil':8969, 'lfloor':8970, 'rfloor':8971,
    'lang':9001, 'rang':9002, 'loz':9674,
    'spades':9824, 'clubs':9827, 'hearts':9829, 'diams':9830
};

Jul 23 '05 #4

This thread has been closed and replies have been disabled. Please start a new discussion.

Similar topics

76
15155
by: Zenobia | last post by:
How do I display character 151 (long hyphen) in XHTML (utf-8) ? Is there another character that will substitute? The W3C validation parser, http://validator.w3.org, tells me that this character and the ones around it are illegal - then, after resubmission it flags no errors. So, are there any illegal characters between 0 and 255 in the UTF-8 character set or is it just my imagination that the W3C validation parser thinks there are -...
19
3403
by: Ian | last post by:
I'm using the following meta tag with my documents: <meta http-equiv="Content-Type" content= "text/html; charset=us-ascii" /> and yet using character entities like &rsquo; and &mdash; It validates at W3C and WDG, and runs in standards compliance mode in Firefox 0.9. What I'm wondering is, is this a good practice? I assume my pages will load faster if declared as using the
1
1851
by: User Axes Dean Eyed | last post by:
The following statement does not yield what I expect it to: var txtNode = document.createTextNode("ESI&minus;"); txtNode holds a value of "ESI&minus;" instead of "ESI-" where '-' is the character entity reference for the minus sign, that I expect. I have tried other variations. I am working with Mozilla and IE because I want to make NO use of extensions/browser-dependent code, and both (try to) implement the DOM to varying degrees.
50
4356
by: The Bicycling Guitarist | last post by:
A browser conforming to HTML 4.0 is required to recognize &#number; notations. If I use XHTML 1.0 and charset UTF-8 though, does &eacute; have as much support as é ? Sometimes when I run the TIDY utility on my code, it replaces my character notations with weird looking things I don't recognize. Also, when I converted to UTF-8 from ISO-8859-1, I discovered many special characters
2
1299
by: jesl | last post by:
Group, I have created a User Control with the property "Html" of type string. If I declare this control on an ASPX page with the value "<b>This is an entity: &lt;</b>" for the property "Html", the ASP.NET parser seems to automatically convert the entity reference "&lt;" to it's corresponding character value "<". For example, if the tagprefix and tagname for the user control is "dn" and "test":
40
3122
by: Shmuel (Seymour J.) Metz | last post by:
I'd like to include some Hebrew names in a web page. HTML 4 doesn't appear to include character attributes for ISO-8859-8. I'd prefer avoiding numeric references, e.g., "&#x05E9;&#x05DE;&#x05D5;&#x05D0;&#x05DC;". Is there currently a standardized set of character attributes for Hebrew? If so, is there a downloadable set of definitions for those attributes? Thanks. --
44
9499
by: Kulgan | last post by:
Hi I am struggling to find definitive information on how IE 5.5, 6 and 7 handle character input (I am happy with the display of text). I have two main questions: 1. Does IE automaticall convert text input in HTML forms from the
4
2094
by: Paul Rubin | last post by:
I'm new to xml mongering so forgive me if there's an obvious well-known answer to this. It's not real obvious from the library documentation I've looked at so far. Basically I have to munch of a bunch of xml files which contain character entities like &uacute; which are apparently nonstandard. They appear in w3.org tables but xml.etree.cElementTree.ElementTree.parse barfs at them and xmllint barfs at them. Basically I want to know if...
18
2764
by: R. P. | last post by:
I wonder how to indicate in a stylesheet that character entities in an element are not to be transformed as would be the case in XML-to-XML transforms. I want to keep those &amp; &quot; and other character entities in the output as they are in the input. My stylesheet converts them the '&' etc., making the output XML not formed properly. R.P
0
9645
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However, people are often confused as to whether an ONU can Work As a Router. In this blog post, we’ll explore What is ONU, What Is Router, ONU & Router’s main usage, and What is the difference between ONU and Router. Let’s take a closer look ! Part I. Meaning of...
0
10324
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
0
10147
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven tapestry of website design and digital marketing. It's not merely about having a website; it's about crafting an immersive digital experience that captivates audiences and drives business growth. The Art of Business Website Design Your website is...
1
10090
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
0
6739
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
5380
by: TSSRALBI | last post by:
Hello I'm a network technician in training and I need your help. I am currently learning how to create and manage the different types of VPNs and I have a question about LAN-to-LAN VPNs. The last exercise I practiced was to create a LAN-to-LAN VPN between two Pfsense firewalls, by using IPSEC protocols. I succeeded, with both firewalls in the same network. But I'm wondering if it's possible to do the same thing, with 2 Pfsense firewalls...
0
5511
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
2
3645
muto222
by: muto222 | last post by:
How can i add a mobile payment intergratation into php mysql website.
3
2879
bsmnconsultancy
by: bsmnconsultancy | last post by:
In today's digital era, a well-designed website is crucial for businesses looking to succeed. Whether you're a small business owner or a large corporation in Toronto, having a strong online presence can significantly impact your brand's success. BSMN Consultancy, a leader in Website Development in Toronto offers valuable insights into creating effective websites that not only look great but also perform exceptionally well. In this comprehensive...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.