473,729 Members | 2,340 Online
Bytes | Software Development & Data Engineering Community
+ Post

Home Posts Topics Members FAQ

Regular expression to parse and split string into array

17 New Member
Hello everyone,

I am using a regular expression to parse a text string into its parts -- for example, the string "How do you do" should be split into an array containing all the words and the whitespace between them.

I am using the following code (which was copied from the internet):
Expand|Select|Wrap|Line Numbers
  1. <html>
  2. <body>
  3.  
  4. <script type="text/javascript">
  5.  
Expand|Select|Wrap|Line Numbers
  // Demo driver: tokenize a sample string with capture=true (keep the
  // delimiters) and noflatten=true (skip flatten_string), then write the
  // resulting pieces to the page, comma-separated.
  var text = "Hello how@are you.com",
      result = tokenize(text, true, true);
  document.write(result.join(','));
  4.  
  /**
   * Split a string into tokens.
   *
   * A separator is any of:
   *   - one or more of the characters " ' . - that are preceded by the start
   *     of the string or by spaces (plus any spaces that follow them);
   *   - one or more of those characters that are followed by spaces or the
   *     end of the string (plus any spaces that precede them);
   *   - a literal "@";
   *   - a run of spaces.
   *
   * @param {string} text - The string to tokenize.
   * @param {boolean} [capture] - When truthy, the separator pattern is wrapped
   *   in a capturing group, so a spec-conforming split() also returns the
   *   matched separators as array elements.
   * @param {boolean} [noflatten] - When truthy, text is split as-is;
   *   otherwise it is first passed through flatten_string().
   * @returns {string[]} The pieces produced by String.prototype.split.
   */
  function tokenize(text, capture, noflatten)
  {
      // Declared with var: the previous version assigned its regex variables
      // without declaring them, which leaked three globals and throws a
      // ReferenceError in strict mode.
      var separatorSource = '(?:(?:^| +)["\'.\\-]+ *)|(?: *[\'".\\-]+(?: +|$)|@| +)';

      // Build only the regex that is actually needed for this call.
      var separator = capture
          ? new RegExp('(' + separatorSource + ')', 'g')
          : new RegExp(separatorSource, 'g');

      var input = noflatten ? text : flatten_string(text);
      return input.split(separator);
  }
  12.  
  /**
   * Lower-case a string and replace accented Latin letters and ligatures
   * with plain ASCII equivalents (e.g. "é" -> "e", "æ" -> "ae").
   *
   * @param {string} text - The string to normalize.
   * @returns {string} The lower-cased, accent-free string.
   */
  function flatten_string(text)
  {
      // Maps each ASCII replacement to the characters it stands in for.
      // Three entries were previously corrupted by a character-encoding
      // mix-up ("Ã¥" for å, "Ã*" for í, "Å“" for œ). The "Ã*" form was
      // especially harmful: as a regex it matches the empty string, so an
      // "i" was inserted at every position of the input.
      var accents = {
          a: /[àáâãäå]/g,
          c: /ç/g,
          d: /ð/g,
          e: /[èéêë]/g,
          i: /[ìíîï]/g,
          n: /ñ/g,
          o: /[øöõôóò]/g,
          u: /[üûúù]/g,
          y: /[ÿý]/g,
          ae: /æ/g,
          oe: /œ/g
      };

      // Lower-case first so that only lower-case accented forms need listing.
      var flattened = text.toLowerCase();
      for (var replacement in accents)
      {
          // Guard against enumerable properties added to Object.prototype.
          if (Object.prototype.hasOwnProperty.call(accents, replacement))
          {
              flattened = flattened.replace(accents[replacement], replacement);
          }
      }
      return flattened;
  }
Expand|Select|Wrap|Line Numbers
  1. </script>
  2.  
  3. </body>
  4. </html>
  5.  
This code works fine in Mozilla Firefox 2.0 but not in IE 7.0.
If you execute this code, you will see that the results in the two browsers are different.
While Firefox also returns the splitting delimiters as part of the array, IE 7.0 seems to ignore the delimiters and simply passes back the array without them.

I am new to regular expressions and have not been able to work out how this regular expression works (since it was copied from the internet).

If someone can help me fix the above code so that it returns the same results in IE7 and Firefox, that would be a great help.

Thanks,
Rupinder
Sep 14 '07 #1
3 8920
acoder
16,027 Recognized Expert Moderator MVP
Changed the thread title to better describe the problem.

Read about regular expressions in Javascript here.
Sep 14 '07 #2
rupinderbatra
17 New Member
Hello Everyone,

I was able to find the solution to the problem. The original author of the code had extended the String.split function to achieve proper functionality.
Sharing it below for others to use:
Expand|Select|Wrap|Line Numbers
  // Keep a reference to the engine's own split(). The guard prevents a second
  // load of this script from storing the override itself in _split, which
  // would make the string-separator path recurse forever.
  if (typeof String.prototype._split != 'function')
  {
      String.prototype._split = String.prototype.split;
  }

  /**
   * Cross-browser String.prototype.split.
   *
   * Some engines (e.g. IE 7) drop capturing-group matches and empty pieces
   * when splitting on a regular expression. This override runs the split by
   * hand for RegExp separators and defers to the native implementation
   * (saved as String.prototype._split) for everything else.
   *
   * Special cases:
   *   - separator === null or limit === null  -> []
   *   - separator === undefined               -> [the whole string]
   *   - limit undefined or negative           -> no limit
   *   - limit that floors to 0 (or is NaN)    -> []
   *
   * @param {string|RegExp} [separator] - What to split on.
   * @param {number} [limit] - Maximum number of elements to return.
   * @returns {Array} The pieces; for a RegExp with capturing groups the
   *   captured text (or undefined for groups that did not participate) is
   *   spliced in between the pieces.
   */
  String.prototype.split = function (separator, limit)
  {
      if (separator === null || limit === null)
      {
          return [];
      }
      if (separator === undefined)
      {
          return [this.toString()];
      }
      if (!(separator instanceof RegExp))
      {
          // Strings and any other separator type: native behaviour is fine.
          return this._split(separator, limit);
      }

      var str = String(this);

      // Flags are whatever follows the final "/" of the regex's string form.
      // "g" is re-added below; "y" is dropped because a sticky exec() would
      // stop at the first position where the separator does not match.
      var flags = String(separator).replace(/^[\S\s]+\//, "").replace(/[gy]/g, "");

      // Work on private copies so the caller's regex (its lastIndex and its
      // properties) is never touched.
      var splitter = new RegExp(separator.source, flags + "g");
      // Anchored copy used to detect capturing groups that did not
      // participate in a match; the (?:...) wrapper keeps the anchors
      // correct when the source contains a top-level alternation.
      var groupFixer = new RegExp("^(?:" + separator.source + ")$", flags);

      var max;
      if (limit === undefined || limit < 0)
      {
          max = Infinity;
      }
      else
      {
          max = Math.floor(limit);
          if (!max)
          {
              return [];
          }
      }

      var output = [];
      var lastLastIndex = 0; // end of the previous separator match
      var lastLength;        // length of the previous separator match
      var match;
      var matchEnd;

      while ((match = splitter.exec(str)))
      {
          // Computed by hand: some engines bump lastIndex after an empty match.
          matchEnd = match.index + match[0].length;

          if (matchEnd > lastLastIndex)
          {
              output.push(str.substring(lastLastIndex, match.index));

              if (match.length > 1)
              {
                  // Some engines report non-participating groups as "" rather
                  // than undefined; replace() reports them correctly, so use
                  // it to patch the match array.
                  match[0].replace(groupFixer, function ()
                  {
                      for (var j = 1; j < arguments.length - 2; j++)
                      {
                          if (arguments[j] === undefined)
                          {
                              match[j] = undefined;
                          }
                      }
                  });
                  // Captures from an empty match at the very end are dropped.
                  if (match.index < str.length)
                  {
                      output.push.apply(output, match.slice(1));
                  }
              }

              lastLength = match[0].length;
              lastLastIndex = matchEnd;

              if (output.length >= max)
              {
                  break;
              }
          }

          // Step past an empty match so the loop always makes progress.
          if (splitter.lastIndex === match.index)
          {
              splitter.lastIndex++;
          }
      }

      if (lastLastIndex === str.length)
      {
          // The string ended on a separator: emit the trailing "" unless the
          // only thing that matched there was an empty match.
          if (lastLength || !splitter.test(""))
          {
              output.push("");
          }
      }
      else
      {
          output.push(str.substring(lastLastIndex));
      }

      // Captured groups can push the count past the limit; trim to fit.
      return output.length > max ? output.slice(0, max) : output;
  };
  73.  
Thanks,
Rupinder
Sep 14 '07 #3
acoder
16,027 Recognized Expert Moderator MVP
Thanks for posting your solution. Glad to hear that you got it working. Post again any time if you have more questions.
Sep 14 '07 #4

Sign in to post your reply or Sign up for a free account.

Similar topics

8
2428
by: Michael McGarry | last post by:
Hi, I am horrible with Regular Expressions, can anyone recommend a book on it? Also I am trying to parse the following string to extract the number after load average. ".... load average: 0.04, 0.02, 0.01" how can I extract this number with RE or otherwise?
11
3912
by: Martin Robins | last post by:
I am trying to parse a string that is similar in form to an OLEDB connection string using regular expressions; in principle it is working, but certain character combinations in the string being parsed can completely wreck it. The string I am trying to parse is as follows: commandText=insert into (Text) values (@message + N': ' + @category);commandType=StoredProcedure; message=@message; category=@category I am looking to retrieve name value...
0
636
by: Zachary Turner | last post by:
I am hoping someone can help me with a regular expression. I want to use RegExp.Split to split a string such as the following text_1 /text_3/text_4/.../text_n/ into an array that contains the following elements: text_1 text_2 text_3
6
255
by: Craig Buchanan | last post by:
I have a string in the format "name" <address> that I would like to split into an array of two values. name should be the first value, address the second value. What does my regex pattern need to be? If the regex doesn't find occurrences of two double quotes and an occurrence of < and an occurrence of >, will I get a null string array? BTW, is there a difference between: dim X() as string and
9
1791
by: Schorschi | last post by:
Not having used regular expressions much, I need some help. Given a string... "This\0Guy\0Needs\0Some\0Help\0\0\0\0\0" Need result as array of strings... "This","Guy", "Needs", "Some", "Help" Where '\0' is a literal zero-byte. I think I need two regular expressions? One to strip the multiple instances of '\0' bytes, and another to split the string.
8
5038
by: moondaddy | last post by:
I'm writing an app in vb.net 1.1 and I need to parse strings that look similar to the one below. All 5 rows will make up one string. I have a form where a user can copy/paste data like what you see below from excel, word, notepad, etc. into a textbox on my form. I need to break each line into 2 numbers which I'll use as parameters for another function. In all cases each line will be separated with a vbNewline and in most cases the 2...
25
5161
by: Mike | last post by:
I have a regular expression (^(.+)(?=\s*).*\1 ) that results in matches. I would like to get what the actual regular expression is. In other words, when I apply ^(.+)(?=\s*).*\1 to " HEART (CONDUCTION DEFECT) 37.33/2 HEART (CONDUCTION DEFECT) WITH CATHETER 37.34/2 " the expression is "HEART (CONDUCTION DEFECT)". How do I gain access to the expression (not the matches) at runtime? Thanks, Mike
11
3108
by: Steve | last post by:
Hi All, I'm having a tough time converting the following regex.compile patterns into the new re.compile format. There are also differences between regsub.sub() and re.sub(). Could anyone lend a hand? import regsub
0
2592
by: ahropak | last post by:
Hi, I have a question regarding a regular expression within Regex.Split() method which will help me to break each line of code into tokens. I'm trying to parse some lines of C# source code and split them into tokens. My logic is very simple: construct a regular expression with 'operators and punctuators' which will serve as delimiters and split a string into array of tokens including those delimiters. For example: I have the...
0
8763
by: Hystou | last post by:
Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can effortlessly switch the default language on Windows 10 without reinstalling. I'll walk you through it. First, let's disable language synchronization. With a Microsoft account, language settings sync across devices. To prevent any complications,...
0
9427
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers, it seems that the internal comparison operator "<=>" tries to promote arguments from unsigned to signed. This is as boiled down as I can make it. Here is my compilation command: g++-12 -std=c++20 -Wnarrowing bit_field.cpp Here is the code in...
0
9284
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven tapestry of website design and digital marketing. It's not merely about having a website; it's about crafting an immersive digital experience that captivates audiences and drives business growth. The Art of Business Website Design Your website is...
1
9202
by: Hystou | last post by:
Overview: Windows 11 and 10 have less user interface control over operating system update behaviour than previous versions of Windows. In Windows 11 and 10, there is no way to turn off the Windows Update option using the Control Panel or Settings app; it automatically checks for updates and installs any it finds, whether you like it or not. For most users, this new feature is actually very convenient. If you want to control the update process,...
0
9148
tracyyun
by: tracyyun | last post by:
Dear forum friends, With the development of smart home technology, a variety of wireless communication protocols have appeared on the market, such as Zigbee, Z-Wave, Wi-Fi, Bluetooth, etc. Each protocol has its own unique characteristics and advantages, but as a user who is planning to build a smart home system, I am a bit confused by the choice of these technologies. I'm particularly interested in Zigbee because I've heard it does some...
0
6022
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and then checking html paragraph one by one. At the time of converting from word file to html my equations which are in the word document file was convert into image. Globals.ThisAddIn.Application.ActiveDocument.Select();...
0
4528
by: TSSRALBI | last post by:
Hello I'm a network technician in training and I need your help. I am currently learning how to create and manage the different types of VPNs and I have a question about LAN-to-LAN VPNs. The last exercise I practiced was to create a LAN-to-LAN VPN between two Pfsense firewalls, by using IPSEC protocols. I succeeded, with both firewalls in the same network. But I'm wondering if it's possible to do the same thing, with 2 Pfsense firewalls...
0
4796
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
2
2683
muto222
by: muto222 | last post by:
How can I add a mobile payment integration to a PHP/MySQL website?

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.