473,471 Members | 2,613 Online
Bytes | Software Development & Data Engineering Community
Create Post

Home Posts Topics Members FAQ

Regular expression to parse and split string into array

17 New Member
Hello everyone,

I am using a regular expression to parse a text string into various parts -- for ex: string "How do you do" will be changed to array with all the words and white spaces.

I am using the following code (which has been copied from internet)
Expand|Select|Wrap|Line Numbers
  1. <html>
  2. <body>
  3.  
  4. <script type="text/javascript">
  5.  
Expand|Select|Wrap|Line Numbers
  // Demo driver: split a sample string into tokens, keeping the delimiters
  // (capture = true) and skipping accent flattening (noflatten = true), then
  // write the tokens into the page as a comma-separated list.
  var text = 'Hello how@are you.com';
  var result = tokenize(text, true, true);
  document.write(result.join(','));
  4.  
  /**
   * Splits a string into tokens on whitespace, on "@", and on runs of
   * quote / dot / hyphen characters that sit at the edge of a word.
   *
   * @param {string} text The string to split.
   * @param {boolean} capture When truthy, the whole delimiter pattern is wrapped
   *     in a capturing group so the matched delimiters are returned as array
   *     items between the words (this relies on a split() implementation that
   *     includes captured groups in its output).
   * @param {boolean} noflatten When truthy, text is split as-is; otherwise it is
   *     first passed through flatten_string().
   * @returns {Array} The pieces of text, with delimiters interleaved when
   *     capture is truthy.
   */
  function tokenize(text, capture, noflatten)
  {
      // Delimiter alternatives, in order:
      //   1. (start of string | spaces) + punctuation run + optional spaces
      //   2. optional spaces + punctuation run + (spaces | end of string)
      //   3. a single "@"
      //   4. a run of spaces
      // Declared with var: the original assigned these names without a
      // declaration, which leaks globals and throws in strict mode.
      var delimiterSource = '(?:(?:^| +)["\'.\\-]+ *)|(?: *[\'".\\-]+(?: +|$)|@| +)';
      var input = noflatten ? text : flatten_string(text);

      // Build only the regex that is actually needed for this call.
      var delimiter = capture
          ? new RegExp('(' + delimiterSource + ')', 'g')
          : new RegExp(delimiterSource, 'g');

      return input.split(delimiter);
  }
  12.  
  /**
   * Lower-cases a string and replaces common accented Latin letters and
   * ligatures with unaccented ASCII equivalents (e.g. "é" -> "e", "æ" -> "ae").
   *
   * @param {string} text The string to normalise.
   * @returns {string} The lower-cased string with the listed characters replaced.
   */
  function flatten_string(text)
  {
      // Written with \u escapes so the table survives being saved or served in
      // the wrong character encoding. The mis-encoded originals ("Ã¥", "Ã*",
      // "Å“") were broken: "Ã*" matches the empty string, so an "i" was inserted
      // at every position of the input.
      var accents = {
          a: /[\u00E0-\u00E5]/g,        // à á â ã ä å
          c: /\u00E7/g,                 // ç
          d: /\u00F0/g,                 // ð
          e: /[\u00E8-\u00EB]/g,        // è é ê ë
          i: /[\u00EC-\u00EF]/g,        // ì í î ï
          n: /\u00F1/g,                 // ñ
          o: /[\u00F2-\u00F6\u00F8]/g,  // ò ó ô õ ö ø
          u: /[\u00F9-\u00FC]/g,        // ù ú û ü
          y: /[\u00FD\u00FF]/g,         // ý ÿ
          ae: /\u00E6/g,                // æ
          oe: /\u0153/g                 // œ
      };

      var flattened = text.toLowerCase();
      for (var replacement in accents)
      {
          // Skip anything inherited from an extended Object.prototype.
          if (Object.prototype.hasOwnProperty.call(accents, replacement))
          {
              flattened = flattened.replace(accents[replacement], replacement);
          }
      }
      return flattened;
  }
Expand|Select|Wrap|Line Numbers
  1. </script>
  2.  
  3. </body>
  4. </html>
  5.  
This code is working fine in Mozilla Firefox 2.0 but not working fine in IE 7.0.
If you execute this code, you will see that the results in the two browsers are different.
While firefox also returns the splitting delimiters as a part of the array, IE 7.0 seems to ignore the delimiters and simply pass back the array without the delimiters.

I am new to regular expressions and am not able to work out how this regular expression works (since it was copied from the internet).

If someone can help me fix the above code to return same results in case of IE7 and Firefox, that would be great help.

Thanks,
Rupinder
Sep 14 '07 #1
3 8882
acoder
16,027 Recognized Expert Moderator MVP
Changed the thread title to better describe the problem.

Read about regular expressions in Javascript here.
Sep 14 '07 #2
rupinderbatra
17 New Member
Hello Everyone,

I was able to find the solution to the problem. The original coder on the internet has extended the String.split function to achieve proper functionality.
Sharing it below for others to use:
Expand|Select|Wrap|Line Numbers
  /**
   * Replacement for String.prototype.split that gives consistent results for
   * regular-expression separators: text matched by capturing groups is
   * included in the output and the limit argument caps the number of returned
   * items. The native method stays available as String.prototype._split and is
   * used for every non-RegExp separator.
   *
   * @param {RegExp|string} separator Pattern or string to split on.
   * @param {number} [limit] Maximum number of items to return; undefined or a
   *     negative value means no limit.
   * @returns {Array} The pieces of the string. Special cases kept from the
   *     original shim: [] when separator or limit is null, and the whole string
   *     as a single item when separator is undefined.
   */
  // Save the native method only once; loading this script a second time would
  // otherwise store the shim as _split and make string separators recurse forever.
  if (!String.prototype._split)
  {
      String.prototype._split = String.prototype.split;
  }
  String.prototype.split = function (separator, limit)
  {
      if (separator === null || limit === null)
      {
          return [];
      }
      if (separator === undefined)
      {
          return [this.toString()];
      }
      if (!(separator instanceof RegExp))
      {
          return this._split(separator, limit);
      }

      var str = String(this);

      // Flags as written after the closing "/" of the literal, minus "g" and "y":
      // the search below always runs on a private global copy, so the caller's
      // regex (and its lastIndex) is neither read nor modified.
      var flags = separator.toString().replace(/^[\S\s]+\//, "").replace(/[gy]/g, "");
      var matcher = new RegExp(separator.source, flags + "g");

      // Some engines report "" instead of undefined for capturing groups that
      // did not take part in the match. Where that is the case, each separator
      // is re-matched on its own so such groups can be reset to undefined.
      var needsGroupFix = /()??/.exec("")[1] !== undefined;
      var anchored = needsGroupFix ? new RegExp("^(?:" + separator.source + ")$", flags) : null;

      var max;
      if (limit === undefined || limit < 0)
      {
          max = Infinity;
      }
      else
      {
          max = Math.floor(limit);
          if (!max)
          {
              return [];
          }
      }

      var output = [];
      var lastLastIndex = 0;   // end of the previous separator that produced a piece
      var lastLength;          // length of that separator match
      var match;
      var lastIndex;

      while ((match = matcher.exec(str)))
      {
          // Computed rather than read from matcher.lastIndex, because some
          // engines advance lastIndex past an empty match on their own.
          lastIndex = match.index + match[0].length;

          if (lastIndex > lastLastIndex)
          {
              output.push(str.slice(lastLastIndex, match.index));

              if (anchored && match.length > 1)
              {
                  match[0].replace(anchored, function ()
                  {
                      // arguments = (match, group1..groupN, offset, input)
                      for (var j = 1; j < arguments.length - 2; j++)
                      {
                          if (arguments[j] === undefined)
                          {
                              match[j] = undefined;
                          }
                      }
                  });
              }

              // Captured groups go into the output, except for an empty match
              // at the very end of the string.
              if (match.length > 1 && match.index < str.length)
              {
                  Array.prototype.push.apply(output, match.slice(1));
              }

              lastLength = match[0].length;
              lastLastIndex = lastIndex;

              if (output.length >= max)
              {
                  break;
              }
          }

          // Step past an empty match so exec() cannot loop on the same index.
          if (matcher.lastIndex === match.index)
          {
              matcher.lastIndex++;
          }
      }

      if (lastLastIndex === str.length)
      {
          // The string ended with a separator: add the trailing empty piece
          // unless that separator was an empty match.
          matcher.lastIndex = 0;
          if (lastLength || !matcher.test(""))
          {
              output.push("");
          }
      }
      else
      {
          output.push(str.slice(lastLastIndex));
      }

      return output.length > max ? output.slice(0, max) : output;
  };
  73.  
Thanks,
Rupinder
Sep 14 '07 #3
acoder
16,027 Recognized Expert Moderator MVP
Thanks for posting your solution. Glad to hear that you got it working. Post again any time if you have more questions.
Sep 14 '07 #4

Sign in to post your reply or Sign up for a free account.

Similar topics

8
by: Michael McGarry | last post by:
Hi, I am horrible with Regular Expressions, can anyone recommend a book on it? Also I am trying to parse the following string to extract the number after load average. ".... load average:...
11
by: Martin Robins | last post by:
I am trying to parse a string that is similar in form to an OLEDB connection string using regular expressions; in principle it is working, but certain character combinations in the string being...
0
by: Zachary Turner | last post by:
I am hoping someone can help me with a regular expression. I want to use RegExp.Split, to split a string such as the following text_1 /text_3/text_4/.../text_n/ into an array that contains...
6
by: Craig Buchanan | last post by:
I have a string in the format "name" <address> that i would like to split into an array of two values. name should be the first value, address the second value. what does my regex pattern need to...
9
by: Schorschi | last post by:
Not having used regular expressions much, I need some help. Given a string... "This\0Guy\0Needs\0Some\0Help\0\0\0\0\0" Need result as array of strings... "This","Guy", "Needs", "Some", "Help" ...
8
by: moondaddy | last post by:
I'm writing an app in vb.net 1.1 and I need to parse strings that look similar to the one below. All 5 rows will make up one string. I have a form where a use can copy/paste data like what you...
25
by: Mike | last post by:
I have a regular expression (^(.+)(?=\s*).*\1 ) that results in matches. I would like to get what the actual regular expression is. In other words, when I apply ^(.+)(?=\s*).*\1 to " HEART...
11
by: Steve | last post by:
Hi All, I'm having a tough time converting the following regex.compile patterns into the new re.compile format. There is also a differences in the regsub.sub() vs. re.sub() Could anyone lend...
0
by: ahropak | last post by:
Hi, I have a question regarding a regular expression within Regex.Split() method which will help me to break each line of code into tokens. I'm trying to parse some lines of C# source code and...
0
by: Hystou | last post by:
There are some requirements for setting up RAID: 1. The motherboard and BIOS support RAID configuration. 2. The motherboard has 2 or more available SATA protocol SSD/HDD slots (including MSATA, M.2...
0
marktang
by: marktang | last post by:
ONU (Optical Network Unit) is one of the key components for providing high-speed Internet services. Its primary function is to act as an endpoint device located at the user's premises. However,...
0
by: Hystou | last post by:
Most computers default to English, but sometimes we require a different language, especially when relocating. Forgot to request a specific language before your computer shipped? No problem! You can...
0
Oralloy
by: Oralloy | last post by:
Hello folks, I am unable to find appropriate documentation on the type promotion of bit-fields when using the generalised comparison operator "<=>". The problem is that using the GNU compilers,...
0
jinu1996
by: jinu1996 | last post by:
In today's digital age, having a compelling online presence is paramount for businesses aiming to thrive in a competitive landscape. At the heart of this digital strategy lies an intricately woven...
1
isladogs
by: isladogs | last post by:
The next Access Europe User Group meeting will be on Wednesday 1 May 2024 starting at 18:00 UK time (6PM UTC+1) and finishing by 19:30 (7.30PM). In this session, we are pleased to welcome a new...
0
by: conductexam | last post by:
I have .net C# application in which I am extracting data from word file and save it in database particularly. To store word all data as it is I am converting the whole word file firstly in HTML and...
0
by: adsilva | last post by:
A Windows Forms form does not have the event Unload, like VB6. What one acts like?
0
by: 6302768590 | last post by:
Hai team i want code for transfer the data from one system to another through IP address by using C# our system has to for every 5mins then we have to update the data what the data is updated ...

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.