By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
434,797 Members | 1,262 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 434,797 IT Pros & Developers. It's quick & easy.

Replacing whole word using regex in C#

P: 1
Hi All

Really hoping someone can help me out here with my deficient regex skills :)

I have a function which takes a string of HTML and replaces a term (word or phrase) with a link. The purpose is that I seek out terms which are in a glossary on our site, and automatically link to their definitions. It's slightly complex because certain elements have to be ignored; for example, I don't want to add links within existing links, or link terms contained in e.g. <h1></h1> tags.
Anyway, I have a function which I didn't actually write, hence the problem I have in modifying it.

The problem I have is that I don't want it to replace terms which are not WHOLE words. So for example, if I am searching for the term "fund", it currently does the replace if it finds the word "funds".

I have tried using the regex \b escape, but this doesn't seem to work.

Any help would be much, much appreciated.

Thanks in advance
John
Expand|Select|Wrap|Line Numbers
  1. // Replaces all instances of text match in HTML string, ignoring instances in HTML 
  2. #region public static string PlainTextReplace(string html, string oldString, string newString, string Definition)
  3.  
  4. // Regex matches for PlainTextReplace
  5. static Regex rxPlainText = new Regex(@"^[^\<]+", RegexOptions.IgnoreCase);
  6. static Regex rxTag = new Regex(@"</?\s*(?'tagname'[^>\s]+).*?>", RegexOptions.Compiled);
  7. static Regex[] rxForbiddenTags = new Regex[]{
  8. new Regex(@"^h\d$", RegexOptions.Compiled), // Matches <h?>
  9. new Regex("^a$", RegexOptions.Compiled)    // Matches <a>
  10. }; 
  11.  
  12. public static string PlainTextReplace(string html, string oldString, string urlString, string Definition)
  13. {
  14.     int iStringPos=0;
  15.     Stack tagStack = new Stack();
  16.     StringBuilder sbResult = new StringBuilder();
  17.     Match match;
  18.     while (iStringPos < html.Length)
  19.     {
  20.         bool bContainsForbiddenTag = false;
  21.         IEnumerator enumTags = tagStack.GetEnumerator();
  22.  
  23.         while (enumTags.MoveNext())
  24.         {
  25.             string sCurrentTag = (string) enumTags.Current;
  26.             foreach (Regex rxForbiddenTag in rxForbiddenTags) // loop through all enclosing tags and check for forbidden ones.
  27.             {
  28.                 match = rxForbiddenTag.Match(sCurrentTag);
  29.                 if (match.Success)
  30.                 {
  31.                     bContainsForbiddenTag = true;
  32.                     break;
  33.                 }
  34.             }
  35.             if (bContainsForbiddenTag)
  36.                 break;
  37.         }
  38.  
  39.         //if (tagStack.Count == 0) // only perform replacement at tag depth 0.
  40.         if (!bContainsForbiddenTag) // Ignores tag depth. Skips all text enclosed in one or more forbidden tags.
  41.         {
  42.             match = rxPlainText.Match(html, iStringPos, html.Length - iStringPos);
  43.             if (match.Success)
  44.             {
  45.                 string searchString = match.Value;
  46.                 int index = searchString.ToLower().IndexOf(oldString.ToLower());
  47.                 if (index != -1)
  48.                 {
  49.                     searchString = "<a href=\"/" + Globals.SiteAlias + "/jargon-" + urlString + ".aspx\" class=\"jargon\" title=\"" + Definition + "\">" + searchString.Substring(index, oldString.Length) + "</a>";
  50.                 }
  51.  
  52.                 // Do the replace and move on.
  53.                 sbResult.Append( Regex.Replace(match.Value, oldString, searchString, RegexOptions.IgnoreCase) );
  54.                 // THIS DOESNT WORK
  55.                 //sbResult.Append( Regex.Replace(match.Value, @"\b" + oldString + "\b", searchString, RegexOptions.IgnoreCase) );
  56.                 iStringPos = match.Index + match.Length;
  57.             }
  58.         }
  59.  
  60.         match = rxTag.Match(html, iStringPos, html.Length - iStringPos);
  61.         if (match.Success)
  62.         {
  63.             if (match.Value.StartsWith("</"))
  64.             {
  65.                 try
  66.                 {
  67.                     if(match.Groups["tagname"].Value.ToLower().Trim().Equals(((string) tagStack.Peek())))
  68.                         tagStack.Pop();    
  69.                 }
  70.                 catch
  71.                 {
  72.  
  73.                 }                    
  74.             }
  75.             else if (match.Value.EndsWith("/>") || match.Value.StartsWith("<!--"))
  76.             {
  77.                 // ignore
  78.             }
  79.             else
  80.             {
  81.                 tagStack.Push(match.Groups["tagname"].Value.ToLower().Trim());
  82.             }
  83.             sbResult.Append( html.Substring(iStringPos, match.Index + match.Length - iStringPos));
  84.             iStringPos = match.Index + match.Length;
  85.         }
  86.  
  87.     }
  88.     return sbResult.ToString();
  89. }
  90. #endregion
Oct 2 '07 #1
Share this Question
Share on Google+
1 Reply


P: 1
Hi,

sbResult.Append(Regex.Replace(match.Value, "\\b" + oldString + "\\b", searchString, RegexOptions.IgnoreCase));

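the \\b will work :) In a regular (non-verbatim) C# string literal, "\b" is the backspace character, so the word-boundary anchor has to be written as "\\b" (or @"\b") to reach the regex engine intact.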

Regards,
Gaurav Bhatt
<link removed>
Apr 27 '10 #2

Post your reply

Sign in to post your reply or Sign up for a free account.