Really hoping someone can help me out here with my deficient regex skills :)
I have a function which takes a string of HTML and replaces a term (word or phrase) with a link. The pupose is that I seek out terms which are in a glossary on our site, and automatically link to this definition. Its slightly complex becase certain elements have to be ignored, for exampleI dont want to add links within existing links, or for example link terms contained in e.g. <h1><h1> tags.
Anyway I have a function which I didnt actually write, hense the problem I have in modifying it.
The problem I have is that I dont want it to replace terms which are not WHOLE words. So for example if I an searching for the term "fund", it current does the replace if it finds the word "funds".
I have tried using the regex \b escape, but this doesnt seem to work.
Ay help would be much much appreciated.
Thanks in advance
John
Expand|Select|Wrap|Line Numbers
- // Replaces all instances of text match in HTML string, ignoring instances in HTML
- #region public static string PlainTextReplace(string html, string oldString, string newString, string Definition)
- // Regex matches for PlainTextReplace
- static Regex rxPlainText = new Regex(@"^[^\<]+", RegexOptions.IgnoreCase);
- static Regex rxTag = new Regex(@"</?\s*(?'tagname'[^>\s]+).*?>", RegexOptions.Compiled);
- static Regex[] rxForbiddenTags = new Regex[]{
- new Regex(@"^h\d$", RegexOptions.Compiled), // Matches <h?>
- new Regex("^a$", RegexOptions.Compiled) // Matches <a>
- };
- public static string PlainTextReplace(string html, string oldString, string urlString, string Definition)
- {
- int iStringPos=0;
- Stack tagStack = new Stack();
- StringBuilder sbResult = new StringBuilder();
- Match match;
- while (iStringPos < html.Length)
- {
- bool bContainsForbiddenTag = false;
- IEnumerator enumTags = tagStack.GetEnumerator();
- while (enumTags.MoveNext())
- {
- string sCurrentTag = (string) enumTags.Current;
- foreach (Regex rxForbiddenTag in rxForbiddenTags) // loop through all enclosing tags and check for forbidden ones.
- {
- match = rxForbiddenTag.Match(sCurrentTag);
- if (match.Success)
- {
- bContainsForbiddenTag = true;
- break;
- }
- }
- if (bContainsForbiddenTag)
- break;
- }
- //if (tagStack.Count == 0) // only perform replacement at tag depth 0.
- if (!bContainsForbiddenTag) // Ignores tag depth. Skips all text enclosed in one or more forbidden tags.
- {
- match = rxPlainText.Match(html, iStringPos, html.Length - iStringPos);
- if (match.Success)
- {
- string searchString = match.Value;
- int index = searchString.ToLower().IndexOf(oldString.ToLower());
- if (index != -1)
- {
- searchString = "<a href=\"/" + Globals.SiteAlias + "/jargon-" + urlString + ".aspx\" class=\"jargon\" title=\"" + Definition + "\">" + searchString.Substring(index, oldString.Length) + "</a>";
- }
- // Do the replace and move on.
- sbResult.Append( Regex.Replace(match.Value, oldString, searchString, RegexOptions.IgnoreCase) );
- // THIS DOESNT WORK
- //sbResult.Append( Regex.Replace(match.Value, @"\b" + oldString + "\b", searchString, RegexOptions.IgnoreCase) );
- iStringPos = match.Index + match.Length;
- }
- }
- match = rxTag.Match(html, iStringPos, html.Length - iStringPos);
- if (match.Success)
- {
- if (match.Value.StartsWith("</"))
- {
- try
- {
- if(match.Groups["tagname"].Value.ToLower().Trim().Equals(((string) tagStack.Peek())))
- tagStack.Pop();
- }
- catch
- {
- }
- }
- else if (match.Value.EndsWith("/>") || match.Value.StartsWith("<!--"))
- {
- // ignore
- }
- else
- {
- tagStack.Push(match.Groups["tagname"].Value.ToLower().Trim());
- }
- sbResult.Append( html.Substring(iStringPos, match.Index + match.Length - iStringPos));
- iStringPos = match.Index + match.Length;
- }
- }
- return sbResult.ToString();
- }
- #endregion