I have these two methods that are consuming a lot of CPU time in my
application. Does anyone have suggestions for optimizing them, or for
rewriting them without Regex? By far the most time-consuming operation is
the regex.Replace call. Basically, its only purpose is to tolerate (and
remove) whitespace between a tag's opening "<" or "</" and the element name.
Surely there is a better way.
// The script-tag patterns are parsed and compiled once instead of on every
// call; Regex instances are immutable and can be shared between calls.
// CultureInvariant keeps the case-insensitive match on "script" independent
// of the current culture (e.g. the Turkish dotted/dotless I rules).
private static readonly Regex OpenScriptTagRegex = new Regex(
    @"<\s*script",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

private static readonly Regex CloseScriptTagRegex = new Regex(
    @"<\s*/script>",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled);

/// <summary>
/// Cleans an HTML fragment with <c>FixupHTML</c> and then escapes it:
/// backslashes and double quotes are backslash-escaped, opening and closing
/// script tags are split into <c>scr" + "ipt</c>, and all carriage returns,
/// line feeds and tabs are removed.
/// </summary>
/// <remarks>
/// NOTE(review): the escaping suggests the result is meant to be placed inside
/// a double-quoted JavaScript string literal - confirm against the callers.
/// </remarks>
/// <param name="htmlCode">The HTML markup to convert.</param>
/// <returns>The escaped, single-line text with surrounding whitespace trimmed.</returns>
private string FixupJavascript(string htmlCode)
{
    string result = FixupHTML(htmlCode);

    // Backslashes must be escaped before quotes; otherwise the backslashes
    // added while escaping the quotes would themselves be doubled.
    result = result.Replace("\\", "\\\\");
    result = result.Replace("\"", "\\\"");

    // The inserted quotes are intentionally NOT escaped: they end the current
    // string literal and start a new one, so the literal tag name never
    // appears in one piece in the output.
    result = OpenScriptTagRegex.Replace(result, "<scr\" + \"ipt");
    result = CloseScriptTagRegex.Replace(result, "</scr\" + \"ipt>");

    // Strip every line-break character explicitly rather than relying on
    // Environment.NewLine, so "\r\n" input is fully flattened on any platform.
    result = result.Replace("\r", string.Empty)
                   .Replace("\n", string.Empty)
                   .Replace("\t", string.Empty);

    return result.Trim();
}
// All patterns are parsed and compiled once instead of on every call;
// Regex instances are immutable and can be shared between calls.

// Opening or closing <html ...> tag, tolerating whitespace after '<'.
private static readonly Regex HtmlTagRegex = new Regex(
    @"<\s*/*html[^>]*>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// Whole <head ...> ... </head> section. Singleline lets '.' cross line breaks
// so a multi-line head is matched; the lazy quantifier stops at the first
// closing tag; \b keeps <header> elements from being treated as <head>.
private static readonly Regex HeadSectionRegex = new Regex(
    @"<\s*head\b[^>]*>.*?<\s*/head\b[^>]*>",
    RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.Compiled);

// Opening or closing <body ...> tag, tolerating whitespace after '<'.
private static readonly Regex BodyTagRegex = new Regex(
    @"<\s*/*body[^>]*>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

// "//" through the end of the line, unless the "//" directly follows ':'
// (as in "http://"). The lookbehind checks the preceding character without
// consuming it, so the character before the comment is preserved.
private static readonly Regex LineCommentRegex = new Regex(
    @"(?<!:)//.*",
    RegexOptions.Compiled);

/// <summary>
/// Reduces an HTML document to an embeddable fragment: removes the
/// <c>html</c> and <c>body</c> tags (keeping their content), removes the
/// entire <c>head</c> section, and removes <c>//</c> line comments.
/// </summary>
/// <remarks>
/// Comment removal is purely textual: any <c>//</c> not directly preceded by
/// <c>:</c> is treated as the start of a comment, including one that appears
/// inside a string literal or attribute value (e.g. a protocol-relative URL).
/// </remarks>
/// <param name="htmlCode">The HTML markup to clean.</param>
/// <returns>The markup with the wrapper tags, head section and line comments removed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="htmlCode"/> is null.</exception>
private string FixupHTML(string htmlCode)
{
    string result = HtmlTagRegex.Replace(htmlCode, string.Empty);
    result = HeadSectionRegex.Replace(result, string.Empty);
    result = BodyTagRegex.Replace(result, string.Empty);
    result = LineCommentRegex.Replace(result, string.Empty);
    return result;
}