By using this site, you agree to our updated Privacy Policy and our Terms of Use. Manage your Cookies Settings.
446,383 Members | 1,993 Online
Bytes IT Community
+ Ask a Question
Need help? Post your question and get tips & solutions from a community of 446,383 IT Pros & Developers. It's quick & easy.

Regex performance issue

P: n/a
Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress consumes a
lot of time. In the test it is called a 100 times but in my real app it
could be called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so how
this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;

namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings

private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres cannot be
an empty string.");
Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;

Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;

Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;

Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;
throw new ArgumentOutOfRangeException(string.Format("{0} is not
a valid address.", address));
}
static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}

sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString( ) + " ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}

Oct 10 '08 #1
Share this Question
Share on Google+
6 Replies


P: n/a
Hello Bart,

In your validateAddress function you're recompiling the same regexes over
and over again. A compiled regex is faster than an uncompiled one, but the
compilation takes time.

To solve this, put your regexes in a private static readonly Regex instance
and reuse that. Like this:

private static readonly Regex dbBoolAddressRegex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

then from your validate method, use this instance.

Be sure to read up on thread safety. I'm nnot sure if you'll need to make
sure calls to the regex instances are synchronized. But that is something
you'll probably find in the docs, or which doesn't apply to you anyway.

Jesse
Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress
consumes a lot of time. In the test it is called a 100 times but in my
real app it could be called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so
how this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;
namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings
private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-
6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9
][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-
6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-
9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";
private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6]
[0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1
-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";
#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres
cannot be
an empty string.");
Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;
Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;
Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;
Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;
throw new ArgumentOutOfRangeException(string.Format("{0}
is not
a valid address.", address));
}
static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}
sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString( ) + "
ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}
--
Jesse Houwing
jesse.houwing at sogeti.nl
Oct 10 '08 #2

P: n/a
Further to Jesse's point - the Regex class is itself immutable; it is
my /understanding/ that methods like IsMatch etc are thread-safe. MSDN
doesn't make it very clear, though.

Marc
Oct 10 '08 #3

P: n/a
bart brought next idea :
Hi all,

Sorry for the lengthy post but as I learned I should post
concise-and-complete code.

So the code belows shows that the execution of ValidateAddress consumes a lot
of time. In the test it is called a 100 times but in my real app it could be
called 50000 or more times.

So my question is if it is somehow possible to speed this up and if so how
this can be done.

Thanks a lot in advance,

Bart

------ Code -----

using System;
using System.Text.RegularExpressions;

namespace ValidateAddress_speed_test
{
class Program
{
#region Regular expression strings

private const string dbBoolAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBX)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string dbMemAddress_pattern =
@"^(DB)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)(DBB|DBW|DBD|DBR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

private const string boolAddress_pattern =
@"^(M|E|A)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])(\.)[0-7]$";
private const string memAddress_pattern =
@"^(EB|EW|ED|AB|AW|AD|MB|MW|MD|MR)([0-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-6][0-6][0-5][0-3][0-6])$";

#endregion

private static void ValidateAddress(string address)
{
if (address == string.Empty)
throw new ArgumentOutOfRangeException("The addres cannot be
an empty string.");
Regex dbBool_Regex = new Regex(dbBoolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbBool_Regex.IsMatch(address))
return;

Regex dbMem_Regex = new Regex(dbMemAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (dbMem_Regex.IsMatch(address))
return;

Regex boolMem_Regex = new Regex(boolAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (boolMem_Regex.IsMatch(address))
return;

Regex Mem_Regex = new Regex(memAddress_pattern,
RegexOptions.Compiled | RegexOptions.IgnoreCase);

if (Mem_Regex.IsMatch(address))
return;
throw new ArgumentOutOfRangeException(string.Format("{0} is not a
valid address.", address));
}
static void Main(string[] args)
{
Console.WriteLine("Test started...");
System.Diagnostics.Stopwatch sw = new
System.Diagnostics.Stopwatch();
sw.Start();
for (int i = 0; i < 100; i++)
{
//ValidateAddress("DB0.DBX0.0");
//ValidateAddress("DB0.DBW0");
//ValidateAddress("M0.0");
ValidateAddress("MB0");
}

sw.Stop();
Console.WriteLine(sw.ElapsedMilliseconds.ToString( ) + " ms");
Console.WriteLine("Press any key to quit");
Console.ReadLine();
}
}
}
Compiling a regex will cost some time, saving a bit when you use it.
The best way (I think) to use a compiled regex:
Make a static readonly Regex variable with that compiled expression,
then use it multiple times.
This means you get the compile-cost just once and the speed benefit
(which in my experience is not huge but still present) every time.

Hans Kesting
Oct 10 '08 #4

P: n/a
Ah, found it:

http://msdn.microsoft.com/en-us/libr...ons.regex.aspx

"The Regex class is immutable (read-only) and is inherently thread
safe. Regex objects can be created on any thread and shared between
threads."
Oct 10 '08 #5

P: n/a
>
To solve this, put your regexes in a private static readonly Regex
instance and reuse that. Like this:

private static readonly Regex dbBoolAddressRegex = new
Regex(dbBoolAddress_pattern, RegexOptions.Compiled |
RegexOptions.IgnoreCase);
Thanks,

This is a huge performance boost :)

A 100000 times takes now about 763 ms

So this is great...

Bart
Oct 10 '08 #6

P: n/a
Hello Bart,
>To solve this, put your regexes in a private static readonly Regex
instance and reuse that. Like this:

private static readonly Regex dbBoolAddressRegex = new
Regex(dbBoolAddress_pattern, RegexOptions.Compiled |
RegexOptions.IgnoreCase);
Thanks,

This is a huge performance boost :)

A 100000 times takes now about 763 ms

So this is great...
You're welcome :)

--
Jesse Houwing
jesse.houwing at sogeti.nl
Oct 10 '08 #7

This discussion thread is closed

Replies have been disabled for this discussion.