We have to create a 'smart' regular expression. Instead of writing a one-line expression, we prepare a multi-line text from which we generate our long expression. Here is a simple example.
Expand|Select|Wrap|Line Numbers
- space [\s/-]+
- word \w+
- words (?:{word}{space})*?{word}
- birthday (?<birthday>\d+\.\d+\.\d+)
- title {word}\.
- name {words}
- person {title}{space}{name}{space}{birthday}
Expand|Select|Wrap|Line Numbers
- \w+\.[\s/-]+(?:\w+[\s/-]+)*?\w+[\s/-]+(?<birthday>\d+\.\d+\.\d+)
Expand|Select|Wrap|Line Numbers
/// <summary>
/// Builds long regular expressions from a multi-line list of named templates.
/// Each definition line has the form "name pattern". A pattern may refer to
/// another template as {name}; such references are expanded recursively.
/// </summary>
public class Lexer
{
    // Splits a definition line into the template name and its pattern.
    private static readonly Regex DefinitionLine = new Regex(@"([\w_]+)\s+(.*)");

    // Finds {name} references inside a pattern. The lookbehind skips Unicode
    // category escapes (\p{Lu} and the negated \P{Lu}), which share the brace
    // syntax. A reference needs a leading letter and at least two characters,
    // so quantifiers such as {2,3} and one-character names such as {x} are
    // left untouched.
    private static readonly Regex TemplateReference = new Regex(@"(?<!\\[pP]){([a-zA-Z][\w_]+)}");

    // Template name -> raw (unexpanded) pattern.
    private NameValueCollection col;

    /// <summary>Creates a lexer with no template definitions.</summary>
    public Lexer()
    {
        col = new NameValueCollection();
    }

    /// <summary>
    /// Parses template definitions, one per line. Lines that do not look like
    /// "name pattern" are ignored. When a name is defined more than once, the
    /// last definition wins.
    /// </summary>
    /// <param name="resource">The multi-line definition text.</param>
    /// <returns>A lexer holding the parsed definitions.</returns>
    public static Lexer Create(string resource)
    {
        Lexer lex = new Lexer();
        using (StringReader sr = new StringReader(resource))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                Match m = DefinitionLine.Match(line);
                if (m.Success)
                {
                    // Set, not Add: Add would keep both values and the indexer
                    // would then return them joined with a comma ("a,b").
                    lex.col.Set(m.Groups[1].Value.Trim(), m.Groups[2].Value.Trim());
                }
            }
        }
        return lex;
    }

    /// <summary>
    /// Returns the fully expanded regular expression for a template. Every
    /// expanded template is prefixed with an inline comment "(?#name)", and a
    /// template whose own pattern contains '|' is wrapped in a non-capturing
    /// group so the alternation cannot leak into the surrounding expression.
    /// </summary>
    /// <param name="name">The template name; null or empty yields an empty string.</param>
    /// <returns>The expanded expression.</returns>
    /// <exception cref="ArgumentException">
    /// The template, or a template it refers to, is not defined.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The template refers to itself, directly or through other templates.
    /// </exception>
    public string GetExpression(string name)
    {
        return Expand(name, new StringCollection());
    }

    // Expands one template; 'path' holds the templates currently being
    // expanded so that circular references are reported instead of
    // recursing until the stack overflows.
    private string Expand(string name, StringCollection path)
    {
        if (name == null || name.Length == 0)
        {
            return string.Empty;
        }

        string res = col[name];
        if (res == null)
        {
            throw new ArgumentException("Template not found: " + name, "name");
        }

        if (path.Contains(name))
        {
            string[] chain = new string[path.Count];
            path.CopyTo(chain, 0);
            throw new InvalidOperationException(
                "Circular template reference: " + string.Join(" -> ", chain) + " -> " + name);
        }

        path.Add(name);
        try
        {
            // Replace through the regex itself so that only the references it
            // matched are expanded (a plain string.Replace would also rewrite
            // \p{...} escapes that happen to share a template's name).
            string expanded = TemplateReference.Replace(
                res,
                delegate(Match m) { return Expand(m.Groups[1].Value, path); });

            // Only this template's own pattern is checked: nested templates
            // have already grouped themselves where necessary.
            bool needGroup = res.IndexOf('|') >= 0;
            if (needGroup)
            {
                expanded = "(?:" + expanded + ")";
            }

            return "(?#" + name + ")" + expanded;
        }
        finally
        {
            path.RemoveAt(path.Count - 1);
        }
    }
}
Then we can create an instance of the class and get the regular expression:
Expand|Select|Wrap|Line Numbers
- Lexer lex = Lexer.Create(txtLexerText.Text); // parse the template definitions; txtLexerText is presumably a UI text control holding them - confirm
- string expr = lex.GetExpression("person"); // expand the "person" template into one long expression
- Regex reg = new Regex(expr); // build the final Regex from the expanded expression