472,139 Members | 1,810 Online
Bytes | Software Development & Data Engineering Community
Post +

Home Posts Topics Members FAQ

Join Bytes to post your question to a community of 472,139 software developers and data experts.

Error checking using regex ?

I have the code below which parses an expression string and creates tokens.

Can anyone suggest the best way of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy

code:

import re
import time

# Token patterns.  They are joined into one alternation below, so the order
# in which they are listed is the order in which they are tried.
# Raw strings keep the backslashes for the regex engine instead of relying on
# Python passing unknown string escapes through unchanged.
re_par = r'[\(\)]'                          # a single parenthesis
# Number with optional exponent marker.  The marker is a plain ``E``: the
# former ``\E`` is not a regex escape and newer ``re`` versions reject it.
re_num = r'[0-9]*\.?[0-9]+E?[0-9]*'
re_opr = r'[\*\/\+\-\^]'                    # a single arithmetic operator
re_cns = r'PI'                              # named constant
re_trg = r'SIN|COS|TAN|ASIN|ACOS|ATAN|SGN'  # built-in function names
re_var = r'[a-z_0-9\s]*\.?[a-z_0-9\s]*'     # obj.attribute, whitespace allowed

# One capturing group named ``token``; matching is case-insensitive.
recom = re.compile(
    '(?P<token>%s|%s|%s|%s|%s|%s)'
    % (re_par, re_num, re_opr, re_cns, re_trg, re_var),
    re.VERBOSE | re.IGNORECASE)

def test(str):
    """Split the expression *str* into tokens with ``recom`` and print them.

    Each piece returned by ``recom.split`` is stripped of surrounding
    whitespace and blank pieces are dropped; the remaining pieces are printed
    as a list.  If anything goes wrong a short error message is printed
    instead of the list.
    """
    output = []
    try:
        for rr in recom.split(str):
            rr = rr.strip()
            # skip blank fragments
            if rr:
                output.append(rr)
        print(output)
    except Exception:
        # Still best-effort, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` did.
        print('error of some kind')

class stopwatch:
    """Minimal wall-clock timer based on ``time.time()``."""

    def __init__(self):
        # 0 is the same idle value that stop() resets to.  Setting it here
        # means stop() no longer raises AttributeError when start() was never
        # called (the reported duration is then measured from time 0).
        self.t = 0

    def start(self):
        """Record the current time and return a status message."""
        self.t = time.time()
        return 'starting timer'

    def stop(self):
        """Return a message with the seconds elapsed since start(); reset."""
        rstr = 'stopped at %f seconds' % (time.time() - self.t)
        self.t = 0
        return rstr

# Time the whole run: tokenise every sample expression and print the tokens.
e = stopwatch()
print(e.start())
test( "9" )
test( "9 + 3 + 6" )
test( "9 + 3 / 11" )
test( "( 9 + 3)" )
test( "(9+3) / 11" )
test( "9 - 12 - 6" )
test( "-9 - (12 - 6)" )
test( "2*3.14159" )
test( "3.1415926535*3.1415926535 / 10" )
test( "PI * PI / 10" )
test( "PI*PI/10" )
test( "PI^2" )
test( "6.02E23 * 8.048" )
test( "sin(PI/2)" )
test( "2^3^2" )
test( "2^9" )
test( "sgn(-2)" )
test( "sgn(0)" )
test( "sgn(0.1)" )
test( "ff*2" )
test( "ff*g g/2" )
test( "ff*2/dd.r r")
test( "5*4+300/(5-2)*(6+4)+4" )
test( "((5*4+300)/(5-2))*(6+4)+4" )
test( "(320/3)*10+4" )

#now test error expressions

test( "ff*2/dd.r..ss r") #additional ..ss and whitespace -invalid variable
test( "ff*$24..55/ddr") #double .. -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable
#test( "ff*((w.w+3)-2") #no closing parentheses-to be tested when evaluating expression

print(e.stop())
Jul 18 '05 #1
3 1748
Am Dienstag, 8. Juni 2004 13:26 schrieb Guy Robinson:
I have the code below which parses an expression string and creates tokens.


You cannot parse expressions using regular expressions, nor check them for
errors, because the languages that regular expressions can describe are not
"intelligent" enough to match braces (see any primer on complexity theory:
you need a machine with state, such as a deterministic stack machine, to
check for matching braces).

Your best bet to be able to check an expression, and also to be able to parse
it, is to write a context free grammar for your syntax, try to parse the
string you're evaluating, and in case parsing fails, to complain that the
expression is invalid. If you're parsing Python expressions, your best bet is
to call functions from the compile module (which create a code object from a
Python expression which is callable using exec).

HTH!

Heiko.

Jul 18 '05 #2
"Guy Robinson" <gu*@NOSPAM.r-e-d.co.nz> wrote in message
news:ca**********@lust.ihug.co.nz...
I have the code below which parses an expression string and creates tokens.
Can anyone suggest the best of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy

<snip>

Guy -

Well, I recognize the test cases from an example that I include with
pyparsing. Are you trying to add support for variables to that example? If
so, here is the example, modified to support assignments to variables.

-- Paul

============================
# minimath.py (formerly fourfn.py)
#
# Demonstration of the parsing module, implementing a simple 4-function
# expression parser,
# with support for scientific notation, and symbols for e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended to add variable assignment, storage, and evaluation, and
# Python-like comments.
#
# Copyright 2003,2004 by Paul McGuire
#
from pyparsing import Literal, CaselessLiteral, Word, Combine, Group, \
    Optional, ZeroOrMore, OneOrMore, Forward, nums, alphas, restOfLine, \
    delimitedList
import math

# Values stored by assignVar(), keyed by variable name.
variables = {}
# Postfix token stack that the pushFirst() parse action appends to.
exprStack = []

def pushFirst(str, loc, toks):
    """Parse action: append the first matched token to ``exprStack``.

    *str* and *loc* are part of the parse-action signature and are not used.
    Nothing is pushed when *toks* is empty.  The tokens are returned
    unchanged.
    """
    if toks:
        exprStack.append(toks[0])
    return toks

def assignVar(str, loc, toks):
    """Parse action for an assignment: evaluate, store, then push the name.

    The expression already on ``exprStack`` is evaluated (which empties the
    stack) and stored in ``variables`` under the name ``toks[0]``.  The name
    is then pushed, so a later evaluation of the stack yields the value that
    was just assigned.
    """
    variables[toks[0]] = evaluateStack(exprStack)
    pushFirst(str, loc, toks)
bnf = None  # grammar cache, filled in by the first BNF() call


def BNF():
    """Return the expression grammar, building it on the first call.

    The grammar accepts an optional assignment (``name = expr``) or a bare
    expression made of numbers, PI, E, dotted identifiers, one-argument
    function calls, parentheses and the operators ``+ - * / ^``.  Text from
    ``#`` to the end of the line is ignored.  Parse actions push tokens onto
    ``exprStack`` (via pushFirst) and record assignments (via assignVar).
    """
    global bnf
    if bnf is None:
        point = Literal(".")
        # NOTE(review): ``e`` is tried before identifiers in ``atom``, so a
        # name starting with "e"/"E" is presumably read as the constant --
        # confirm before relying on such names.
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        varident = delimitedList(ident, ".", combine=True)

        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")

        # expr is referenced by atom/factor before it is defined, hence the
        # Forward placeholder that is filled in with ``<<`` below.
        expr = Forward()
        atom = ((pi | e | fnumber | ident + lpar + expr + rpar |
                 varident).setParseAction(pushFirst) |
                (lpar + expr.suppress() + rpar))
        factor = atom + ZeroOrMore((expop + expr).setParseAction(pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        assignment = (varident + "=" + expr).setParseAction(assignVar)

        bnf = Optional(assignment | expr)

        comment = "#" + restOfLine
        bnf.ignore(comment)

    return bnf

# map operator symbols to corresponding arithmetic operations
opn = {"+": (lambda a, b: a + b),
       "-": (lambda a, b: a - b),
       "*": (lambda a, b: a * b),
       "/": (lambda a, b: a / b),
       "^": (lambda a, b: a ** b)}
# map function names to their one-argument implementations
fn = {"sin": math.sin,
      "cos": math.cos,
      "tan": math.tan,
      "abs": abs,
      "trunc": (lambda a: int(a)),
      # floor(a + 0.5) rounds halves upward for negative values as well;
      # int(a + 0.5) truncated toward zero and turned -2.7 into -2.
      "round": (lambda a: int(math.floor(a + 0.5))),
      # -1, 0 or 1 according to the sign of a
      "sgn": (lambda a: (a > 0) - (a < 0))}
def evaluateStack(s):
    """Evaluate the postfix token stack *s* and return the result.

    Tokens are popped from the end of *s*, so the list is consumed.  An empty
    stack evaluates to 0.0.  A binary operator evaluates its right operand
    first (it is on top of the stack), then its left operand.  A name is
    looked up in ``variables`` first; otherwise it is treated as a function
    from ``fn`` applied to the value below it.  Any other token is converted
    with ``float``.

    Raises KeyError for a name that is neither a variable nor a function.
    """
    if not s:
        return 0.0
    op = s.pop()
    # Exact key lookup; a substring test against "+-*/^" would also accept
    # tokens such as "+-".
    if op in opn:
        op2 = evaluateStack(s)
        op1 = evaluateStack(s)
        return opn[op](op1, op2)
    elif op == "PI":
        return math.pi
    elif op == "E":
        return math.e
    elif op[0].isalpha():
        if op in variables:
            return variables[op]
        fnarg = evaluateStack(s)
        return (fn[op])(fnarg)
    else:
        return float(op)

if __name__ == "__main__":

    def test(str):
        """Parse *str*, then print the parse result, postfix stack and value."""
        global exprStack
        exprStack = []
        results = BNF().parseString(str)
        # evaluateStack() pops the stack empty, so copy it first; the printed
        # line then shows the postfix form that was actually evaluated.
        postfix = list(exprStack)
        value = evaluateStack(exprStack)
        print("%s -> %s => %s = %s" % (str, results, postfix, value))

    test( "9" )
    test( "9 + 3 + 6" )
    test( "9 + 3 / 11" )
    test( "(9 + 3)" )
    test( "(9+3) / 11" )
    test( "9 - 12 - 6" )
    test( "9 - (12 - 6)" )
    test( "2*3.14159" )
    test( "3.1415926535*3.1415926535 / 10" )
    test( "PI * PI / 10" )
    test( "PI*PI/10" )
    test( "PI^2" )
    test( "6.02E23 * 8.048" )
    test( "e / 3" )
    test( "sin(PI/2)" )
    test( "trunc(E)" )
    test( "E^PI" )
    test( "2^3^2" )
    test( "2^9" )
    test( "sgn(-2)" )
    test( "sgn(0)" )
    test( "sgn(0.1)" )
    test( "5*4+300/(5-2)*(6+4)+4" )
    test( "((5*4+301)/(5-2))*(6+4)+4" )
    test( "(321/3)*10+4" )
    test( "# nothing but comments" )
    test( "a = 2^10" )
    test( "a^0.1 # same as 10th root of 1024" )
    test( "c = a" )
    test( "b=a" )
    test( "b-c" )
Jul 18 '05 #3
Hi Paul,

Yep your examples :-) I'm using this as a learning experience and have
looked at your code but I have specific requirements for integration
into another application.

I'm using the regex to create a list of tokens to be processed into a
postfix processing string. This is then offloaded to another class that
processes the string for each database row.

The speed to generate the postfix string isn't important. But the speed
to process for each database row is.

Guy
"Guy Robinson" <gu*@NOSPAM.r-e-d.co.nz> wrote in message
news:ca**********@lust.ihug.co.nz...
I have the code below which parses an expression string and creates


tokens.
Can anyone suggest the best of error checking for things like:

Valid variable only obj.attribute -whitespace allowed

test( "ff*2/dd.r..ss r") #additional ..ss -invalid variable.
test( "ff*$24..55/ddr") #double .. and $ -invalid number
test( "ff*2/dd.r.ss r") #variable with double . -invalid variable

I can't see an efficient way of doing this so any suggestions appreciated.

TIA,

Guy


<snip>

Guy -

Well, I recognize the test cases from an example that I include with
pyparsing. Are you trying to add support for variables to that example? If
so, here is the example, modified to support assignments to variables.

-- Paul

============================
# minimath.py (formerly fourfn.py)
#
# Demonstration of the parsing module, implementing a simple 4-function
# expression parser,
# with support for scientific notation, and symbols for e and pi.
# Extended to add exponentiation and simple built-in functions.
# Extended to add variable assignment, storage, and evaluation, and
# Python-like comments.
#
# Copyright 2003,2004 by Paul McGuire
#
from pyparsing import Literal, CaselessLiteral, Word, Combine, Group, \
    Optional, ZeroOrMore, OneOrMore, Forward, nums, alphas, restOfLine, \
    delimitedList
import math

# Values stored by assignVar(), keyed by variable name.
variables = {}
# Postfix token stack that the pushFirst() parse action appends to.
exprStack = []

def pushFirst(str, loc, toks):
    """Parse action: append the first matched token to ``exprStack``.

    *str* and *loc* are part of the parse-action signature and are not used.
    Nothing is pushed when *toks* is empty.  The tokens are returned
    unchanged.
    """
    if toks:
        exprStack.append(toks[0])
    return toks

def assignVar(str, loc, toks):
    """Parse action for an assignment: evaluate, store, then push the name.

    The expression already on ``exprStack`` is evaluated (which empties the
    stack) and stored in ``variables`` under the name ``toks[0]``.  The name
    is then pushed, so a later evaluation of the stack yields the value that
    was just assigned.
    """
    variables[toks[0]] = evaluateStack(exprStack)
    pushFirst(str, loc, toks)
bnf = None  # grammar cache, filled in by the first BNF() call


def BNF():
    """Return the expression grammar, building it on the first call.

    The grammar accepts an optional assignment (``name = expr``) or a bare
    expression made of numbers, PI, E, dotted identifiers, one-argument
    function calls, parentheses and the operators ``+ - * / ^``.  Text from
    ``#`` to the end of the line is ignored.  Parse actions push tokens onto
    ``exprStack`` (via pushFirst) and record assignments (via assignVar).
    """
    global bnf
    if bnf is None:
        point = Literal(".")
        # NOTE(review): ``e`` is tried before identifiers in ``atom``, so a
        # name starting with "e"/"E" is presumably read as the constant --
        # confirm before relying on such names.
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")
        varident = delimitedList(ident, ".", combine=True)

        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")

        # expr is referenced by atom/factor before it is defined, hence the
        # Forward placeholder that is filled in with ``<<`` below.
        expr = Forward()
        atom = ((pi | e | fnumber | ident + lpar + expr + rpar |
                 varident).setParseAction(pushFirst) |
                (lpar + expr.suppress() + rpar))
        factor = atom + ZeroOrMore((expop + expr).setParseAction(pushFirst))
        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        assignment = (varident + "=" + expr).setParseAction(assignVar)

        bnf = Optional(assignment | expr)

        comment = "#" + restOfLine
        bnf.ignore(comment)

    return bnf

# map operator symbols to corresponding arithmetic operations
opn = {"+": (lambda a, b: a + b),
       "-": (lambda a, b: a - b),
       "*": (lambda a, b: a * b),
       "/": (lambda a, b: a / b),
       "^": (lambda a, b: a ** b)}
# map function names to their one-argument implementations
fn = {"sin": math.sin,
      "cos": math.cos,
      "tan": math.tan,
      "abs": abs,
      "trunc": (lambda a: int(a)),
      # floor(a + 0.5) rounds halves upward for negative values as well;
      # int(a + 0.5) truncated toward zero and turned -2.7 into -2.
      "round": (lambda a: int(math.floor(a + 0.5))),
      # -1, 0 or 1 according to the sign of a
      "sgn": (lambda a: (a > 0) - (a < 0))}
def evaluateStack(s):
    """Evaluate the postfix token stack *s* and return the result.

    Tokens are popped from the end of *s*, so the list is consumed.  An empty
    stack evaluates to 0.0.  A binary operator evaluates its right operand
    first (it is on top of the stack), then its left operand.  A name is
    looked up in ``variables`` first; otherwise it is treated as a function
    from ``fn`` applied to the value below it.  Any other token is converted
    with ``float``.

    Raises KeyError for a name that is neither a variable nor a function.
    """
    if not s:
        return 0.0
    op = s.pop()
    # Exact key lookup; a substring test against "+-*/^" would also accept
    # tokens such as "+-".
    if op in opn:
        op2 = evaluateStack(s)
        op1 = evaluateStack(s)
        return opn[op](op1, op2)
    elif op == "PI":
        return math.pi
    elif op == "E":
        return math.e
    elif op[0].isalpha():
        if op in variables:
            return variables[op]
        fnarg = evaluateStack(s)
        return (fn[op])(fnarg)
    else:
        return float(op)

if __name__ == "__main__":

    def test(str):
        """Parse *str*, then print the parse result, postfix stack and value."""
        global exprStack
        exprStack = []
        results = BNF().parseString(str)
        # evaluateStack() pops the stack empty, so copy it first; the printed
        # line then shows the postfix form that was actually evaluated.
        postfix = list(exprStack)
        value = evaluateStack(exprStack)
        print("%s -> %s => %s = %s" % (str, results, postfix, value))

    test( "9" )
    test( "9 + 3 + 6" )
    test( "9 + 3 / 11" )
    test( "(9 + 3)" )
    test( "(9+3) / 11" )
    test( "9 - 12 - 6" )
    test( "9 - (12 - 6)" )
    test( "2*3.14159" )
    test( "3.1415926535*3.1415926535 / 10" )
    test( "PI * PI / 10" )
    test( "PI*PI/10" )
    test( "PI^2" )
    test( "6.02E23 * 8.048" )
    test( "e / 3" )
    test( "sin(PI/2)" )
    test( "trunc(E)" )
    test( "E^PI" )
    test( "2^3^2" )
    test( "2^9" )
    test( "sgn(-2)" )
    test( "sgn(0)" )
    test( "sgn(0.1)" )
    test( "5*4+300/(5-2)*(6+4)+4" )
    test( "((5*4+301)/(5-2))*(6+4)+4" )
    test( "(321/3)*10+4" )
    test( "# nothing but comments" )
    test( "a = 2^10" )
    test( "a^0.1 # same as 10th root of 1024" )
    test( "c = a" )
    test( "b=a" )
    test( "b-c" )

Jul 18 '05 #4

This discussion thread is closed

Replies have been disabled for this discussion.

Similar topics

8 posts views Thread by unndunn | last post: by
6 posts views Thread by Maurice LING | last post: by
6 posts views Thread by Mike P | last post: by
reply views Thread by Buddy Home | last post: by
2 posts views Thread by akhilesh.noida | last post: by
reply views Thread by leo001 | last post: by

By using Bytes.com and its services, you agree to our Privacy Policy and Terms of Use.

To disable or enable advertisements and analytics tracking please visit the manage ads & tracking page.