How do I make sure that my entire string was parsed when I call a
pyparsing element's parseString method? Here's a dramatically
simplified version of my problem:
py> import pyparsing as pp
py> match = pp.Word(pp.nums)
py> def parse_num(s, loc, toks):
....     n, = toks
....     return int(n) + 10
....
py> match.setParseAction(parse_num)
W:(0123...)
py> match.parseString('121abc')
([131], {})
I want to know (somehow) that when I called match.parseString(), there
was some of the string left over (in this case, 'abc') after the parse
was complete. How can I do this? (I don't think I can do character
counting; all my internal setParseAction() functions return non-strings).
STeVe
P.S. FWIW, I've included the real code below. I need to throw an
exception when I call the parseString method of cls._root_node or
cls._root_nodes and the entire string is not consumed.
----------------------------------------------------------------------
# some character classes
# NOTE(review): the two-argument str.translate(table, deletechars) call is the
# Python 2 form; `_id_trans` is defined outside this excerpt and is presumably
# an identity translation table, so each call just deletes the listed
# characters from pyparsing's printables -- confirm.
printables_trans = _pp.printables.translate
word_chars = printables_trans(_id_trans, '()')
syn_tag_chars = printables_trans(_id_trans, '()-=')
func_tag_chars = printables_trans(_id_trans, '()-=0123456789')

# basic tag components
# leaveWhitespace() stops these elements from skipping leading whitespace,
# so the pieces of a tag must be directly adjacent in the input.
sep = _pp.Literal('-').leaveWhitespace()
alt_sep = _pp.Literal('=').leaveWhitespace()
special_word = _pp.Combine(sep + _pp.Word(syn_tag_chars) + sep)
supp_sep = (alt_sep | sep).suppress()
syn_word = _pp.Word(syn_tag_chars).leaveWhitespace()
func_word = _pp.Word(func_tag_chars).leaveWhitespace()
id_word = _pp.Word(_pp.nums).leaveWhitespace()

# the different tag types
# The results names 'tag', 'funcs' and 'id' are the keys read back by the
# get_tag parse action.
special_tag = special_word.setResultsName('tag')
syn_tag = syn_word.setResultsName('tag')
func_tags = _pp.ZeroOrMore(supp_sep + func_word)
func_tags = func_tags.setResultsName('funcs')
id_tag = _pp.Optional(supp_sep + id_word).setResultsName('id')
tags = special_tag | (syn_tag + func_tags + id_tag)
def get_tag(orig_string, tokens_start, tokens):
    """Collapse the named results of a tag match into a single dict.

    Follows the pyparsing parse-action signature; only ``tokens`` is used.

    Args:
        orig_string: the string being parsed (unused).
        tokens_start: location of the match in ``orig_string`` (unused).
        tokens: mapping-like results with a required ``'tag'`` entry and
            optional ``'funcs'`` and ``'id'`` entries.

    Returns:
        A one-element list holding ``{'tag': ..., 'functions': [...],
        'id': ...}``. The tag ``'-NONE-'`` is mapped to ``None``; missing
        ``'funcs'`` becomes ``[]`` and missing ``'id'`` becomes ``None``.

    Raises:
        KeyError: if ``tokens`` has no ``'tag'`` entry.
    """
    tokens = dict(tokens)
    tag = tokens.pop('tag')
    if tag == '-NONE-':
        tag = None
    functions = list(tokens.pop('funcs', []))
    # Local is named tag_id so the builtin id() is not shadowed; the output
    # key is still 'id'.
    tag_id = tokens.pop('id', None)
    return [dict(tag=tag, functions=functions, id=tag_id)]
# every tag match is replaced by the single dict that get_tag builds
tags.setParseAction(get_tag)

# node parentheses (suppressed, so they never appear in the results)
start = _pp.Literal('(').suppress()
end = _pp.Literal(')').suppress()

# words
word = _pp.Word(word_chars).setResultsName('word')

# leaf nodes: a tag, optionally followed by a word
leaf_node = tags + _pp.Optional(word)
def get_leaf_node(orig_string, tokens_start, tokens):
    """Build a leaf node from a tag dict and an optional word.

    Follows the pyparsing parse-action signature; only ``tokens`` is used.
    ``tokens`` holds either ``[tag_dict, word]`` or just ``[tag_dict]``.

    Returns:
        ``cls(word=word, **tag_dict)``, where ``word`` is the result of
        ``cls._unescape`` on the matched word, or ``None`` when no word
        was matched.
    """
    try:
        tag_dict, word = tokens
    except ValueError:
        # The optional word was absent, so only the tag dict is present.
        tag_dict, = tokens
        word = None
    else:
        # Kept outside the try block so that a ValueError raised by
        # _unescape itself is not mistaken for the "no word" case.
        word = cls._unescape(word)
    return cls(word=word, **tag_dict)
leaf_node.setParseAction(get_leaf_node)
# node, recursive: declared as a Forward placeholder here so branch_node can
# refer to it before its grammar is supplied
node = _pp.Forward()

# branch nodes: a tag followed by one or more child nodes
branch_node = tags + _pp.OneOrMore(node)
def get_branch_node(orig_string, tokens_start, tokens):
    """Build a branch node from a tag dict followed by child nodes.

    Follows the pyparsing parse-action signature; only ``tokens`` is used.
    ``tokens[0]`` is the tag dict and ``tokens[1:]`` are the children.

    Returns:
        ``cls(children=tokens[1:], **tokens[0])``.
    """
    return cls(children=tokens[1:], **tokens[0])
branch_node.setParseAction(get_branch_node)
# node, recursive: supply the grammar for the Forward declared earlier
node << (start + (branch_node | leaf_node) + end)

# root node may have additional parentheses
root_node = node | (start + node + end)

# StringEnd() only matches at the end of the input, so parseString() on
# either root grammar now raises ParseException when part of the string is
# left unconsumed. StringEnd() adds no tokens, so the parse results keep
# their previous contents. It is appended after OneOrMore rather than inside
# root_node, since otherwise only a single root node could ever match.
cls._root_node = root_node + _pp.StringEnd()
cls._root_nodes = _pp.OneOrMore(root_node) + _pp.StringEnd()