File: //snap/google-cloud-cli/396/platform/gsutil/third_party/pyparsing/examples/inv_regex.py
#
# original file: https://raw.githubusercontent.com/pyparsing/pyparsing/pyparsing_3.0.9/examples/invRegex.py
#
# Copyright 2008, Paul McGuire
#
# pyparsing script to expand a regular expression into all possible matching strings
# Supports:
# - {n} and {m,n} repetition, but not unbounded + or * repetition
# - ? optional elements
# - [] character ranges
# - () grouping
# - | alternation
#
__all__ = ["count", "invert"]
from pyparsing import (
Literal,
one_of,
Empty,
printables,
ParserElement,
Combine,
SkipTo,
infix_notation,
ParseFatalException,
Word,
nums,
OpAssoc,
Suppress,
ParseResults,
srange,
)
ParserElement.enablePackrat()
class CharacterRangeEmitter:
def __init__(self, chars):
# remove duplicate chars in character range, but preserve original order
seen = set()
self.charset = "".join(seen.add(c) or c for c in chars if c not in seen)
def __str__(self):
return "[" + self.charset + "]"
def __repr__(self):
return "[" + self.charset + "]"
def make_generator(self):
def gen_chars():
yield from self.charset
return gen_chars
class OptionalEmitter:
def __init__(self, expr):
self.expr = expr
def make_generator(self):
def optional_gen():
yield ""
yield from self.expr.make_generator()()
return optional_gen
class DotEmitter:
def make_generator(self):
def dot_gen():
yield from printables
return dot_gen
class GroupEmitter:
def __init__(self, exprs):
self.exprs = ParseResults(exprs)
def make_generator(self):
def group_gen():
def recurse_list(elist):
if len(elist) == 1:
yield from elist[0].make_generator()()
else:
for s in elist[0].make_generator()():
for s2 in recurse_list(elist[1:]):
yield s + s2
if self.exprs:
yield from recurse_list(self.exprs)
return group_gen
class AlternativeEmitter:
def __init__(self, exprs):
self.exprs = exprs
def make_generator(self):
def alt_gen():
for e in self.exprs:
yield from e.make_generator()()
return alt_gen
class LiteralEmitter:
def __init__(self, lit):
self.lit = lit
def __str__(self):
return "Lit:" + self.lit
def __repr__(self):
return "Lit:" + self.lit
def make_generator(self):
def lit_gen():
yield self.lit
return lit_gen
def handle_range(toks):
return CharacterRangeEmitter(srange(toks[0]))
def handle_repetition(toks):
toks = toks[0]
if toks[1] in "*+":
raise ParseFatalException("", 0, "unbounded repetition operators not supported")
if toks[1] == "?":
return OptionalEmitter(toks[0])
if "count" in toks:
return GroupEmitter([toks[0]] * int(toks.count))
if "minCount" in toks:
mincount = int(toks.minCount)
maxcount = int(toks.maxCount)
optcount = maxcount - mincount
if optcount:
opt = OptionalEmitter(toks[0])
for i in range(1, optcount):
opt = OptionalEmitter(GroupEmitter([toks[0], opt]))
return GroupEmitter([toks[0]] * mincount + [opt])
else:
return [toks[0]] * mincount
def handle_literal(toks):
lit = ""
for t in toks:
if t[0] == "\\":
if t[1] == "t":
lit += "\t"
else:
lit += t[1]
else:
lit += t
return LiteralEmitter(lit)
def handle_macro(toks):
macro_char = toks[0][1]
if macro_char == "d":
return CharacterRangeEmitter("0123456789")
elif macro_char == "w":
return CharacterRangeEmitter(srange("[A-Za-z0-9_]"))
elif macro_char == "s":
return LiteralEmitter(" ")
else:
raise ParseFatalException(
"", 0, "unsupported macro character (" + macro_char + ")"
)
def handle_sequence(toks):
return GroupEmitter(toks[0])
def handle_dot():
return CharacterRangeEmitter(printables)
def handle_alternative(toks):
return AlternativeEmitter(toks[0])
_parser = None
def parser():
global _parser
if _parser is None:
ParserElement.set_default_whitespace_chars("")
lbrack, rbrack, lbrace, rbrace, lparen, rparen, colon, qmark = Literal.using_each(
"[]{}():?"
)
re_macro = Combine("\\" + one_of("d w s"))
escaped_char = ~re_macro + Combine("\\" + one_of(list(printables)))
re_literal_char = (
"".join(c for c in printables if c not in r"\[]{}().*?+|") + " \t"
)
re_range = Combine(lbrack + SkipTo(rbrack, ignore=escaped_char) + rbrack) # type: ignore
re_literal = escaped_char | one_of(list(re_literal_char))
re_non_capture_group = Suppress("?:")
re_dot = Literal(".")
repetition = (
(lbrace + Word(nums)("count") + rbrace)
| (lbrace + Word(nums)("minCount") + "," + Word(nums)("maxCount") + rbrace)
| one_of(list("*+?"))
)
re_range.add_parse_action(handle_range)
re_literal.add_parse_action(handle_literal)
re_macro.add_parse_action(handle_macro)
re_dot.add_parse_action(handle_dot)
re_term = re_literal | re_range | re_macro | re_dot | re_non_capture_group
re_expr = infix_notation(
re_term,
[
(repetition, 1, OpAssoc.LEFT, handle_repetition),
(Empty(), 2, OpAssoc.LEFT, handle_sequence),
(Suppress("|"), 2, OpAssoc.LEFT, handle_alternative),
],
)
_parser = re_expr
return _parser
def count(gen):
"""Simple function to count the number of elements returned by a generator."""
return sum(1 for _ in gen)
def invert(regex):
r"""
Call this routine as a generator to return all the strings that
match the input regular expression.
for s in invert(r"[A-Z]{3}\d{3}"):
print s
"""
invre = GroupEmitter(parser().parseString(regex)).make_generator()
return invre()
def main():
tests = r"""
[A-EA]
[A-D]*
[A-D]{3}
X[A-C]{3}Y
X[A-C]{3}\(
X\d
foobar\d\d
foobar{2}
foobar{2,9}
fooba[rz]{2}
(foobar){2}
([01]\d)|(2[0-5])
(?:[01]\d)|(2[0-5])
([01]\d\d)|(2[0-4]\d)|(25[0-5])
[A-C]{1,2}
[A-C]{0,3}
[A-C]\s[A-C]\s[A-C]
[A-C]\s?[A-C][A-C]
[A-C]\s([A-C][A-C])
[A-C]\s([A-C][A-C])?
[A-C]{2}\d{2}
@|TH[12]
@(@|TH[12])?
@(@|TH[12]|AL[12]|SP[123]|TB(1[0-9]?|20?|[3-9]))?
@(@|TH[12]|AL[12]|SP[123]|TB(1[0-9]?|20?|[3-9])|OH(1[0-9]?|2[0-9]?|30?|[4-9]))?
(([ECMP]|HA|AK)[SD]|HS)T
[A-CV]{2}
A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|S[bcegimnr]?|T[abcehilm]|Uu[bhopqst]|U|V|W|Xe|Yb?|Z[nr]
(a|b)|(x|y)
(a|b) (x|y)
[ABCDEFG](?:#|##|b|bb)?(?:maj|min|m|sus|aug|dim)?[0-9]?(?:/[ABCDEFG](?:#|##|b|bb)?)?
(Fri|Mon|S(atur|un)|T(hur|ue)s|Wednes)day
A(pril|ugust)|((Dec|Nov|Sept)em|Octo)ber|(Febr|Jan)uary|Ju(ly|ne)|Ma(rch|y)
""".splitlines()
for t in tests:
t = t.strip()
if not t:
continue
print("-" * 50)
print(t)
try:
num = count(invert(t))
print(num)
maxprint = 30
for s in invert(t):
print(s)
maxprint -= 1
if not maxprint:
break
except ParseFatalException as pfe:
print(pfe.msg)
print("")
continue
print("")
if __name__ == "__main__":
main()