File: //snap/google-cloud-cli/396/platform/gsutil/third_party/pyparsing/examples/antlr_grammar.py
"""
antlr_grammar.py
Created on 4 sept. 2010
@author: luca
Submitted by Luca DallOlio, September, 2010
(Minor updates by Paul McGuire, June, 2012)
(Code idiom updates by Paul McGuire, April, 2019)
"""
from pyparsing import (
Word,
ZeroOrMore,
printables,
Suppress,
OneOrMore,
Group,
LineEnd,
Optional,
White,
originalTextFor,
hexnums,
nums,
Combine,
Literal,
Keyword,
cStyleComment,
Regex,
Forward,
MatchFirst,
And,
oneOf,
alphas,
alphanums,
delimitedList,
Char,
autoname_elements,
)
# http://www.antlr.org/grammar/ANTLR/ANTLRv3.g
(
QUOTE,
APOS,
EQ,
LBRACK,
RBRACK,
LBRACE,
RBRACE,
LPAR,
RPAR,
ROOT,
BANG,
AT,
TIL,
SEMI,
COLON,
VERT,
) = map(Suppress, "\"'=[]{}()^!@~;:|")
BSLASH = Literal("\\")
keywords = (
SRC_,
SCOPE_,
OPTIONS_,
TOKENS_,
FRAGMENT,
ID,
LEXER,
PARSER,
GRAMMAR,
TREE,
CATCH,
FINALLY,
THROWS,
PROTECTED,
PUBLIC,
PRIVATE,
) = list(
Keyword.using_each(
"""src scope options tokens fragment id lexer parser grammar tree catch finally throws protected
public private """.split(),
)
)
KEYWORD = MatchFirst(keywords)
# Tokens
EOL = Suppress(LineEnd()) # $
SGL_PRINTABLE = Char(printables)
singleTextString = originalTextFor(
ZeroOrMore(~EOL + (White(" \t") | Word(printables)))
).leaveWhitespace()
XDIGIT = hexnums
INT = Word(nums)
ESC = BSLASH + (
oneOf(list(r"nrtbf\">" + "'")) | ("u" + Word(hexnums, exact=4)) | SGL_PRINTABLE
)
LITERAL_CHAR = ESC | ~(APOS | BSLASH) + SGL_PRINTABLE
CHAR_LITERAL = APOS + LITERAL_CHAR + APOS
STRING_LITERAL = APOS + Combine(OneOrMore(LITERAL_CHAR)) + APOS
DOUBLE_QUOTE_STRING_LITERAL = '"' + ZeroOrMore(LITERAL_CHAR) + '"'
DOUBLE_ANGLE_STRING_LITERAL = "<<" + ZeroOrMore(SGL_PRINTABLE) + ">>"
TOKEN_REF = Word(alphas.upper(), alphanums + "_")
RULE_REF = Word(alphas.lower(), alphanums + "_")
ACTION_ESC = (
BSLASH.suppress() + APOS
| BSLASH.suppress()
| BSLASH.suppress() + (~(APOS | QUOTE) + SGL_PRINTABLE)
)
ACTION_CHAR_LITERAL = APOS + (ACTION_ESC | ~(BSLASH | APOS) + SGL_PRINTABLE) + APOS
ACTION_STRING_LITERAL = (
QUOTE + ZeroOrMore(ACTION_ESC | ~(BSLASH | QUOTE) + SGL_PRINTABLE) + QUOTE
)
SRC = SRC_.suppress() + ACTION_STRING_LITERAL("file") + INT("line")
id = TOKEN_REF | RULE_REF
SL_COMMENT = (
Suppress("//") + Suppress("$ANTLR") + SRC
| ZeroOrMore(~EOL + Word(printables)) + EOL
)
ML_COMMENT = cStyleComment
WS = OneOrMore(
Suppress(" ") | Suppress("\t") | (Optional(Suppress("\r")) + Literal("\n"))
)
WS_LOOP = ZeroOrMore(SL_COMMENT | ML_COMMENT)
NESTED_ARG_ACTION = Forward()
NESTED_ARG_ACTION << (
LBRACK
+ ZeroOrMore(NESTED_ARG_ACTION | ACTION_STRING_LITERAL | ACTION_CHAR_LITERAL)
+ RBRACK
)
ARG_ACTION = NESTED_ARG_ACTION
NESTED_ACTION = Forward()
NESTED_ACTION << (
LBRACE
+ ZeroOrMore(
NESTED_ACTION
| SL_COMMENT
| ML_COMMENT
| ACTION_STRING_LITERAL
| ACTION_CHAR_LITERAL
)
+ RBRACE
)
ACTION = NESTED_ACTION + Optional("?")
SCOPE = SCOPE_.suppress()
OPTIONS = OPTIONS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
TOKENS = TOKENS_.suppress() + LBRACE # + WS_LOOP + Suppress('{')
TREE_BEGIN = ROOT + LPAR
RANGE = Suppress("..")
REWRITE = Suppress("->")
# General Parser Definitions
# Grammar heading
optionValue = id | STRING_LITERAL | CHAR_LITERAL | INT | Literal("*").setName("s")
option = Group(id("id") + EQ + optionValue("value"))("option")
optionsSpec = OPTIONS + Group(OneOrMore(option + SEMI))("options") + RBRACE
tokenSpec = (
Group(TOKEN_REF("token_ref") + (EQ + (STRING_LITERAL | CHAR_LITERAL)("lit")))(
"token"
)
+ SEMI
)
tokensSpec = TOKENS + Group(OneOrMore(tokenSpec))("tokens") + RBRACE
attrScope = SCOPE_.suppress() + id + ACTION
grammarType = LEXER + PARSER + TREE
actionScopeName = id | LEXER("l") | PARSER("p")
action = AT + Optional(actionScopeName + Suppress("::")) + id + ACTION
grammarHeading = (
Optional(ML_COMMENT("ML_COMMENT"))
+ Optional(grammarType)
+ GRAMMAR
+ id("grammarName")
+ SEMI
+ Optional(optionsSpec)
+ Optional(tokensSpec)
+ ZeroOrMore(attrScope)
+ ZeroOrMore(action)
)
modifier = PROTECTED | PUBLIC | PRIVATE | FRAGMENT
ruleAction = AT + id + ACTION
throwsSpec = THROWS.suppress() + delimitedList(id)
ruleScopeSpec = (
(SCOPE_.suppress() + ACTION)
| (SCOPE_.suppress() + delimitedList(id) + SEMI)
| (SCOPE_.suppress() + ACTION + SCOPE_.suppress() + delimitedList(id) + SEMI)
)
unary_op = oneOf("^ !")
notTerminal = CHAR_LITERAL | TOKEN_REF | STRING_LITERAL
terminal = (
CHAR_LITERAL | TOKEN_REF + Optional(ARG_ACTION) | STRING_LITERAL | "."
) + Optional(unary_op)
block = Forward()
notSet = TIL + (notTerminal | block)
rangeNotPython = CHAR_LITERAL("c1") + RANGE + CHAR_LITERAL("c2")
atom = Group(
(rangeNotPython + Optional(unary_op)("op"))
| terminal
| (notSet + Optional(unary_op)("op"))
| (RULE_REF + Optional(ARG_ACTION("arg")) + Optional(unary_op)("op"))
)
element = Forward()
treeSpec = ROOT + LPAR + element * (2,) + RPAR
ebnfSuffix = oneOf("? * +")
ebnf = block + Optional(ebnfSuffix("op") | "=>")
elementNoOptionSpec = (
(id("result_name") + oneOf("= +=")("labelOp") + atom("atom") + Optional(ebnfSuffix))
| (id("result_name") + oneOf("= +=")("labelOp") + block + Optional(ebnfSuffix))
| atom("atom") + Optional(ebnfSuffix)
| ebnf
| ACTION
| (treeSpec + Optional(ebnfSuffix))
) # | SEMPRED ( '=>' -> GATED_SEMPRED | -> SEMPRED )
element <<= Group(elementNoOptionSpec)("element")
# Do not ask me why group is needed twice... seems like the xml that you see is not always the real structure?
alternative = Group(Group(OneOrMore(element))("elements"))
rewrite = Optional(Literal("TODO REWRITE RULES TODO"))
block <<= (
LPAR
+ Optional(Optional(optionsSpec("opts")) + COLON)
+ Group(
alternative("a1")
+ rewrite
+ Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
)("block")
+ RPAR
)
altList = (
alternative("a1")
+ rewrite
+ Group(ZeroOrMore(VERT + alternative("a2") + rewrite))("alternatives")
)
exceptionHandler = CATCH.suppress() + ARG_ACTION + ACTION
finallyClause = FINALLY.suppress() + ACTION
exceptionGroup = (OneOrMore(exceptionHandler) + Optional(finallyClause)) | finallyClause
ruleHeading = (
Optional(ML_COMMENT)("ruleComment")
+ Optional(modifier)("modifier")
+ id("ruleName")
+ Optional("!")
+ Optional(ARG_ACTION("arg"))
+ Optional(Suppress("returns") + ARG_ACTION("rt"))
+ Optional(throwsSpec)
+ Optional(optionsSpec)
+ Optional(ruleScopeSpec)
+ ZeroOrMore(ruleAction)
)
rule = Group(ruleHeading + COLON + altList + SEMI + Optional(exceptionGroup))("rule")
grammarDef = grammarHeading + Group(OneOrMore(rule))("rules")
autoname_elements()
def grammar():
return grammarDef
def __antlrAlternativesConverter(pyparsingRules, antlrBlock):
rule = None
if (
hasattr(antlrBlock, "alternatives")
and antlrBlock.alternatives != ""
and len(antlrBlock.alternatives) > 0
):
alternatives = []
alternatives.append(__antlrAlternativeConverter(pyparsingRules, antlrBlock.a1))
for alternative in antlrBlock.alternatives:
alternatives.append(
__antlrAlternativeConverter(pyparsingRules, alternative)
)
rule = MatchFirst(alternatives)("anonymous_or")
elif hasattr(antlrBlock, "a1") and antlrBlock.a1 != "":
rule = __antlrAlternativeConverter(pyparsingRules, antlrBlock.a1)
else:
raise Exception("Not yet implemented")
assert rule != None
return rule
def __antlrAlternativeConverter(pyparsingRules, antlrAlternative):
elementList = []
for element in antlrAlternative.elements:
rule = None
if hasattr(element.atom, "c1") and element.atom.c1 != "":
regex = r"[" + str(element.atom.c1[0]) + "-" + str(element.atom.c2[0] + "]")
rule = Regex(regex)("anonymous_regex")
elif hasattr(element, "block") and element.block != "":
rule = __antlrAlternativesConverter(pyparsingRules, element.block)
else:
ruleRef = element.atom[0]
assert ruleRef in pyparsingRules
rule = pyparsingRules[ruleRef](ruleRef)
if hasattr(element, "op") and element.op != "":
if element.op == "+":
rule = Group(OneOrMore(rule))("anonymous_one_or_more")
elif element.op == "*":
rule = Group(ZeroOrMore(rule))("anonymous_zero_or_more")
elif element.op == "?":
rule = Optional(rule)
else:
raise Exception("rule operator not yet implemented : " + element.op)
rule = rule
elementList.append(rule)
if len(elementList) > 1:
rule = Group(And(elementList))("anonymous_and")
else:
rule = elementList[0]
assert rule is not None
return rule
def __antlrRuleConverter(pyparsingRules, antlrRule):
rule = None
rule = __antlrAlternativesConverter(pyparsingRules, antlrRule)
assert rule != None
rule(antlrRule.ruleName)
return rule
def antlrConverter(antlrGrammarTree):
pyparsingRules = {}
antlrTokens = {}
for antlrToken in antlrGrammarTree.tokens:
antlrTokens[antlrToken.token_ref] = antlrToken.lit
for antlrTokenName, antlrToken in list(antlrTokens.items()):
pyparsingRules[antlrTokenName] = Literal(antlrToken)
antlrRules = {}
for antlrRule in antlrGrammarTree.rules:
antlrRules[antlrRule.ruleName] = antlrRule
pyparsingRules[antlrRule.ruleName] = Forward() # antlr is a top down grammar
for antlrRuleName, antlrRule in list(antlrRules.items()):
pyparsingRule = __antlrRuleConverter(pyparsingRules, antlrRule)
assert pyparsingRule != None
pyparsingRules[antlrRuleName] <<= pyparsingRule
return pyparsingRules
if __name__ == "__main__":
import contextlib
with contextlib.suppress(Exception):
grammarDef.create_diagram("antlr_grammar_diagram.html", vertical=2, show_groups=True)
text = """\
grammar SimpleCalc;
options {
language = Python;
}
tokens {
PLUS = '+' ;
MINUS = '-' ;
MULT = '*' ;
DIV = '/' ;
}
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
expr : term ( ( PLUS | MINUS ) term )* ;
term : factor ( ( MULT | DIV ) factor )* ;
factor : NUMBER ;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
NUMBER : (DIGIT)+ ;
/* WHITESPACE : ( '\t' | ' ' | '\r' | '\n'| '\u000C' )+ { $channel = HIDDEN; } ; */
fragment DIGIT : '0'..'9' ;
"""
antlrGrammarTree = grammar().parseString(text)
print(antlrGrammarTree.dump())
pyparsingRules = antlrConverter(antlrGrammarTree)
pyparsingRule = pyparsingRules["expr"]
pyparsingTree = pyparsingRule.parseString("2 - 5 * 42 + 7 / 25")
print(pyparsingTree.dump())