llvm--emulator/parser.py
2017-10-29 01:44:58 +02:00

189 lines
3.9 KiB
Python

"""Parser for LLVM--."""
import ply.lex as lex
class LLVMLexer(object):
    """PLY lexer for LLVM--.

    The instance itself is handed to ``ply.lex`` as the rule module (see
    ``build``).  For every ``t_*`` method the docstring *is* the regular
    expression PLY matches, so those methods are documented with ``#``
    comments instead of docstrings.  PLY tries function rules in the order
    they are defined, so the order of the methods below is significant.

    String constants (``c"..."``) are scanned in the exclusive ``string``
    state; their decoded contents are accumulated in ``current_string`` and
    emitted as one ``STRING`` token when the closing quote is reached.
    """

    # Decoded text of the string constant currently being scanned.
    current_string = ""
    # Offset of the newline character that precedes the current line
    # (-1 while on the first line), so ``lexpos - line_begin`` is always a
    # 1-based column number.
    line_begin = -1

    states = [
        ('string', 'exclusive')
    ]

    # Keyword spelling -> token type.  ``t_ID`` consults this table.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # Non-keyword token types.  The keyword types from ``reserved`` are
    # appended per instance in ``__init__``.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT'
    ]

    # Blanks and tabs are skipped outside strings; inside strings nothing is.
    t_ignore = ' \t'
    t_string_ignore = ''

    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    def __init__(self):
        """Give this instance its own complete token list."""
        # Build a fresh list rather than ``+=`` on the class attribute:
        # in-place extension mutated the list shared by all instances, so
        # every additional lexer appended the keyword tokens once more.
        self.tokens = list(type(self).tokens) + list(self.reserved.values())

    def _column(self, t):
        """Return the 1-based column of token *t* on its line."""
        return t.lexpos - self.line_begin

    # Skip ';' comments as well as 'declare' and 'target' lines entirely.
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        pass

    # Count line breaks and remember where the current line starts.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        # The match may span several newlines; the current line starts after
        # the *last* one, not after the first.
        self.line_begin = t.lexpos + len(t.value) - 1

    # A raw line break inside a string constant is reported but tolerated.
    def t_string_newline(self, t):
        r'\n'
        print("{}:{}: Newline is not allowed inside strings."
              .format(t.lineno, self._column(t)))
        t.lexer.lineno += 1
        self.line_begin = t.lexpos

    # Integer literal with optional leading minus.  Defined before t_ID so
    # that '-5' is not consumed by the identifier rule, whose character
    # class also contains '-' and digits.
    def t_INT(self, t):
        r'-?\d+'
        t.value = int(t.value)
        return t

    # 'c"' opens a string constant: switch to the exclusive string state.
    def t_begin_string(self, t):
        r'c\"'
        t.lexer.begin('string')
        self.current_string = ""

    # Escaped double quote.
    def t_string_doublequote(self, t):
        r'\\"'
        self.current_string += '"'

    # Escaped backslash.
    def t_string_backslash(self, t):
        r'\\\\'
        self.current_string += '\\'

    # Two-digit hex escape; both upper- and lowercase digits are accepted.
    def t_string_hex(self, t):
        r'\\[0-9a-fA-F][0-9a-fA-F]'
        code = int(t.value[1:], 16)
        self.current_string += chr(code)

    # Any other backslash is an error; it is reported and dropped.
    def t_string_singlebackslash(self, t):
        r'\\'
        print("{}:{}: Single backslash is not allowed inside strings."
              .format(t.lineno, self._column(t)))

    # Closing quote: emit the accumulated text as a STRING token.
    def t_string_end(self, t):
        r'"'
        t.value = self.current_string
        self.current_string = ""
        t.type = "STRING"
        t.lexer.begin('INITIAL')
        return t

    # Any other character is ordinary string content.
    def t_string_meat(self, t):
        r'.'
        self.current_string += t.value

    # Bare identifier; promoted to a keyword token if listed in ``reserved``.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # '%name' identifier; the sigil is stripped from the token value.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # '@name' identifier; the sigil is stripped from the token value.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Shared error rule for every state: report the character and skip it.
    def t_ANY_error(self, t):
        print("{}:{}: Illegal character '{}'"
              .format(t.lineno, self._column(t), t.value[0]))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the PLY lexer from this instance's rules.

        Keyword arguments are forwarded unchanged to ``lex.lex``.  The
        result is stored in ``self.lexer``.
        """
        self.lexer = lex.lex(module=self, **kwargs)

    def test(self, data):
        """Tokenize *data* and print each token (debugging aid).

        ``build`` must have been called first.
        """
        # Reset position and string state so that repeated calls do not
        # report line/column numbers left over from a previous input.
        self.current_string = ""
        self.line_begin = -1
        self.lexer.lineno = 1
        self.lexer.begin('INITIAL')
        self.lexer.input(data)
        for tok in self.lexer:
            print(tok)
if __name__ == '__main__':
    # Manual smoke test: lex a short sample containing integers, a string
    # constant with escapes, identifiers, keywords and a comment, printing
    # every token that comes out.
    sample = r'''123 456 c"abc _\2c_ \" \\ and so on" def add sdiv ; some comment
qqq
'''
    demo_lexer = LLVMLexer()
    demo_lexer.build()
    demo_lexer.test(sample)