Add initial lexer.

This commit is contained in:
cfreksen 2017-10-29 01:07:34 +02:00
parent f960445524
commit 1be5b573b5
No known key found for this signature in database
GPG Key ID: EAC13EE101008978

133
parser.py Normal file
View File

@ -0,0 +1,133 @@
"""Parser for LLVM--."""
import ply.lex as lex
class LLVMLexer(object):
    """PLY-based lexer for the LLVM-- language.

    PLY finds the lexing rules by introspecting this object: ``tokens`` lists
    the token names, each ``t_<NAME>`` string attribute is a regular
    expression, and each ``t_<NAME>`` method carries its regular expression
    as its docstring.  The rule methods below are therefore documented with
    ``#`` comments only; their docstrings must stay exactly the regex.

    Typical use::

        lexer = LLVMLexer()
        lexer.build()
        lexer.test(source_text)
    """

    # Keyword spelling -> token type.  ``t_ID`` consults this table so that
    # keywords are matched by the generic identifier rule and then re-typed.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # Complete token list: the non-keyword tokens followed by every keyword
    # token.  It is built once, here, instead of being extended in
    # ``__init__``: extending the shared class-level list per instance
    # appended the keyword tokens again for every new lexer (duplicate token
    # names) and left ``LLVMLexer.tokens`` incomplete until the first
    # instance had been created.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT',
    ] + list(reserved.values())

    # Characters skipped between tokens (space and tab).
    t_ignore = ' \t'

    # Single-character punctuation rules.
    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    # NOTE: the order in which the rule methods are defined is significant
    # to PLY (function rules are tried in definition order) -- do not
    # reorder them.

    # Comment rule: matches from ``;``, ``declare`` or ``target`` up to the
    # end of the line (with PLY's default regex flags).  Nothing is
    # returned, so the matched text produces no token.
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        return None

    # Newline rule: advances the lexer's line counter by the number of
    # consecutive newlines matched; produces no token.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Integer literal: the token value is converted from text to ``int``.
    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    # String literal of the form c"...": the token value is the text between
    # the quotes (the leading ``c"`` and the closing ``"`` are stripped).
    def t_STRING(self, t):
        r'c"[^"]*"'
        t.value = t.value[2:-1]
        return t

    # Bare identifier or keyword: spellings found in ``reserved`` get their
    # keyword token type, everything else is an ``ID``.
    # NOTE(review): the pattern also accepts ``-`` and a leading digit, so
    # text such as ``-5`` lexes as an ID rather than a negative INT --
    # confirm this is intended.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # ``%name`` identifier: the token value is the name without the ``%``.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # ``@name`` identifier: the token value is the name without the ``@``.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Error rule: prints the token's ``lineno``, its ``lexpos`` and the
    # offending character, then skips that one character so lexing goes on.
    def t_error(self, t):
        print("{}:{}: Illegal character '{}'".format(t.lineno, t.lexpos, t.value[0]))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the underlying PLY lexer and store it in ``self.lexer``.

        All keyword arguments are forwarded unchanged to ``lex.lex``.
        """
        self.lexer = lex.lex(module=self, **kwargs)

    def test(self, data):
        """Feed ``data`` to the lexer and print every token it produces.

        ``build`` must have been called first, since this reads
        ``self.lexer``.
        """
        self.lexer.input(data)
        for tok in self.lexer:
            print(tok)
if __name__ == '__main__':
    # Smoke test: lex a short sample containing integers, a string literal,
    # an identifier, two keywords and a trailing comment, printing each
    # token that comes out.
    sample_text = '''123 456 c"abc" def add sdiv ; some comment
'''
    demo_lexer = LLVMLexer()
    demo_lexer.build()
    demo_lexer.test(sample_text)