# Origin: commit 1be5b573b540b2d8c777ed2b5461cb321ee32a14 by cfreksen,
# Sun, 29 Oct 2017 01:07:34 +0200, "Add initial lexer." (new file parser.py).
"""Parser for LLVM--."""

import ply.lex as lex


class LLVMLexer(object):
    """PLY lexer for LLVM--.

    Typical use::

        lexer = LLVMLexer()
        lexer.build()
        lexer.test('add i32 1 2')

    NOTE: the ``t_*`` rule functions below must keep their regex as the
    function docstring, because PLY reads the pattern from ``__doc__``.
    Explanations for those rules therefore live in ``#`` comments.
    """

    # Maps a keyword's spelling to its token type.  ``t_ID`` consults this
    # map so that keywords are not reported as plain identifiers.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # Complete token list, built once at class-definition time.  The
    # previous version appended the reserved names inside ``__init__`` with
    # ``+=``, which extended this shared class-level list in place and so
    # duplicated every keyword token each time another instance was created.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT',
    ] + list(reserved.values())

    # Set by ``build()``; ``None`` until then.
    lexer = None

    # Characters skipped between tokens (space and tab).
    t_ignore = ' \t'

    # Single-character punctuation tokens.
    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    # Function rules are tried by PLY in the order they are defined, so the
    # order of the methods below is significant (e.g. t_COMMENT, t_INT and
    # t_STRING must stay ahead of t_ID, whose pattern also accepts digits).

    # Discards everything from ';', 'declare' or 'target' to the end of the
    # line.  Returning nothing tells PLY to drop the match.
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        pass

    # Consumes runs of newlines and keeps the lexer's line counter current.
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)

    # Unsigned decimal literal; the token value becomes a Python int.
    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    # String literal of the form c"..."; the token value is the text
    # between the quotes (leading 'c"' and trailing '"' stripped).
    def t_STRING(self, t):
        r'c"[^"]*"'
        t.value = t.value[2:-1]
        return t

    # Bare identifier or keyword; keywords get their type from ``reserved``.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # '%name' identifier; the token value drops the leading '%'.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # '@name' identifier; the token value drops the leading '@'.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Reports a character no rule matched, then skips it so lexing continues.
    # The second number printed is ``t.lexpos`` (an offset into the input,
    # per the PLY documentation), not a column within the line.
    def t_error(self, t):
        print("{}:{}: Illegal character '{}'".format(t.lineno, t.lexpos, t.value[0]))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the PLY lexer from this object's rules.

        Keyword arguments are forwarded unchanged to ``lex.lex``.  The
        resulting lexer is stored on ``self.lexer``.
        """
        self.lexer = lex.lex(module=self, **kwargs)

    def test(self, data):
        """Tokenize ``data`` and print each token, one per line.

        Builds the lexer with default options first if ``build()`` has not
        been called yet.
        """
        if self.lexer is None:
            self.build()
        self.lexer.input(data)
        for tok in self.lexer:
            print(tok)


if __name__ == '__main__':
    m = LLVMLexer()
    m.build()
    data = '''123 456 c"abc" def add sdiv ; some comment
    '''
    m.test(data)