Add initial lexer.

2017-10-29 01:07:34 +02:00 · 2017-10-29 01:07:34 +02:00 · 1be5b573b5
commit 1be5b573b5
parent f960445524
1 changed files with 133 additions and 0 deletions
--- a/parser.py
+++ b/parser.py
@ -0,0 +1,133 @@
+"""Parser for LLVM--."""
+
+import ply.lex as lex
+
+class LLVMLexer(object):
+    """PLY lexer for LLVM-- source text.
+
+    PLY finds the rules by inspecting this object: ``tokens`` names every
+    token type, each ``t_<NAME>`` string attribute is a regex, and each
+    ``t_<NAME>`` method carries its regex as its docstring (so those
+    docstrings must not be edited as documentation).  Method rules are tried
+    in definition order, which is why t_COMMENT, t_INT and t_STRING have to
+    stay ahead of the catch-all t_ID.
+
+    Call build() to create ``self.lexer``; test() prints the tokens of a
+    string.
+    """
+
+    # Keyword text -> token type.  t_ID looks every identifier up here and
+    # retags the ones that are reserved words.
+    reserved = {
+        'if': 'IF',
+        'then': 'THEN',
+        'else': 'ELSE',
+        'while': 'WHILE',
+        'add': 'ADD',
+        'sub': 'SUB',
+        'mul': 'MUL',
+        'shl': 'SHL',
+        'lshr': 'LSHR',
+        'ashr': 'ASHR',
+        'and': 'AND',
+        'or': 'OR',
+        'xor': 'XOR',
+        'sdiv': 'SDIV',
+        'eq': 'EQ',
+        'ne': 'NE',
+        'slt': 'SLT',
+        'sle': 'SLE',
+        'sgt': 'SGT',
+        'sge': 'SGE',
+        'alloca': 'ALLOCA',
+        'load': 'LOAD',
+        'store': 'STORE',
+        'icmp': 'ICMP',
+        'call': 'CALL',
+        'bitcast': 'BITCAST',
+        'getelementptr': 'GETELEMENTPTR',
+        'zext': 'ZEXT',
+        'ptrtoint': 'PTRTOINT',
+        'ret': 'RET',
+        'br': 'BR',
+        'label': 'LABEL',
+        'define': 'DEFINE',
+        'null': 'NULL',
+        'global': 'GLOBAL',
+        'type': 'TYPE',
+        'to': 'TO',
+        'void': 'VOID',
+        'i1': 'I1',
+        'i8': 'I8',
+        'i32': 'I32',
+        'i64': 'I64',
+    }
+
+    # Complete token list, assembled once at class-definition time.  Extending
+    # it from __init__ with ``+=`` would mutate this shared list in place and
+    # append the keyword tokens again for every instance created.
+    tokens = [
+        'INT', 'STRING',
+        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
+        'ASSIGN', 'COLON', 'COMMA',
+        'PercentID', 'AtID', 'ID',
+        'COMMENT',
+    ] + list(reserved.values())
+
+    # Characters skipped between tokens (spaces and tabs).
+    t_ignore = ' \t'
+
+    # Single-character punctuation tokens.
+    t_ASTERIX = r'\*'
+    t_LPAREN = r'\('
+    t_RPAREN = r'\)'
+    t_LBRACK = r'\['
+    t_RBRACK = r'\]'
+    t_LBRACE = r'{'
+    t_RBRACE = r'}'
+    t_ASSIGN = r'='
+    t_COLON = r':'
+    t_COMMA = r','
+
+    def __init__(self):
+        # The PLY lexer object; stays None until build() is called.
+        self.lexer = None
+
+    # Consumes the rest of the line starting at ';', 'declare' or 'target'
+    # ('.' does not match a newline).  Nothing is returned, so no token is
+    # produced for the matched text.
+    def t_COMMENT(self, t):
+        r'(;|declare|target).*'
+        pass
+
+    # Consumes a run of newlines and advances the line counter by its length;
+    # produces no token.
+    def t_newline(self, t):
+        r'\n+'
+        t.lexer.lineno += len(t.value)
+
+    # Integer literal, converted to int.  The optional leading '-' keeps a
+    # negative literal such as -1 from falling through to t_ID, whose
+    # character class also accepts '-' and digits.
+    def t_INT(self, t):
+        r'-?\d+'
+        t.value = int(t.value)
+        return t
+
+    # String literal of the form c"..."; the token value is the text between
+    # the quotes (the leading c" and the trailing " are stripped).
+    def t_STRING(self, t):
+        r'c"[^"]*"'
+        t.value = t.value[2:-1]
+        return t
+
+    # Bare word: tagged with its keyword token type when it appears in
+    # ``reserved``, otherwise as a plain ID.
+    def t_ID(self, t):
+        r'[a-zA-Z0-9_-]+'
+        t.type = self.reserved.get(t.value, 'ID')
+        return t
+
+    # '%name' identifier; the token value is the name without the '%'.
+    def t_PercentID(self, t):
+        r'%[a-zA-Z0-9_-]+'
+        t.value = t.value[1:]
+        return t
+
+    # '@name' identifier; the token value is the name without the '@'.
+    def t_AtID(self, t):
+        r'@[a-zA-Z0-9_-]+'
+        t.value = t.value[1:]
+        return t
+
+    # Called when no rule matches: report the offending character together
+    # with the token's lineno and lexpos, then skip one character and go on.
+    def t_error(self, t):
+        print("{}:{}: Illegal character '{}'".format(t.lineno, t.lexpos, t.value[0]))
+        t.lexer.skip(1)
+
+    def build(self, **kwargs):
+        """Create the PLY lexer from this object's rules.
+
+        Keyword arguments are passed through to ``lex.lex`` unchanged.  The
+        result is stored in ``self.lexer``.
+        """
+        self.lexer = lex.lex(module=self, **kwargs)
+
+    def test(self, data):
+        """Tokenize ``data`` and print every token, one per line.
+
+        The lexer is built with default options first if build() has not
+        been called yet.
+        """
+        if getattr(self, 'lexer', None) is None:
+            self.build()
+        self.lexer.input(data)
+        for tok in self.lexer:
+            print(tok)
+
+
+if __name__ == '__main__':
+    # Smoke test: print the tokens of one sample line that mixes integers, a
+    # c"..." string, a plain identifier, two keywords and a ';' comment.
+    sample = '''123 456 c"abc" def add sdiv ; some comment
+    '''
+    demo = LLVMLexer()
+    demo.build()
+    demo.test(sample)