# llvm--emulator/parser.py

"""Parser for LLVM--."""

import ply.lex as lex

class LLVMLexer(object):
    """PLY lexer for LLVM-- source text.

    The lexer has two states: the default ``INITIAL`` state and an exclusive
    ``string`` state that is entered on ``c"`` and left on the closing ``"``.
    While in the ``string`` state the decoded contents are accumulated in
    ``current_string`` and emitted as a single ``STRING`` token.

    NOTE: PLY takes the docstring of every ``t_*`` method as that rule's
    regular expression and tries function rules in definition order, so those
    docstrings must stay pure regexes and the methods must not be reordered.
    Explanations therefore live in ``#`` comments above each rule.

    Diagnostics are printed as ``line:column: message`` where the column is
    1-based.
    """

    # Decoded text of the string literal currently being scanned.
    current_string = ""
    # Input offset of the first character of the current line; used to turn
    # a token's absolute ``lexpos`` into a column number.
    line_begin = 0

    states = [
        ('string', 'exclusive')
    ]

    # Maps keyword spelling -> token type. Looked up in ``t_ID``.
    reserved = {
        'if': 'IF',
        'then': 'THEN',
        'else': 'ELSE',
        'while': 'WHILE',
        'add': 'ADD',
        'sub': 'SUB',
        'mul': 'MUL',
        'shl': 'SHL',
        'lshr': 'LSHR',
        'ashr': 'ASHR',
        'and': 'AND',
        'or': 'OR',
        'xor': 'XOR',
        'sdiv': 'SDIV',
        'eq': 'EQ',
        'ne': 'NE',
        'slt': 'SLT',
        'sle': 'SLE',
        'sgt': 'SGT',
        'sge': 'SGE',
        'alloca': 'ALLOCA',
        'load': 'LOAD',
        'store': 'STORE',
        'icmp': 'ICMP',
        'call': 'CALL',
        'bitcast': 'BITCAST',
        'getelementptr': 'GETELEMENTPTR',
        'zext': 'ZEXT',
        'ptrtoint': 'PTRTOINT',
        'ret': 'RET',
        'br': 'BR',
        'label': 'LABEL',
        'define': 'DEFINE',
        'null': 'NULL',
        'global': 'GLOBAL',
        'type': 'TYPE',
        'to': 'TO',
        'void': 'VOID',
        'i1': 'I1',
        'i8': 'I8',
        'i32': 'I32',
        'i64': 'I64',
    }

    # The keyword token types are appended here, once, at class-creation time
    # so the class attribute is always complete and is never mutated later.
    tokens = [
        'INT', 'STRING',
        'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',
        'ASSIGN', 'COLON', 'COMMA',
        'PercentID', 'AtID', 'ID',
        'COMMENT'
    ] + list(reserved.values())

    t_ignore = ' \t'
    # Nothing is skipped inside string literals: blanks are string content.
    t_string_ignore = ''
    t_ASTERIX = r'\*'
    t_LPAREN = r'\('
    t_RPAREN = r'\)'
    t_LBRACK = r'\['
    t_RBRACK = r'\]'
    t_LBRACE = r'{'
    t_RBRACE = r'}'
    t_ASSIGN = r'='
    t_COLON = r':'
    t_COMMA = r','

    def __init__(self):
        """Give this instance its own duplicate-free token list.

        The previous ``self.tokens += ...`` extended the *class* list in
        place, so every additional instance appended the keyword tokens
        again. Building a fresh list keeps the class attribute untouched and
        still picks up a ``reserved`` table overridden by a subclass.
        """
        merged = list(type(self).tokens)
        for token_name in self.reserved.values():
            if token_name not in merged:
                merged.append(token_name)
        self.tokens = merged

    def _column(self, t):
        """Return the 1-based column of token ``t`` on the current line."""
        return t.lexpos - self.line_begin + 1

    # Everything from ';', 'declare' or 'target' to the end of the line is
    # discarded (no token is returned).
    # NOTE(review): the keywords are matched as plain prefixes, so an
    # identifier such as 'targets' would be swallowed too -- confirm intent.
    def t_COMMENT(self, t):
        r'(;|declare|target).*'
        pass

    # A run of newlines: bump the line counter by the number of newlines and
    # remember where the next line starts (just past the whole run).
    def t_newline(self, t):
        r'\n+'
        t.lexer.lineno += len(t.value)
        self.line_begin = t.lexpos + len(t.value)

    # A raw newline inside a string literal is reported but scanning stays in
    # the 'string' state; line bookkeeping is still updated.
    def t_string_newline(self, t):
        r'\n'
        print("{}:{}: Newline is not allowed inside strings."
              .format(t.lineno, self._column(t)))
        t.lexer.lineno += 1
        self.line_begin = t.lexpos + 1

    def t_INT(self, t):
        r'\d+'
        t.value = int(t.value)
        return t

    # 'c"' opens a string literal: switch to the 'string' state and start
    # with an empty accumulator. Defined before t_ID so it is tried first.
    def t_begin_string(self, t):
        r'c\"'
        t.lexer.begin('string')
        self.current_string = ""

    # Escaped double quote inside a string.
    def t_string_doublequote(self, t):
        r'\\"'
        self.current_string += '"'

    # Escaped backslash inside a string.
    def t_string_backslash(self, t):
        r'\\\\'
        self.current_string += '\\'

    # Two-digit hex escape. Both digit cases are accepted: LLVM itself
    # prints escapes in upper case (e.g. \0A), which the former
    # lowercase-only pattern rejected.
    def t_string_hex(self, t):
        r'\\[0-9a-fA-F][0-9a-fA-F]'
        code = int(t.value[1:], 16)
        self.current_string += chr(code)

    # Any other backslash is an error; it is reported and dropped.
    def t_string_singlebackslash(self, t):
        r'\\'
        print("{}:{}: Single backslash is not allowed inside strings."
              .format(t.lineno, self._column(t)))

    # Closing quote: emit the accumulated text as a STRING token and return
    # to the default state.
    def t_string_end(self, t):
        r'"'
        t.value = self.current_string
        self.current_string = ""
        t.type = "STRING"
        t.lexer.begin('INITIAL')
        return t

    # Any other single character (except newline) is literal string content.
    def t_string_meat(self, t):
        r'.'
        self.current_string += t.value

    # Bare word: a keyword if it appears in ``reserved``, otherwise an ID.
    def t_ID(self, t):
        r'[a-zA-Z0-9_-]+'
        t.type = self.reserved.get(t.value, 'ID')
        return t

    # %name -- the token value is the name without the leading '%'.
    def t_PercentID(self, t):
        r'%[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # @name -- the token value is the name without the leading '@'.
    def t_AtID(self, t):
        r'@[a-zA-Z0-9_-]+'
        t.value = t.value[1:]
        return t

    # Error rule shared by all states: report the offending character and
    # skip past it.
    def t_ANY_error(self, t):
        print("{}:{}: Illegal character '{}'"
              .format(t.lineno, self._column(t), t.value[0]))
        t.lexer.skip(1)

    def build(self, **kwargs):
        """Create the underlying PLY lexer from this object's rules.

        Keyword arguments are forwarded unchanged to ``lex.lex``. The result
        is stored in ``self.lexer``.
        """
        self.lexer = lex.lex(module=self, **kwargs)

    def test(self, data):
        """Tokenize ``data`` and print every token (debugging aid).

        ``build`` must have been called first.
        """
        # Offsets restart at 0 for new input, so the line start must too.
        self.line_begin = 0
        self.lexer.input(data)
        for tok in self.lexer:
            print(tok)


if __name__ == '__main__':
    # Manual smoke test: lex a small sample covering integers, a string
    # literal with escapes, identifiers, keywords and a trailing comment,
    # printing each token produced.
    sample = r'''123 456 c"abc _\2c_ \" \\ and so on" def add sdiv ; some comment
    qqq
    '''
    demo_lexer = LLVMLexer()
    demo_lexer.build()
    demo_lexer.test(sample)
Add initial lexer. 2017-10-28 23:07:34 +00:00			`"""Parser for LLVM--."""`

			`import ply.lex as lex`

			`class LLVMLexer(object):`
Add better string support. 2017-10-28 23:44:58 +00:00			`current_string = ""`
			`line_begin = 0`

			`states = [`
			`('string', 'exclusive')`
			`]`

Add initial lexer. 2017-10-28 23:07:34 +00:00			`reserved = {`
			`'if': 'IF',`
			`'then': 'THEN',`
			`'else': 'ELSE',`
			`'while': 'WHILE',`
			`'add': 'ADD',`
			`'sub': 'SUB',`
			`'mul': 'MUL',`
			`'shl': 'SHL',`
			`'lshr': 'LSHR',`
			`'ashr': 'ASHR',`
			`'and': 'AND',`
			`'or': 'OR',`
			`'xor': 'XOR',`
			`'sdiv': 'SDIV',`
			`'eq': 'EQ',`
			`'ne': 'NE',`
			`'slt': 'SLT',`
			`'sle': 'SLE',`
			`'sgt': 'SGT',`
			`'sge': 'SGE',`
			`'alloca': 'ALLOCA',`
			`'load': 'LOAD',`
			`'store': 'STORE',`
			`'icmp': 'ICMP',`
			`'call': 'CALL',`
			`'bitcast': 'BITCAST',`
			`'getelementptr': 'GETELEMENTPTR',`
			`'zext': 'ZEXT',`
			`'ptrtoint': 'PTRTOINT',`
			`'ret': 'RET',`
			`'br': 'BR',`
			`'label': 'LABEL',`
			`'define': 'DEFINE',`
			`'null': 'NULL',`
			`'global': 'GLOBAL',`
			`'type': 'TYPE',`
			`'to': 'TO',`
			`'void': 'VOID',`
			`'i1': 'I1',`
			`'i8': 'I8',`
			`'i32': 'I32',`
			`'i64': 'I64',`
			`}`

			`tokens = [`
			`'INT', 'STRING',`
			`'ASTERIX', 'LPAREN', 'RPAREN', 'LBRACK', 'RBRACK', 'LBRACE', 'RBRACE',`
			`'ASSIGN', 'COLON', 'COMMA',`
			`'PercentID', 'AtID', 'ID',`
			`'COMMENT'`
			`]`

			`t_ignore = ' \t'`
Add better string support. 2017-10-28 23:44:58 +00:00			`t_string_ignore = ''`
Add initial lexer. 2017-10-28 23:07:34 +00:00			`t_ASTERIX = r'\*'`
			`t_LPAREN = r'\('`
			`t_RPAREN = r'\)'`
			`t_LBRACK = r'\['`
			`t_RBRACK = r'\]'`
			`t_LBRACE = r'{'`
			`t_RBRACE = r'}'`
			`t_ASSIGN = r'='`
			`t_COLON = r':'`
			`t_COMMA = r','`


			`def __init__(self):`
			`self.tokens += self.reserved.values()`


			`def t_COMMENT(self, t):`
			`r'(;\|declare\|target).*'`
			`pass`


			`def t_newline(self, t):`
			`r'\n+'`
			`t.lexer.lineno += len(t.value)`
Add better string support. 2017-10-28 23:44:58 +00:00			`self.line_begin = t.lexpos`


			`def t_string_newline(self, t):`
			`r'\n'`
			`print("{}:{}: Newline is not allowed inside strings."`
			`.format(t.lineno, t.lexpos - self.line_begin))`
			`t.lexer.lineno += 1`
			`self.line_begin = t.lexpos`

Add initial lexer. 2017-10-28 23:07:34 +00:00
			`def t_INT(self, t):`
			`r'\d+'`
			`t.value = int(t.value)`
			`return t`

Add better string support. 2017-10-28 23:44:58 +00:00			`def t_begin_string(self, t):`
			`r'c\"'`
			`t.lexer.begin('string')`
			`self.current_string = ""`

			`def t_string_doublequote(self, t):`
			`r'\\"'`
			`self.current_string += '"'`


			`def t_string_backslash(self, t):`
			`r'\\\\'`
			`self.current_string += '\\'`
Add initial lexer. 2017-10-28 23:07:34 +00:00
Add better string support. 2017-10-28 23:44:58 +00:00
			`def t_string_hex(self, t):`
			`r'\\[0-9a-f][0-9a-f]'`
			`code = int(t.value[1:], 16)`
			`self.current_string += chr(code)`


			`def t_string_singlebackslash(self, t):`
			`r'\\'`
			`print("{}:{}: Single backslash is not allowed inside strings."`
			`.format(t.lineno, t.lexpos - self.line_begin))`


			`def t_string_end(self, t):`
			`r'"'`
			`t.value = self.current_string`
			`self.current_string = ""`
			`t.type = "STRING"`
			`t.lexer.begin('INITIAL')`
Add initial lexer. 2017-10-28 23:07:34 +00:00			`return t`


Add better string support. 2017-10-28 23:44:58 +00:00			`def t_string_meat(self, t):`
			`r'.'`
			`self.current_string += t.value`


Add initial lexer. 2017-10-28 23:07:34 +00:00			`def t_ID(self, t):`
			`r'[a-zA-Z0-9_-]+'`
			`t.type = self.reserved.get(t.value, 'ID')`
			`return t`


			`def t_PercentID(self, t):`
			`r'%[a-zA-Z0-9_-]+'`
			`t.value = t.value[1:]`
			`return t`

			`def t_AtID(self, t):`
			`r'@[a-zA-Z0-9_-]+'`
			`t.value = t.value[1:]`
			`return t`


Add better string support. 2017-10-28 23:44:58 +00:00			`def t_ANY_error(self, t):`
			`print("{}:{}: Illegal character '{}'"`
			`.format(t.lineno, t.lexpos - self.line_begin, t.value[0]))`
Add initial lexer. 2017-10-28 23:07:34 +00:00			`t.lexer.skip(1)`

Add better string support. 2017-10-28 23:44:58 +00:00
Add initial lexer. 2017-10-28 23:07:34 +00:00			`def build(self, **kwargs):`
			`self.lexer = lex.lex(module=self, **kwargs)`

Add better string support. 2017-10-28 23:44:58 +00:00
Add initial lexer. 2017-10-28 23:07:34 +00:00			`def test(self, data):`
			`self.lexer.input(data)`
			`for tok in self.lexer:`
			`print(tok)`


			`if __name__ == '__main__':`
			`m = LLVMLexer()`
			`m.build()`
Add better string support. 2017-10-28 23:44:58 +00:00			`data = r'''123 456 c"abc _\2c_ \" \\ and so on" def add sdiv ; some comment`
			`qqq`
Add initial lexer. 2017-10-28 23:07:34 +00:00			`'''`
			`m.test(data)`