-- A small pattern-based lexer: rules pair Lua patterns with token names,
-- and the longest match at each position wins.

local LEXER_RULE_CONTINUE = {}

local function lex_string (lexer, str, offset)
  offset = offset or 0
  if type(lexer) ~= 'table' then
    error(('[Lexer]: Bad argument #1, expected table, but got %s (%s).'
      ):format(tostring(lexer), type(lexer)))
  end
  if type(str) ~= 'string' then
    error(('[Lexer]: Bad argument #2, expected string, but got %s (%s).'
      ):format(tostring(str), type(str)))
  end
  if type(offset) ~= 'number' then
    error(('[Lexer]: Bad argument #3, expected number, but got %s (%s).'
      ):format(tostring(offset), type(offset)))
  end

  local index, tokens = 1, {}
  while index <= #str do
    -- Find the rule with the longest match anchored at the current position.
    local longest_match, longest_match_right = nil, index - 1
    for i = 1, #lexer.rules do
      local _, match_right = string.find(str, lexer.rules[i].pattern, index)
      if match_right and longest_match_right < match_right then
        longest_match, longest_match_right = lexer.rules[i], match_right
      end
    end
    --
    if longest_match then
      -- CONTINUE rules consume input without emitting a token.
      if longest_match.token ~= LEXER_RULE_CONTINUE then
        tokens[#tokens+1] = {
          text  = str:sub(index, longest_match_right),
          left  = offset + index,
          right = offset + longest_match_right,
          token = longest_match.token,
        }
      end
      index = longest_match_right
    end
    -- If no rule matched, this skips a single character.
    index = index + 1
  end
  return tokens
end

local LEXER_MT  = { __index = { lex = lex_string } }
-- Any read of or write to a table guarded by STRICT_MT raises an error.
local STRICT_MT = { __index = error, __newindex = error }

local function new_lexer (t)
  assert(type(t) == 'table')
  --
  local rules  = {}
  local tokens = {}
  --
  for _, rule in ipairs(t) do
    assert(type(rule[1]) == 'string', 'Pattern must be string!')
    assert(type(rule[2]) == 'string' or rule[2] == LEXER_RULE_CONTINUE,
      'Token must be a string or Lexer.CONTINUE!')
    assert(not string.match('', rule[1]), 'Pattern must not match empty string!')
    -- Anchor the pattern so string.find only matches at the current index.
    rules[#rules+1] = { pattern = '^'..rule[1], token = rule[2] }
  end
  return setmetatable({
    rules  = rules,
    tokens = setmetatable(tokens, STRICT_MT),
  }, LEXER_MT)
end

return setmetatable({ CONTINUE = LEXER_RULE_CONTINUE }, {
  __call = function (_, ...) return new_lexer(...) end,
})
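
-- A minimal usage sketch (assumptions: the module is loaded from a file named
-- 'lexer.lua'; the 'number'/'word' token names and the patterns below are
-- illustrative, not part of the module):
--
--   local Lexer = require 'lexer'
--   local lex = Lexer {
--     { '%d+', 'number'       },
--     { '%a+', 'word'         },
--     { '%s+', Lexer.CONTINUE },  -- whitespace is consumed but not emitted
--   }
--   for _, tok in ipairs(lex:lex('foo 42 bar')) do
--     print(tok.token, tok.text, tok.left, tok.right)
--   end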