1
0
Fork 0
assert-gooder/Lexer.lua

55 lines
2.0 KiB
Lua

-- Unique sentinel value (an empty table, compared by identity). A rule whose
-- token is this sentinel still consumes the text it matches, but lex_string
-- emits no token for it. It is exported from the module as `CONTINUE`.
local LEXER_RULE_CONTINUE = {}
--- Splits `str` into tokens using the rules stored in `lexer.rules`.
--
-- At every position each rule's pattern is tried with `string.find`, starting
-- at the current index (new_lexer prefixes every pattern with '^', so a match
-- has to begin exactly there). The rule whose match ends furthest to the right
-- wins; on a tie the rule that appears first in `lexer.rules` is kept.
-- If no rule matches, the single character at the current index is skipped
-- and produces no token.
--
-- @param lexer  table with a `rules` array of `{ pattern = ..., token = ... }`
-- @param str    string to tokenize
-- @param offset optional number added to every reported position (default 0)
-- @return array of `{ text, left, right, token }` tables, where `left` and
--         `right` are the 1-based inclusive bounds of the match plus `offset`
-- Raises an error if an argument has the wrong type.
local function lex_string (lexer, str, offset)
  offset = offset or 0

  -- Values go through tostring() before formatting: on Lua 5.1 '%s' only
  -- accepts strings, so formatting a nil or a table would raise an unrelated
  -- error and hide the message below.
  if type(lexer) ~= 'table' then
    error(('[Lexer]: Bad argument #1, expected table, but got %s (%s).' ):format(tostring(lexer), type(lexer)))
  end
  if type(str) ~= 'string' then
    error(('[Lexer]: Bad argument #2, expected string, but got %s (%s).' ):format(tostring(str), type(str)))
  end
  if type(offset) ~= 'number' then
    error(('[Lexer]: Bad argument #3, expected number, but got %s (%s).' ):format(tostring(offset), type(offset)))
  end

  -- Loop-invariant lookups, hoisted out of the scanning loops.
  local rules = lexer.rules
  local rule_count = #rules
  local str_length = #str

  local index, tokens = 1, {}
  while index <= str_length do
    -- `index - 1` means "nothing matched yet": any real match ends at >= index.
    local longest_match, longest_match_right = nil, index - 1
    for i = 1, rule_count do
      local rule = rules[i]
      local _, match_right = string.find(str, rule.pattern, index)
      -- Strict '<' keeps the earliest rule when several matches end at the same place.
      if match_right and longest_match_right < match_right then
        longest_match, longest_match_right = rule, match_right
      end
    end

    if longest_match then
      -- CONTINUE rules consume their text without emitting a token.
      if longest_match.token ~= LEXER_RULE_CONTINUE then
        tokens[#tokens+1] = { text  = str:sub(index, longest_match_right)
                            , left  = offset + index
                            , right = offset + longest_match_right
                            , token = longest_match.token }
      end
      -- Jump to the last matched character; the increment below steps past it.
      index = longest_match_right
    end
    -- Also reached when nothing matched: skip one character.
    index = index + 1
  end
  return tokens
end
-- Metatable for lexer objects: makes lex_string available as the `lex` method.
local LEXER_MT = { __index = { lex = lex_string } }

-- Metatable that rejects reading a missing key and creating a new key.
-- Explicit handlers are used instead of passing `error` directly: as a
-- metamethod, `error` would receive (table, key), i.e. the table as the
-- error value and the key as the `level` argument.
local STRICT_MT = {
  __index = function (_, key)
    error(('[Lexer]: Attempt to read undefined key %q.'):format(tostring(key)), 2)
  end,
  __newindex = function (_, key)
    error(('[Lexer]: Attempt to assign to undefined key %q.'):format(tostring(key)), 2)
  end,
}
--- Builds a lexer object from a list of rules.
--
-- @param t array of rules, each of the form `{ pattern, token }`, where
--          `pattern` is a Lua pattern string that must not match the empty
--          string and `token` is a string or the CONTINUE sentinel
-- @return table `{ rules = ..., tokens = ... }` with LEXER_MT as metatable
-- Raises an error (via assert) when `t` or one of its rules is malformed.
local function new_lexer (t)
  assert(type(t) == 'table', 'Rules must be given as a table!')

  local rules = {}
  -- NOTE(review): nothing in this function fills `tokens`; it is returned
  -- empty, wrapped in STRICT_MT so that reading or writing a missing key
  -- raises an error. Confirm whether it was meant to be populated.
  local tokens = {}

  for _, rule in ipairs(t) do
    -- Without this check a non-table entry fails below with an obscure index error.
    assert(type(rule) == 'table', 'Rule must be a table!')
    local pattern, token = rule[1], rule[2]
    assert(type(pattern) == 'string', 'Pattern must be string!')
    assert(type(token) == 'string' or token == LEXER_RULE_CONTINUE, 'Token must be string or CONTINUE!')
    -- An empty match would never move the scan position forward in a useful way.
    assert(not string.match('', pattern), 'Pattern must not match empty string!')
    -- Anchor the pattern so that a match must start at the position being lexed.
    rules[#rules+1] = { pattern = '^'..pattern, token = token }
  end

  return setmetatable({ rules = rules, tokens = setmetatable(tokens, STRICT_MT) }, LEXER_MT)
end
-- Module value: a table exposing the CONTINUE sentinel. Calling the module
-- itself forwards all arguments to new_lexer and returns its result.
local exported = { CONTINUE = LEXER_RULE_CONTINUE }
local function construct (_, ...)
  return new_lexer(...)
end
return setmetatable(exported, { __call = construct })