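-- Lexer: builds simple longest-match lexers from Lua pattern rules.
--
-- Usage sketch (the module path and token names below are illustrative,
-- not defined by this file):
--
--   local Lexer = require 'lexer'
--   local lexer = Lexer {
--       { '%s+',         Lexer.CONTINUE },  -- matched, but no token is emitted
--       { '%d+',         'NUMBER'       },
--       { '[%a_][%w_]*', 'IDENTIFIER'   },
--   }
--   local tokens = lexer:lex('foo 42')
--   -- tokens[1] --> { text = 'foo', token = 'IDENTIFIER', left = 1, right = 3 }
--   -- tokens[2] --> { text = '42',  token = 'NUMBER',     left = 5, right = 6 }
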
local LEXER_RULE_CONTINUE = {}
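
-- Tokenises `str` using the rules in `lexer`. Returns a list of tokens,
-- each with the matched `text`, the rule's `token` name, and `left`/`right`
-- positions shifted by `offset` (which defaults to 0).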
local function lex_string (lexer, str, offset)
    offset = offset or 0
    if type(lexer) ~= 'table' then error(('[Lexer]: Bad argument #1, expected table, but got %s (%s).'):format(lexer, type(lexer))) end
    if type(str) ~= 'string' then error(('[Lexer]: Bad argument #2, expected string, but got %s (%s).'):format(str, type(str))) end
    if type(offset) ~= 'number' then error(('[Lexer]: Bad argument #3, expected number, but got %s (%s).'):format(offset, type(offset))) end
    local index, tokens = 1, {}
    while index <= #str do
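        -- Find the rule whose pattern yields the longest match at `index`.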
        local longest_match, longest_match_right = nil, index - 1
        for i = 1, #lexer.rules do
            local _, match_right = string.find(str, lexer.rules[i].pattern, index)
            if match_right and longest_match_right < match_right then
                longest_match, longest_match_right = lexer.rules[i], match_right
            end
        end
        --
        if longest_match then
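            -- Rules whose token is LEXER_RULE_CONTINUE are matched but
            -- produce no token, so their text is simply skipped.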
            if longest_match.token ~= LEXER_RULE_CONTINUE then
                tokens[#tokens+1] = { text = str:sub(index, longest_match_right)
                                    , left = offset + index
                                    , right = offset + longest_match_right
                                    , token = longest_match.token }
            end
            index = longest_match_right
        end
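        -- Step past the match; if no rule matched, skip a single character.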
        index = index + 1
    end

    return tokens
end
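
-- LEXER_MT gives lexer objects their `lex` method; STRICT_MT turns a table
-- into a strict one that raises an error on reads of undefined keys and on writes.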
local LEXER_MT = { __index = { lex = lex_string } }
local STRICT_MT = { __index = error, __newindex = error }
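
-- Builds a lexer from a list of { pattern, token } pairs, where `token` is
-- either a token name (string) or LEXER_RULE_CONTINUE.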
local function new_lexer (t)

    assert(type(t) == 'table')
    --
    local rules = {}
    local tokens = {}
    --
    for _, rule in ipairs(t) do
        assert(type(rule[1]) == 'string', 'Pattern must be string!')
        assert(type(rule[2]) == 'string' or rule[2] == LEXER_RULE_CONTINUE)
        assert(not string.match('', rule[1]), 'Pattern must not match empty string!')
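        -- Anchor each pattern so it can only match at the current lex position.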
        rules[#rules+1] = { pattern = '^'..rule[1], token = rule[2] }
    end

    return setmetatable({ rules = rules, tokens = setmetatable(tokens, STRICT_MT) }, LEXER_MT)
end

return setmetatable({ CONTINUE = LEXER_RULE_CONTINUE }, {__call = function(_, ...) return new_lexer(...) end})