Initial implementation of errors library.
This commit is contained in:
commit
a75a33b85f
56
errors.lua
Normal file
56
errors.lua
Normal file
|
@ -0,0 +1,56 @@
|
|||
|
||||
local string_dist = require 'string_distance'
|
||||
|
||||
assert(debug and debug.getinfo)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Error handler
|
||||
|
||||
--- ErrorHandler
-- Produces error-raising functions whose messages are prefixed with a module
-- name, and which decide what stack level the error is attributed to.
-- Calling `ErrorHandler(module_name)` is shorthand for `ErrorHandler.new`.
local ErrorHandler = setmetatable({}, {__call = function (c, ...) return c.new(...) end})
ErrorHandler.__index = ErrorHandler

--- Create a new handler.
-- @param module_name  Name placed inside the `[...]` prefix of every message.
-- @return A handler with an empty set of registered functions.
function ErrorHandler.new (module_name)
  return setmetatable({module_name = module_name, registered = {}}, ErrorHandler)
end

--- Register a function with this handler.
-- Error functions created in 'external' mode skip over registered functions
-- when picking the stack level that the error is reported at.
-- @param f  The function to register.
function ErrorHandler:register (f)
  assert(type(self) == 'table')
  assert(type(f) == 'function')
  self.registered[f] = true
end

-- The modes accepted by `ErrorHandler:getErrorFunc`.
local ERROR_MODES = { ['internal'] = true, ['external'] = true }

--- Create an error-raising function.
-- The returned function takes a `string.format` style message followed by
-- its format arguments, and raises `[<module_name><tag>]: <message>`.
--
-- * 'internal' mode reports the error at the function that called the error
--   function (level 2).
-- * 'external' mode starts at that same level and walks up the stack past
--   every function registered with this handler, reporting the error at the
--   first unregistered caller.
--
-- @param mode  'internal' or 'external'. Defaults to 'internal'.
-- @param name  Tag appended to the module name as `/<name>`. Defaults to no
--              tag in 'external' mode, and to the mode name otherwise.
-- @return The error-raising function.
function ErrorHandler:getErrorFunc (mode, name)

  -- Parameter fixing
  mode = mode or 'internal'
  assert(ERROR_MODES[mode], "mode must be 'internal' or 'external'")

  if not name then
    -- Note: '' is truthy in Lua, so this yields '' for external mode.
    name = (mode == 'external') and '' or mode
  end
  -- Validate before concatenating; afterwards the value is always a string
  -- and the check could never fail.
  assert(type(name) == 'string' or type(name) == 'number', 'name must be a string')
  local tag = (name ~= '') and ('/' .. name) or ''

  -- `module_name` is read when the error is raised, not when the error
  -- function is created.
  local handler = self
  local function build_message (format_msg, ...)
    return ('[%s%s]: ' .. format_msg):format(handler.module_name, tag, ...)
  end

  if mode == 'internal' then
    return function (format_msg, ...)
      error(build_message(format_msg, ...), 2)
    end
  end

  -- mode == 'external'
  return function (format_msg, ...)
    local level = 2
    while true do
      -- `debug.getinfo` returns nil once `level` is past the top of the
      -- stack (e.g. every caller is registered); stop there instead of
      -- indexing nil.
      local info = debug.getinfo(level, 'f')
      if not info or not handler.registered[info.func] then break end
      level = level + 1
    end
    error(build_message(format_msg, ...), level)
  end
end
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Public interface of the module: only the ErrorHandler class is exported.
local exports = {}
exports.ErrorHandler = ErrorHandler
return exports
|
214
string_distance.lua
Normal file
214
string_distance.lua
Normal file
|
@ -0,0 +1,214 @@
|
|||
---- String Distance.lua ----
|
||||
-- A submodule of the `errors` library, with various string distance functions.
|
||||
-- Utilities for using these distance functions are also present.
|
||||
--
|
||||
-- Each distance function returns 3 values:
|
||||
-- * How similar the strings were.
|
||||
-- * The value of maximum similarity.
|
||||
-- * The value of least similarity.
|
||||
-- By using these values, it's possible to normalize.
|
||||
--
|
||||
-- The `levenshtein` function is based on: https://gist.github.com/james2doyle/e406180e143da3bdd102
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Utility functions
|
||||
|
||||
|
||||
local function split_string_into_words (str)
  -- Splits `str` into a list of lowercased words. A word is an optional
  -- ASCII uppercase letter followed by a run of ASCII lowercase letters, so
  -- both 'HelloWorld' and 'hello_world' produce {'hello', 'world'}. Any
  -- other character acts as a separator.
  --
  -- TODO: Add unicode support
  -- TODO: Add support for splitting 'helloWorld' into {'hello', 'World'}

  assert(str)

  local found, count = {}, 0
  for piece in str:gmatch('[A-Z]?[a-z]*') do
    -- The pattern can match the empty string; those matches are dropped.
    if piece ~= '' then
      count = count + 1
      found[count] = piece:lower()
    end
  end
  return found
end
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
-- Similarity metrics
|
||||
|
||||
local function levenshtein (str1, str2)
  -- levenshtein(str1, str2)
  --
  -- Calculates the amount of 'inserts', 'removals' or 'substitutions'
  -- required to transform `str1` into `str2`, and vice versa.
  -- Note that the strings are automatically converted to lowercase, and
  -- that they are compared byte by byte.
  --
  -- Returns three values:
  --   * The distance. Lower numbers denote more similar strings.
  --   * The distance of maximum similarity: the difference in length.
  --   * The distance of least similarity: the length of the longer string.
  --
  -- Adapted from the C version given at: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance

  -- Error handling.
  assert(type(str1) == 'string')
  assert(type(str2) == 'string')

  -- Do work
  str1, str2 = str1:lower(), str2:lower()
  local len1, len2 = #str1, #str2
  local best, worst = math.abs(len1 - len2), math.max(len1, len2)

  -- Quick cut-offs to save time
  if len1 == 0 then
    return len2, best, worst
  elseif len2 == 0 then
    return len1, best, worst
  elseif str1 == str2 then
    return 0, best, worst
  end

  -- Init column: the distance from each prefix of `str1` to the empty string.
  local column = {}
  for y = 1, len1 do column[y] = y end

  -- Algorithm: single-column dynamic programming, one pass per byte of `str2`.
  for x = 1, len2 do
    column[0] = x
    local byte2 = str2:byte(x)
    local lastdiag = x - 1
    for y = 1, len1 do
      local olddiag = column[y]
      -- Lua strings are 1-indexed, so the bytes at positions y and x are
      -- compared directly (the C original used y-1 / x-1).
      local cost = (str1:byte(y) == byte2) and 0 or 1
      column[y] = math.min(olddiag + 1, column[y-1] + 1, lastdiag + cost)
      lastdiag = olddiag
    end
  end

  -- Return the last value - this is the Levenshtein distance
  return column[len1], best, worst
end
|
||||
|
||||
local function longest_common_subsequence (str1, str2)
  -- longest_common_subsequence(str1, str2)
  --
  -- Calculates the length of the longest common subsequence of the two
  -- strings: the largest number of characters that appear in the same order
  -- in both strings, possibly with other characters in between. This is not
  -- the same as the longest common substring.
  -- Both strings are converted to lowercase before comparing.
  --
  -- Returns three values:
  --   * The length of the subsequence. Higher numbers denote more similar
  --     strings.
  --   * The value of maximum similarity: the length of the longer string.
  --   * The value of least similarity: always 0.

  -- Error handling.
  assert(type(str1) == 'string')
  assert(type(str2) == 'string')

  local a, b = str1:lower(), str2:lower()
  local len_a, len_b = #a, #b
  local longest = math.max(len_a, len_b)

  -- Quick cut-offs to save time
  if a == b then
    return len_a, len_a, 0
  end
  if len_a == 0 or len_b == 0 then
    return 0, longest, 0
  end

  -- lengths[i][j] holds the result for the first i bytes of `a` and the
  -- first j bytes of `b`; row 0 and column 0 are the empty-prefix cases.
  local lengths = {}
  for i = 0, len_a do
    lengths[i] = {[0] = 0}
  end
  for j = 1, len_b do
    lengths[0][j] = 0
  end

  -- Fill up the table row by row.
  for i = 1, len_a do
    local byte_a = a:byte(i)
    local row, row_above = lengths[i], lengths[i-1]
    for j = 1, len_b do
      if byte_a == b:byte(j) then
        row[j] = row_above[j-1] + 1
      else
        local left, up = row[j-1], row_above[j]
        row[j] = (left > up) and left or up
      end
    end
  end

  return lengths[len_a][len_b], longest, 0
end
|
||||
|
||||
local function jaccard_similarity_of_words (str1, str2)
  -- jaccard_similarity_of_words(str1, str2)
  --
  -- Calculates the jaccard similarity of the words in the strings: the
  -- number of distinct words present in both strings, divided by the number
  -- of distinct words present in either. Words are found with
  -- `split_string_into_words`, so they are compared in lowercase.
  --
  -- Returns three values:
  --   * The similarity. Higher numbers denote more similar strings. At 1
  --     the strings contain exactly the same words.
  --   * The value of maximum similarity: always 1.
  --   * The value of least similarity: always 0.

  -- Error handling.
  assert(type(str1) == 'string')
  assert(type(str2) == 'string')

  -- Quick cut-offs to save time
  if str1:lower() == str2:lower() then
    -- Also covers the case where both strings are empty.
    return 1, 1, 0
  elseif str1 == '' or str2 == '' then
    return 0, 1, 0
  end

  -- Collect the set of words of each string, and their union.
  local words1, words2, all = {}, {}, {}
  for _, word in ipairs(split_string_into_words(str1)) do
    words1[word], all[word] = true, true
  end
  for _, word in ipairs(split_string_into_words(str2)) do
    words2[word], all[word] = true, true
  end

  -- Which words are in common?
  local num_in_common, num_words_in_total = 0, 0
  for word, _ in pairs(all) do
    num_words_in_total = num_words_in_total + 1
    if words1[word] and words2[word] then num_in_common = num_in_common + 1 end
  end

  -- Neither string contained a word (e.g. '123' vs '456'). The strings are
  -- known to differ at this point, so report no similarity rather than the
  -- NaN that 0/0 would produce.
  if num_words_in_total == 0 then
    return 0, 1, 0
  end

  -- Return similarity
  return num_in_common/num_words_in_total, 1, 0
end
|
||||
|
||||
-- The metrics combined by `strings_with_highest_similarity`. Each one is
-- called as `metric(str1, str2)` and returns the similarity, the value of
-- maximum similarity and the value of least similarity.
local SIMILARITY_METRICS = {
  levenshtein,
  longest_common_subsequence,
  jaccard_similarity_of_words,
}

--------------------------------------------------------------------------------

local function strings_with_highest_similarity (str, list_of_other_str)
  -- strings_with_highest_similarity(str, list)
  --
  -- Returns a new list containing the strings of `list`, sorted by how
  -- similar they are to `str`, in descending order, eg. the first element in
  -- the output list is the most similar.
  --
  -- The score of a string is the sum of every metric in SIMILARITY_METRICS,
  -- each normalized so that 0 is least similar and 1 is most similar. The
  -- input list is not modified.

  -- Error checking
  assert(type(str) == 'string')
  assert(type(list_of_other_str) == 'table')
  for i = 1, #list_of_other_str do assert(type(list_of_other_str[i]) == 'string') end

  -- Calculate similarity metrics
  local scored = {}
  for index, other_str in ipairs(list_of_other_str) do
    local total_sim = 0
    for _, similarity_func in ipairs(SIMILARITY_METRICS) do
      local sim, max_sim, min_sim = similarity_func(str, other_str)
      -- A metric whose best and worst values coincide (this happens when
      -- one of the strings is empty) cannot be normalized and carries no
      -- information, so it contributes nothing instead of aborting the
      -- whole ranking.
      if max_sim ~= min_sim then
        local normalized = (sim - min_sim) / (max_sim - min_sim)
        -- Skip NaN (the only value not equal to itself); it would make the
        -- sort comparator below inconsistent.
        if normalized == normalized then
          total_sim = total_sim + normalized
        end
      end
    end
    scored[index] = {other_str, total_sim}
  end

  -- Sort by descending score, then flatten to a list of strings.
  table.sort(scored, function(a, b) return a[2] > b[2] end)
  local ranked = {}
  for i = 1, #scored do ranked[i] = scored[i][1] end

  -- Return the sorted list
  return ranked
end
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
-- Public interface of the module: the three metrics and the ranking helper.
local exports = {}
exports.levenshtein = levenshtein
exports.longest_common_subsequence = longest_common_subsequence
exports.jaccard_similarity_of_words = jaccard_similarity_of_words
exports.strings_with_highest_similarity = strings_with_highest_similarity
return exports
|
71
test/test_errors.lua
Normal file
71
test/test_errors.lua
Normal file
|
@ -0,0 +1,71 @@
|
|||
|
||||
-- Tests for the `errors` module.
-- NOTE(review): `curline`, `assert_equal`, and the names passed to
-- `setEnviroment` are used as globals inside the test bodies; presumably the
-- TestSuite library injects them - confirm against TestSuite.
-- Each closure under test is written on the same line as the `curline()`
-- call that records its line number; the expected messages rely on that.

local ErrorHandler = require('errors').ErrorHandler
local error_handler = ErrorHandler 'errors_test'

local SUITE = require('TestSuite').new('errors')
SUITE:setEnviroment {
  error_handler  = error_handler,
  external_error = error_handler:getErrorFunc('external'),
  internal_error = error_handler:getErrorFunc('internal'),
}

--------------------------------------------------------------------------------
-- Basic errors functionality

SUITE:addTest('errors basic functionallity', function()
  local raiser = function () external_error 'Hello World' end
  local caller, caller_line = function () raiser() end, curline()
  error_handler:register(raiser)

  local ok, message = pcall(caller)
  assert_equal(false, ok)
  -- The registered raiser is skipped, so the error is reported at its caller.
  assert_equal('./test/test_errors.lua:'..caller_line..': [errors_test]: Hello World', message)
end)

SUITE:addTest('errors deep nesting works', function()
  local raiser = function () external_error 'Hello World' end
  local middle, middle_line = function () raiser() end, curline()
  local outer, outer_line = function () middle() end, curline()
  error_handler:register(raiser)
  error_handler:register(middle)

  local ok, message = pcall(outer)
  assert_equal(false, ok)
  -- Both registered functions are skipped; the outermost one gets reported.
  assert_equal('./test/test_errors.lua:'..outer_line..': [errors_test]: Hello World', message)
end)

SUITE:addTest('internal errors also works', function()
  local raiser, raiser_line = function () internal_error 'Hello World' end, curline()
  local middle, middle_line = function () raiser() end, curline()
  local outer, outer_line = function () middle() end, curline()
  error_handler:register(raiser)
  error_handler:register(middle)

  local ok, message = pcall(outer)
  assert_equal(false, ok)
  -- Internal errors ignore registration and report the raising function.
  assert_equal('./test/test_errors.lua:'..raiser_line..': [errors_test/internal]: Hello World', message)
end)

SUITE:addTest('errors modes can be overwritten', function()
  local extra_error = error_handler:getErrorFunc('external', 'extra')
  local raiser, raiser_line = function () extra_error 'Hi' end, curline()

  local ok, message = pcall(raiser)
  assert_equal('./test/test_errors.lua:'..raiser_line..': [errors_test/extra]: Hi', message)
end)

SUITE:addTest('errors modes can be overwritten, including internal', function()
  local extra_error = error_handler:getErrorFunc('internal', 'extra')
  local raiser, raiser_line = function () extra_error 'Hi' end, curline()

  local ok, message = pcall(raiser)
  assert_equal('./test/test_errors.lua:'..raiser_line..': [errors_test/extra]: Hi', message)
end)

--------------------------------------------------------------------------------

return SUITE
|
121
test/test_string_distance.lua
Normal file
121
test/test_string_distance.lua
Normal file
|
@ -0,0 +1,121 @@
|
|||
|
||||
-- Tests for the `string_distance` module.
-- NOTE(review): `assert_equal`, `bad_call`, and the names passed to
-- `setEnviroment` are used as globals inside the test bodies; presumably the
-- TestSuite library injects them - confirm against TestSuite.

local SUITE = require('TestSuite').new('string_distance')
local string_distance = require('string_distance')

SUITE:setEnviroment {
  levenshtein                     = string_distance.levenshtein,
  longest_common_subsequence      = string_distance.longest_common_subsequence,
  jaccard_similarity_of_words     = string_distance.jaccard_similarity_of_words,
  strings_with_highest_similarity = string_distance.strings_with_highest_similarity,
}

--------------------------------------------------------------------------------
-- Levenshtein

SUITE:addTest('levenshtein example 1', function()
  -- From: https://en.wikipedia.org/wiki/Levenshtein_distance
  local distance = levenshtein('kitten', 'sitting')
  assert_equal( 3, distance )
end)

SUITE:addTest('levenshtein example 2', function()
  -- From: https://people.cs.pitt.edu/~kirk/cs1501/Pruhs/Spring2006/assignments/editdistance/Levenshtein%20Distance.htm
  local distance = levenshtein('gumbo', 'gambol')
  assert_equal( 2, distance )
end)

SUITE:addTest('levenshtein example 3', function()
  -- From: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
  local distance = levenshtein('hello', 'jello')
  assert_equal( 1, distance )
end)

SUITE:addTest('levenshtein example 4', function()
  -- From: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
  local distance = levenshtein('good', 'goodbye')
  assert_equal( 3, distance )
end)

SUITE:addTest('levenshtein identical strings have distance 0', function()
  local words = {'hello', 'kitten', 'sitting', 'jello', 'good'}
  for i = 1, #words do
    local distance = levenshtein(words[i], words[i])
    assert_equal( 0, distance )
  end
end)

SUITE:addTest('levenshtein normalizes to lowercase', function()
  local distance = levenshtein('kItten', 'sitTiNg')
  assert_equal( 3, distance )
end)

SUITE:addTest('levenshtein not defined for non-strings', function()
  bad_call( levenshtein, 5, 'hi' )
  bad_call( levenshtein, 'derp', {} )
end)

--------------------------------------------------------------------------------
-- Longest common subsequence

SUITE:addTest('subsequence example 1', function()
  -- From: http://www.geeksforgeeks.org/dynamic-programming-set-4-longest-common-subsequence/
  local length = longest_common_subsequence('AGGTAB', 'GXTXAYB')
  assert_equal( 4, length )
end)

SUITE:addTest('subsequence example 2', function()
  -- From: http://www.cs.cmu.edu/afs/cs/academic/class/15451-s15/LectureNotes/lecture04.pdf
  local length = longest_common_subsequence('ABAZDC', 'BACBAD')
  assert_equal( 4, length )
end)

SUITE:addTest('subsequence normalizes to lowercase', function()
  local length = longest_common_subsequence('AGGtAB', 'GXTXAYb')
  assert_equal( 4, length )
end)

SUITE:addTest('subsequence not defined for non-strings', function()
  bad_call( longest_common_subsequence, 5, 'hi' )
  bad_call( longest_common_subsequence, 'derp', {} )
end)

--------------------------------------------------------------------------------
-- Jaccard Similarity

SUITE:addTest('jaccard example 1', function()
  -- One shared word out of three distinct words.
  local similarity = jaccard_similarity_of_words('hello world', 'hello planet')
  assert_equal( 1/3, similarity )
end)

SUITE:addTest('jaccard identical strings', function()
  local similarity = jaccard_similarity_of_words('hello world', 'hello world')
  assert_equal( 1, similarity )
end)

SUITE:addTest('jaccard identical words', function()
  local similarity = jaccard_similarity_of_words('hello world', 'world hello')
  assert_equal( 1, similarity )
end)

SUITE:addTest('jaccard CamelCase works', function()
  local similarity = jaccard_similarity_of_words('HelloWorld', 'hello world')
  assert_equal( 1, similarity )
end)

SUITE:addTest('jaccard snake_case works', function()
  local similarity = jaccard_similarity_of_words('hello_world', 'worldHello')
  assert_equal( 1, similarity )
end)

SUITE:addTest('jaccard singlewords', function()
  local similarity = jaccard_similarity_of_words('hello', 'world')
  assert_equal( 0, similarity )
end)

--------------------------------------------------------------------------------
-- strings_with_highest_similarity

SUITE:addTest('strings_with_highest_similarity example 1', function()
  local candidates = { 'Ada Lovelace', 'Charles Babbage ', 'Allan Turing', 'Grace Hopper' }
  local ranked = strings_with_highest_similarity('turning', candidates)
  assert_equal( 'Allan Turing', ranked[1] )
end)

--------------------------------------------------------------------------------

return SUITE
|
7
test/tests.lua
Normal file
7
test/tests.lua
Normal file
|
@ -0,0 +1,7 @@
|
|||
|
||||
-- Test runner script: makes the test and source directories loadable, then
-- collects and runs every module matching 'test/test_*'.
package.path = table.concat({package.path, './test/?.lua', './src/?.lua'}, ';')

local TestSuite = require("TestSuite")
local TEST_SUITE = TestSuite.new('errors')

TEST_SUITE:addModules('test/test_*')
-- The arguments this script was invoked with are forwarded as options.
TEST_SUITE:setOptions(...)
TEST_SUITE:runTests()
|
Loading…
Reference in New Issue
Block a user