-- errors/test/test_string_distance.lua

-- Test suite for the functions exported by the `string_distance` module.
local SUITE = require('TestSuite').new('string_distance')

-- Load the module under test once and read every function from that table.
local string_distance = require('string_distance')

-- Hand the functions under test to the suite; the test bodies below refer to
-- them by these bare names.
-- NOTE(review): `setEnviroment` (sic) is spelled exactly as in the original
-- call; presumably it matches the TestSuite API -- confirm before renaming.
SUITE:setEnviroment({
    levenshtein                     = string_distance.levenshtein,
    longest_common_subsequence      = string_distance.longest_common_subsequence,
    jaccard_similarity_of_words     = string_distance.jaccard_similarity_of_words,
    strings_with_highest_similarity = string_distance.strings_with_highest_similarity,
})
--------------------------------------------------------------------------------
-- Levenshtein
SUITE:addTest('levenshtein example 1', function()
    -- Word pair from: https://en.wikipedia.org/wiki/Levenshtein_distance
    local first, second = 'kitten', 'sitting'
    assert_equal(3, levenshtein(first, second))
end)

SUITE:addTest('levenshtein example 2', function()
    -- Word pair from: https://people.cs.pitt.edu/~kirk/cs1501/Pruhs/Spring2006/assignments/editdistance/Levenshtein%20Distance.htm
    local first, second = 'gumbo', 'gambol'
    assert_equal(2, levenshtein(first, second))
end)

SUITE:addTest('levenshtein example 3', function()
    -- Word pair from: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    local first, second = 'hello', 'jello'
    assert_equal(1, levenshtein(first, second))
end)

SUITE:addTest('levenshtein example 4', function()
    -- Word pair from: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    local first, second = 'good', 'goodbye'
    assert_equal(3, levenshtein(first, second))
end)

SUITE:addTest('levenshtein identical strings have distance 0', function()
    -- Comparing any word against itself must yield a distance of zero.
    local words = {'hello', 'kitten', 'sitting', 'jello', 'good'}
    for index = 1, #words do
        local word = words[index]
        assert_equal(0, levenshtein(word, word))
    end
end)

SUITE:addTest('levenshtein normalizes to lowercase', function()
    -- Same pair as example 1 with mixed letter case; the expected distance
    -- is unchanged.
    local first, second = 'kItten', 'sitTiNg'
    assert_equal(3, levenshtein(first, second))
end)

SUITE:addTest('levenshtein not defined for non-strings', function()
    -- Each call hands levenshtein one non-string argument (a number, then a
    -- table). `bad_call` comes from the test harness; presumably it asserts
    -- that the call raises an error -- confirm against TestSuite.
    local a_number, a_table = 5, {}
    bad_call(levenshtein, a_number, 'hi')
    bad_call(levenshtein, 'derp', a_table)
end)
--------------------------------------------------------------------------------
-- Longest common subsequence
SUITE:addTest('subsequence example 1', function()
    -- Sequence pair from: http://www.geeksforgeeks.org/dynamic-programming-set-4-longest-common-subsequence/
    local first, second = 'AGGTAB', 'GXTXAYB'
    assert_equal(4, longest_common_subsequence(first, second))
end)

SUITE:addTest('subsequence example 2', function()
    -- Sequence pair from: http://www.cs.cmu.edu/afs/cs/academic/class/15451-s15/LectureNotes/lecture04.pdf
    local first, second = 'ABAZDC', 'BACBAD'
    assert_equal(4, longest_common_subsequence(first, second))
end)

SUITE:addTest('subsequence normalizes to lowercase', function()
    -- Same pair as example 1 with one letter lowercased on each side; the
    -- expected length is unchanged.
    local first, second = 'AGGtAB', 'GXTXAYb'
    assert_equal(4, longest_common_subsequence(first, second))
end)

SUITE:addTest('subsequence not defined for non-strings', function()
    -- Each call hands the function one non-string argument (a number, then a
    -- table). `bad_call` comes from the test harness; presumably it asserts
    -- that the call raises an error -- confirm against TestSuite.
    local a_number, a_table = 5, {}
    bad_call(longest_common_subsequence, a_number, 'hi')
    bad_call(longest_common_subsequence, 'derp', a_table)
end)
--------------------------------------------------------------------------------
-- Jaccard Similarity
SUITE:addTest('jaccard example 1', function()
    -- One shared word ('hello') among three distinct words: expected 1/3.
    assert_equal(1/3, jaccard_similarity_of_words('hello world', 'hello planet'))
end)

SUITE:addTest('jaccard identical strings', function()
    -- The very same text on both sides: expected similarity 1.
    assert_equal(1, jaccard_similarity_of_words('hello world', 'hello world'))
end)

SUITE:addTest('jaccard identical words', function()
    -- Same two words in a different order: still expected to be 1.
    assert_equal(1, jaccard_similarity_of_words('hello world', 'world hello'))
end)

SUITE:addTest('jaccard CamelCase works', function()
    -- A CamelCase identifier against the space-separated equivalent.
    assert_equal(1, jaccard_similarity_of_words('HelloWorld', 'hello world'))
end)

SUITE:addTest('jaccard snake_case works', function()
    -- A snake_case identifier against a camelCase one with the words swapped.
    assert_equal(1, jaccard_similarity_of_words('hello_world', 'worldHello'))
end)

SUITE:addTest('jaccard singlewords', function()
    -- Two different single words: expected similarity 0.
    assert_equal(0, jaccard_similarity_of_words('hello', 'world'))
end)
--------------------------------------------------------------------------------
-- strings_with_highest_similarity
SUITE:addTest('strings_with_highest_similarity example 1', function()
    -- The query is a misspelling; the first entry of the result is expected
    -- to be the candidate 'Allan Turing'.
    -- NOTE(review): the trailing space in 'Charles Babbage ' and the spelling
    -- 'Allan' are kept verbatim from the original fixture -- confirm whether
    -- they are intentional.
    local query = 'turning'
    local candidates = {
        'Ada Lovelace',
        'Charles Babbage ',
        'Allan Turing',
        'Grace Hopper',
    }
    local ranked = strings_with_highest_similarity(query, candidates)
    assert_equal('Allan Turing', ranked[1])
end)
--------------------------------------------------------------------------------
-- Return the suite object, with all tests registered above, as this chunk's result.
return SUITE