2017-06-09 13:22:25 +00:00
|
|
|
|
2017-06-27 11:50:27 +00:00
|
|
|
-- Load the project's TestSuite module and call the value it returns with the
-- name 'string_distance'. The result is the suite object on which every test
-- in this file is registered and which the file returns at the end.
local SUITE = require 'TestSuite' 'string_distance'
|
2017-06-09 13:22:25 +00:00
|
|
|
-- Hand the four functions under test to the suite's environment table.
-- NOTE(review): presumably this is what lets the test bodies call them by
-- bare name (levenshtein(...), etc.) -- confirm against TestSuite.
do
    -- `require` caches its result, so loading the module once and reading
    -- four fields yields the same values as four separate require calls.
    local string_distance = require('string_distance')

    SUITE:setEnviroment {
        levenshtein                     = string_distance.levenshtein,
        longest_common_subsequence      = string_distance.longest_common_subsequence,
        jaccard_similarity_of_words     = string_distance.jaccard_similarity_of_words,
        strings_with_highest_similarity = string_distance.strings_with_highest_similarity,
    }
end
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Levenshtein
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein example 1', function()
    -- Reference pair and expected distance taken from:
    -- https://en.wikipedia.org/wiki/Levenshtein_distance
    local source, target = 'kitten', 'sitting'
    assert_equal(3, levenshtein(source, target))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein example 2', function()
    -- Reference pair and expected distance taken from:
    -- https://people.cs.pitt.edu/~kirk/cs1501/Pruhs/Spring2006/assignments/editdistance/Levenshtein%20Distance.htm
    local source, target = 'gumbo', 'gambol'
    assert_equal(2, levenshtein(source, target))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein example 3', function()
    -- Reference pair and expected distance taken from:
    -- https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    local source, target = 'hello', 'jello'
    assert_equal(1, levenshtein(source, target))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein example 4', function()
    -- Reference pair and expected distance taken from:
    -- https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    local source, target = 'good', 'goodbye'
    assert_equal(3, levenshtein(source, target))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein identical strings have distance 0', function()
    -- Each sample word is compared with itself; the expected distance is 0.
    local samples = { 'hello', 'kitten', 'sitting', 'jello', 'good' }
    for _, sample in pairs(samples) do
        assert_equal(0, levenshtein(sample, sample))
    end
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein normalizes to lowercase', function()
    -- Mixed-case spelling of the kitten/sitting pair; the expected distance
    -- is 3 regardless of letter case.
    local mixed_source, mixed_target = 'kItten', 'sitTiNg'
    assert_equal(3, levenshtein(mixed_source, mixed_target))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('levenshtein not defined for non-strings', function()
    -- One call with a non-string first argument, one with a non-string
    -- second argument.
    -- NOTE(review): bad_call comes from the test environment; presumably it
    -- asserts that the call raises an error -- confirm against TestSuite.
    local a_number, a_table = 5, {}
    bad_call(levenshtein, a_number, 'hi')
    bad_call(levenshtein, 'derp', a_table)
end)
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Longest common subsequence
|
|
|
|
|
|
|
|
SUITE:addTest('subsequence example 1', function()
    -- Reference pair and expected length taken from:
    -- http://www.geeksforgeeks.org/dynamic-programming-set-4-longest-common-subsequence/
    local first, second = 'AGGTAB', 'GXTXAYB'
    assert_equal(4, longest_common_subsequence(first, second))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('subsequence example 2', function()
    -- Reference pair and expected length taken from:
    -- http://www.cs.cmu.edu/afs/cs/academic/class/15451-s15/LectureNotes/lecture04.pdf
    local first, second = 'ABAZDC', 'BACBAD'
    assert_equal(4, longest_common_subsequence(first, second))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('subsequence normalizes to lowercase', function()
    -- Same letters as the AGGTAB / GXTXAYB pair but with mixed case; the
    -- expected length is still 4.
    local mixed_first, mixed_second = 'AGGtAB', 'GXTXAYb'
    assert_equal(4, longest_common_subsequence(mixed_first, mixed_second))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('subsequence not defined for non-strings', function()
    -- One call with a non-string first argument, one with a non-string
    -- second argument.
    -- NOTE(review): bad_call comes from the test environment; presumably it
    -- asserts that the call raises an error -- confirm against TestSuite.
    local a_number, a_table = 5, {}
    bad_call(longest_common_subsequence, a_number, 'hi')
    bad_call(longest_common_subsequence, 'derp', a_table)
end)
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Jaccard Similarity
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard example 1', function()
    -- Two phrases with the expected similarity 1/3.
    assert_equal(1/3, jaccard_similarity_of_words('hello world', 'hello planet'))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard identical strings', function()
    -- The same phrase on both sides; the expected similarity is 1.
    assert_equal(1, jaccard_similarity_of_words('hello world', 'hello world'))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard identical words', function()
    -- Same two words in swapped order; the expected similarity is still 1.
    assert_equal(1, jaccard_similarity_of_words('hello world', 'world hello'))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard CamelCase works', function()
    -- A CamelCase identifier against the space-separated phrase; the
    -- expected similarity is 1.
    assert_equal(1, jaccard_similarity_of_words('HelloWorld', 'hello world'))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard snake_case works', function()
    -- A snake_case identifier against a camelCase one with the words
    -- swapped; the expected similarity is 1.
    assert_equal(1, jaccard_similarity_of_words('hello_world', 'worldHello'))
end)
|
|
|
|
|
|
|
|
SUITE:addTest('jaccard singlewords', function()
    -- Two different single words; the expected similarity is 0.
    assert_equal(0, jaccard_similarity_of_words('hello', 'world'))
end)
|
|
|
|
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- strings_with_highest_similarity
|
|
|
|
|
|
|
|
SUITE:addTest('strings_with_highest_similarity example 1', function()
    -- Candidate list searched with the query 'turning'; the first element of
    -- the result is expected to be 'Allan Turing'.
    local candidates = {
        'Ada Lovelace',
        'Charles Babbage ',
        'Allan Turing',
        'Grace Hopper',
    }
    local ranked = strings_with_highest_similarity('turning', candidates)
    assert_equal('Allan Turing', ranked[1])
end)
|
|
|
|
|
2017-08-27 10:05:46 +00:00
|
|
|
-- Exercises strings_with_highest_similarity with three invalid inputs: a
-- non-string query, a candidate list containing a number, and a candidate
-- list containing a table.
-- NOTE(review): bad_call comes from the test environment; presumably it
-- asserts that the call raises an error -- confirm against TestSuite.
SUITE:addTest('strings_with_highest_similarity not defined on non-strings', function()
    -- Each fixture gets its own name instead of redeclaring one local three
    -- times in the same scope, so no declaration shadows an earlier one.

    -- Case 1: the query is a number, the candidates are all strings.
    local all_strings = { 'Ada Lovelace', 'Charles Babbage ', 'Allan Turing', 'Grace Hopper' }
    bad_call(strings_with_highest_similarity, 132, all_strings)

    -- Case 2: a number sits in the middle of the candidate list.
    local with_number = { 'Ada Lovelace', 'Charles Babbage ', 'Allan Turing', 4, 'Grace Hopper' }
    bad_call(strings_with_highest_similarity, 'turning', with_number)

    -- Case 3: a table sits in the middle of the candidate list.
    local with_table = { 'Ada Lovelace', 'Charles Babbage ', 'Allan Turing', {}, 'Grace Hopper' }
    bad_call(strings_with_highest_similarity, 'turning', with_table)
end)
|
|
|
|
|
2017-06-09 13:22:25 +00:00
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
-- Return the suite, with all tests above registered on it, to whatever
-- loaded this file.
return SUITE
|