-- Test suite for the `string_distance` module.
local SUITE = require('TestSuite')('string_distance')

-- Load the module under test once and register the functions the tests call.
-- NOTE(review): setEnviroment presumably exposes these entries as free names
-- inside the test bodies; confirm against TestSuite.
local string_distance = require('string_distance')

SUITE:setEnviroment {
    levenshtein                     = string_distance.levenshtein,
    longest_common_subsequence      = string_distance.longest_common_subsequence,
    jaccard_similarity_of_words     = string_distance.jaccard_similarity_of_words,
    strings_with_highest_similarity = string_distance.strings_with_highest_similarity,
}

--------------------------------------------------------------------------------
-- Levenshtein

SUITE:addTest('levenshtein example 1', function()
    -- From: https://en.wikipedia.org/wiki/Levenshtein_distance
    assert_equal(3, levenshtein('kitten', 'sitting'))
end)

SUITE:addTest('levenshtein example 2', function()
    -- From: https://people.cs.pitt.edu/~kirk/cs1501/Pruhs/Spring2006/assignments/editdistance/Levenshtein%20Distance.htm
    assert_equal(2, levenshtein('gumbo', 'gambol'))
end)

SUITE:addTest('levenshtein example 3', function()
    -- From: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    assert_equal(1, levenshtein('hello', 'jello'))
end)

SUITE:addTest('levenshtein example 4', function()
    -- From: https://secweb.cs.odu.edu/~zeil/cs361/web/website/Lectures/styles/pages/editdistance.html
    assert_equal(3, levenshtein('good', 'goodbye'))
end)

SUITE:addTest('levenshtein identical strings have distance 0', function()
    -- Comparing any string against itself must need no edits.
    local samples = {'hello', 'kitten', 'sitting', 'jello', 'good'}
    for _, sample in pairs(samples) do
        assert_equal(0, levenshtein(sample, sample))
    end
end)

SUITE:addTest('levenshtein normalizes to lowercase', function()
    -- Same word pair as example 1 with mixed casing; the expected distance
    -- is unchanged.
    assert_equal(3, levenshtein('kItten', 'sitTiNg'))
end)

SUITE:addTest('levenshtein not defined for non-strings', function()
    -- NOTE(review): bad_call presumably asserts that the call raises an
    -- error; confirm against TestSuite.
    bad_call(levenshtein, 5, 'hi')
    bad_call(levenshtein, 'derp', {})
end)

--------------------------------------------------------------------------------
-- Longest common subsequence

SUITE:addTest('subsequence example 1', function()
    -- From: http://www.geeksforgeeks.org/dynamic-programming-set-4-longest-common-subsequence/
    assert_equal(4, longest_common_subsequence('AGGTAB', 'GXTXAYB'))
end)

SUITE:addTest('subsequence example 2', function()
    -- From: http://www.cs.cmu.edu/afs/cs/academic/class/15451-s15/LectureNotes/lecture04.pdf
    assert_equal(4, longest_common_subsequence('ABAZDC', 'BACBAD'))
end)

SUITE:addTest('subsequence normalizes to lowercase', function()
    -- Same strings as example 1 with mixed casing; the expected length is
    -- unchanged.
    assert_equal(4, longest_common_subsequence('AGGtAB', 'GXTXAYb'))
end)

SUITE:addTest('subsequence not defined for non-strings', function()
    bad_call(longest_common_subsequence, 5, 'hi')
    bad_call(longest_common_subsequence, 'derp', {})
end)

--------------------------------------------------------------------------------
-- Jaccard Similarity

SUITE:addTest('jaccard example 1', function()
    -- One shared word ('hello') out of three distinct words overall.
    assert_equal(
        1/3,
        jaccard_similarity_of_words('hello world', 'hello planet')
    )
end)

SUITE:addTest('jaccard identical strings', function()
    assert_equal(
        1,
        jaccard_similarity_of_words('hello world', 'hello world')
    )
end)

SUITE:addTest('jaccard identical words', function()
    -- Same words in a different order are expected to score 1.
    assert_equal(
        1,
        jaccard_similarity_of_words('hello world', 'world hello')
    )
end)

SUITE:addTest('jaccard CamelCase works', function()
    -- A CamelCase identifier is expected to match its space-separated form.
    assert_equal(
        1,
        jaccard_similarity_of_words('HelloWorld', 'hello world')
    )
end)

SUITE:addTest('jaccard snake_case works', function()
    -- A snake_case identifier is expected to match a camelCase identifier
    -- made of the same words.
    assert_equal(
        1,
        jaccard_similarity_of_words('hello_world', 'worldHello')
    )
end)

SUITE:addTest('jaccard singlewords', function()
    -- Two different single words share nothing, so the expected score is 0.
    assert_equal(
        0,
        jaccard_similarity_of_words('hello', 'world')
    )
end)

--------------------------------------------------------------------------------
-- strings_with_highest_similarity
SUITE:addTest('strings_with_highest_similarity example 1', function()
    -- The misspelled query 'turning' is expected to rank 'Allan Turing'
    -- first among the candidate names.
    local candidates = {
        'Ada Lovelace',
        'Charles Babbage ',
        'Allan Turing',
        'Grace Hopper',
    }
    local ranked = strings_with_highest_similarity('turning', candidates)
    assert_equal('Allan Turing', ranked[1])
end)

--------------------------------------------------------------------------------

return SUITE