From 19a77553065a67a5ff5046cbe55b2e4b7165177d Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Fri, 8 Jun 2018 14:46:30 +0200
Subject: [PATCH] Added support for wikipedia, and move internet interaction into new module.

---
Review notes:
 * This patch was re-flowed from a whitespace-collapsed copy. Indentation and
   the position of blank context lines in the main.lua hunks are reconstructed
   (line counts match the hunk headers); verify against main.lua before applying.
 * internet.lua differs from the originally committed blob (index a484d76):
   it now requires 'json', string_contains_scandi reads its own parameter,
   unredirected Wikipedia pages keep their title, the early return in
   search_images is no longer inverted, and failed fallbacks keep their key.

 internet.lua | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++
 main.lua     |  78 ++----------------------
 2 files changed, 174 insertions(+), 71 deletions(-)
 create mode 100644 internet.lua

diff --git a/internet.lua b/internet.lua
new file mode 100644
index 0000000..a484d76
--- /dev/null
+++ b/internet.lua
@@ -0,0 +1,167 @@
+
+local https = require 'ssl.https'
+local json = require 'json'
+
+local internet = {}
+
+--------------------------------------------------------------------------------
+-- Util
+
+local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }
+
+-- Returns true when `str` contains one of the letters in SCANDI_SYMBOLS.
+local function string_contains_scandi (str)
+    for _, symbol in ipairs(SCANDI_SYMBOLS) do
+        -- Plain find: the symbols are literal byte sequences, not patterns.
+        if str:find(symbol, 1, true) then return true end
+    end
+    return false
+end
+
+--------------------------------------------------------------------------------
+
+-- Probes Clearbit for a logo at `<topic>.<tld>` for a few common TLDs.
+-- Returns the first URL that answers 200, or nil plus a message.
+local function search_clearbit_for_logo (topic)
+    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
+        return nil, 'Bad topic: '..tostring(topic)
+    elseif string_contains_scandi(topic) then
+        return nil, 'Clearbit does not like æøå: '..tostring(topic)
+    end
+    --
+    for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
+        local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
+        local _, code = https.request { url = search_url, method = 'HEAD' }
+        if code == 200 then return search_url end
+    end
+    return nil, 'Clearbit has no logo for '..topic
+end
+
+-- Asks Splashbase for stock photos matching `topic`.
+-- Returns the URL of a random hit, or nil plus a message. Raises if the request fails.
+local function search_splashbase_for_image_topic (topic)
+    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
+        return nil, 'Bad topic: '..tostring(topic)
+    elseif string_contains_scandi(topic) then
+        return nil, 'Splashbase does not like æøå: '..tostring(topic)
+    end
+
+    -- Parentheses drop gsub's second return value (the replacement count).
+    local query = (topic:gsub('%s', '%%20'))
+    local search_url = string.format('http://www.splashbase.co/api/v1/images/search?query=%s', query)
+    local body, code = https.request(search_url)
+    if not body then error(code) end
+    local data = json.decode(body)
+
+    if not data then return nil, 'JSON could not decode data for '..topic end
+    if type(data.images) ~= 'table' or #data.images <= 0 then
+        return nil, 'Query returned no data for '..topic
+    end
+
+    local img_url = data.images[math.random(#data.images)].url
+    assert(type(img_url) == 'string')
+    return img_url
+end
+
+local WIKIPEDIA_API_URL = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=pageimages&format=json&piprop=original&redirects=1'
+
+-- Looks up the page image of each topic on the `language` Wikipedia.
+-- Fills `topic_to_image_url` (topic -> url, or false when the page has no
+-- image) without overwriting URLs that are already present, and returns it.
+-- Raises if the request fails.
+local function search_wikipedia_for_images (topics, language, topic_to_image_url)
+    if type(topics) == 'string' then topics = { topics } end
+    language = language or 'en'
+    topic_to_image_url = topic_to_image_url or {}
+    --
+    local titles_field = table.concat(topics, '|'):gsub('%s+', '%%20')
+
+    local body, code = https.request(WIKIPEDIA_API_URL:format(language, titles_field))
+    if not body then error(code) end
+    local data = json.decode(body)
+
+    if not data then
+        return topic_to_image_url, 'JSON could not decode data from wikipedia for '..titles_field
+    elseif type(data.query) ~= 'table' then
+        return topic_to_image_url, 'Wikipedia returned no query result for '..titles_field
+    end
+
+    -- Map final page titles back to the topic strings that were asked for.
+    local redirected_topics = {}
+    for _, redirect in pairs(data.query.normalized or {}) do
+        redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
+    end
+    for _, redirect in pairs(data.query.redirects or {}) do
+        redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
+    end
+
+    -- Determine topic to image
+    for _, page in pairs(data.query.pages or {}) do
+        -- Pages that were neither normalized nor redirected keep their title.
+        local orig_title = redirected_topics[ page.title ] or page.title
+        if orig_title and not topic_to_image_url[orig_title] then
+            topic_to_image_url[orig_title] = page.original and page.original.source or false
+        end
+    end
+    ---
+    return topic_to_image_url
+end
+
+-- Returns true when every topic in the map has a URL.
+local function all_topics_has_image (topic_to_image_url)
+    for _, url in pairs(topic_to_image_url) do
+        if not url then return false end
+    end
+    return true
+end
+
+-- Finds an image URL for each topic, trying Danish Wikipedia, English
+-- Wikipedia, Clearbit logos and Splashbase stock photos in that order.
+-- Returns a table mapping each topic to a URL, or false when none was found.
+function internet.search_images (topics)
+    assert(type(topics) == 'table')
+    if #topics == 0 then return {} end
+    -- Seed every topic so the fallbacks below also see topics that a
+    -- provider never reported back.
+    local topic_to_image_url = {}
+    for _, topic in ipairs(topics) do topic_to_image_url[topic] = false end
+    -- Wikipedia
+    search_wikipedia_for_images(topics, 'da', topic_to_image_url)
+    if all_topics_has_image(topic_to_image_url) then return topic_to_image_url end
+    search_wikipedia_for_images(topics, 'en', topic_to_image_url)
+    -- Logos. `or false` keeps the key, so the next fallback still sees it.
+    for topic, val in pairs(topic_to_image_url) do
+        if not val then
+            topic_to_image_url[topic] = search_clearbit_for_logo(topic:lower()) or false
+        end
+    end
+    -- Stock photos
+    for topic, val in pairs(topic_to_image_url) do
+        if not val then
+            topic_to_image_url[topic] = search_splashbase_for_image_topic(topic:lower()) or false
+        end
+    end
+    -- Ret
+    return topic_to_image_url
+end
+
+--------------------------------------------------------------------------------
+
+-- Downloads `url` and writes the response body to `filename`.
+-- Raises if the request fails or the file cannot be opened or written.
+function internet.download_file (url, filename)
+    -- retrieve the content of a URL
+    local body, code = https.request(url)
+    if not body then error(code) end
+
+    -- save the content to a file
+    local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
+    local ok, err = f:write(body)
+    f:close() -- close before raising so the handle is not leaked
+    if not ok then error(err) end
+end
+
+--------------------------------------------------------------------------------
+
+return internet
+
diff --git a/main.lua b/main.lua
index e23697d..03b457f 100644
--- a/main.lua
+++ b/main.lua
@@ -28,7 +28,6 @@ local FARVEL_INTERVAL = 90
 
 local imlib = require 'imlib2'
 require 'socket'
-local https = require 'ssl.https'
 local json = require 'json'
 
 
@@ -41,6 +40,8 @@ local signal do
     if a then signal = b end
 end
 
+local internet = require 'internet'
+
 --------------------------------------------------------------------------------
 -- Meme utils
 
@@ -61,55 +62,11 @@ local function flatten_onto(target_img, other_img, x0, y0)
     return
 end
 
-local function search_clearbit_for_logo (topic)
-    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
-        return nil, 'Bad topic: '..tostring(topic)
-    elseif topic:match 'æ' or topic:match 'ø' or topic:match 'å' then
-        return nil, 'Splashbase does not like æøå: '..tostring(topic)
-    end
-    --
-    for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
-        local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
-        local _, code, headers, status = https.request { url = search_url, method = 'HEAD' }
-        if code == 200 then return search_url end
-    end
-end
-
-local function search_splashbase_for_image_topic (topic)
-    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
-        return nil, 'Bad topic: '..tostring(topic)
-    elseif topic:match 'æ' or topic:match 'ø' or topic:match 'å' then
-        return nil, 'Splashbase does not like æøå: '..tostring(topic)
-    end
-
-    local search_url = string.format('http://www.splashbase.co/api/v1/images/search?query=%s', topic:gsub('%s', '%%20'))
-    local body, code, headers, status = https.request(search_url)
-    if not body then error(code) end
-    local data = json.decode(body)
-
-    if not data then return nil, 'JSON could not decode data for '..topic end
-    if #data.images <= 0 then return nil, 'Query returned no data for '..topic end
-
-    local img_url = data.images[math.random(#data.images)].url
-    assert(type(img_url) == 'string')
-    return img_url
-end
-
-local function download_file (url, filename)
-    -- retrieve the content of a URL
-    local body, code = https.request(url)
-    if not body then error(code) end
-
-    -- save the content to a file
-    local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
-    f:write(body)
-    f:close()
-end
-
 local function clean_text (text)
     return text:gsub('%s+', ' '):match('^%s*(.-)%s*$')
 end
 
+
 --------------------------------------------------------------------------------
 -- Meme creations
 
@@ -160,37 +117,16 @@ local function load_random_font ()
     return font_name
 end
 
-local CHANCE_OF_GUARENTEED_IMAGE_SEARCH = 0.05
-local IMAGE_CHANCE = 0.5
-local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }
-
-
-local function should_look_for_images (topic)
-    assert(type(topic) == 'string')
-    if math.random() < CHANCE_OF_GUARENTEED_IMAGE_SEARCH then return true end
-    --
-    if #topic < 2 then return false end
-    --
-    for _, symbol in ipairs(SCANDI_SYMBOLS) do
-        if topic:match(symbol) then return false end
-    end
-    --
-    return math.random() < IMAGE_CHANCE
-end
-
 local function fill_in_topics_information (topics)
     assert(type(topics) == 'table')
     --
+    local topic_to_image_url = internet.search_images(topics)
+    --
     local new_topics = {}
     for i, topic in ipairs(topics) do
         assert(type(topic) == 'string')
 
-        local topic_l = topic:lower()
-        local url, msg
-        if should_look_for_images(topic_l) then
-            if not url then url, msg = search_clearbit_for_logo (topic_l) end
-            if not url then url, msg = search_splashbase_for_image_topic(topic_l) end
-        end
+        local url = topic_to_image_url[topic]
 
         if url then new_topics[i] = { topic = topic, type = 'image', url = url }
         else new_topics[i] = { topic = topic, type = 'text', text = topic }
@@ -206,7 +142,7 @@ local function paste_topic_onto_image (target, topic, x, y, w, h, bg_color, font
     -- Download and paste found image
     if topic.type == 'image' then
         local url, filename = topic.url, CONFIG.IMGGEN_PATH_OUTPUT..'topic_'..topic.topic..'.png'
-        download_file(url, filename)
+        internet.download_file(url, filename)
         local found_img = imlib.image.load(filename)
         found_img:crop_and_scale(0, 0, found_img:get_width(), found_img:get_height(), w, h)
         flatten_onto (target, found_img, x, y)