Added support for Wikipedia, and moved internet interaction into a new module.

This commit is contained in:
Jon Michael Aanes 2018-06-08 14:46:30 +02:00
parent 5753c04bcd
commit 19a7755306
2 changed files with 145 additions and 71 deletions

138
internet.lua Normal file
View File

@ -0,0 +1,138 @@
local https = require 'ssl.https'
local json = require 'json'
local internet = {}
--------------------------------------------------------------------------------
-- Util
-- Scandinavian letters (both cases) checked for by string_contains_scandi.
local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }

--- Report whether a string contains any Scandinavian letter.
-- @tparam string str Text to inspect.
-- @treturn boolean true if any entry of SCANDI_SYMBOLS occurs in `str`,
--   false otherwise (including for the empty string).
local function string_contains_scandi (str)
  for _, symbol in ipairs(SCANDI_SYMBOLS) do
    -- Plain search: the symbols are literal byte sequences, not Lua patterns.
    if str:find(symbol, 1, true) then return true end
  end
  return false
end
--------------------------------------------------------------------------------
--- Look for a logo for `topic` on Clearbit.
-- Treats the topic as a domain name, probing `<topic>.<tld>` for a fixed list
-- of top-level domains, and returns the first logo URL that answers 200.
-- @tparam string topic Non-empty, lower-case topic without Scandinavian letters.
-- @treturn[1] string URL of the logo.
-- @treturn[2] nil
-- @treturn[2] string Reason why no logo URL is returned.
local function search_clearbit_for_logo (topic)
  if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
    return nil, 'Bad topic: '..tostring(topic)
  elseif string_contains_scandi(topic) then
    return nil, 'Clearbit does not like æøå: '..tostring(topic)
  end
  --
  for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
    local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
    -- A HEAD request is enough: only the status code is inspected.
    local _, code = https.request { url = search_url, method = 'HEAD' }
    if code == 200 then return search_url end
  end
  -- Same `nil, message` shape as the validation failures above.
  return nil, 'Clearbit has no logo for '..topic
end
--- Pick a random stock photo matching `topic` from Splashbase.
-- @tparam string topic Non-empty, lower-case topic without Scandinavian letters.
-- @treturn[1] string URL of a randomly chosen image from the search result.
-- @treturn[2] nil
-- @treturn[2] string Reason why no image URL is returned.
-- Raises an error if the HTTP request itself yields no body, or if the chosen
-- image entry has no string `url` field.
local function search_splashbase_for_image_topic (topic)
  if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
    return nil, 'Bad topic: '..tostring(topic)
  elseif string_contains_scandi(topic) then
    return nil, 'Splashbase does not like æøå: '..tostring(topic)
  end
  -- gsub also returns a replacement count; the parentheses drop it so that
  -- only the escaped query reaches string.format.
  local query = (topic:gsub('%s', '%%20'))
  local search_url = string.format('http://www.splashbase.co/api/v1/images/search?query=%s', query)
  local body, code = https.request(search_url)
  if not body then error(code) end
  local data = json.decode(body)
  if type(data) ~= 'table' then return nil, 'JSON could not decode data for '..topic end
  -- Guard against responses that lack the `images` array altogether.
  local images = data.images
  if type(images) ~= 'table' or #images <= 0 then
    return nil, 'Query returned no data for '..topic
  end
  local img_url = images[math.random(#images)].url
  assert(type(img_url) == 'string')
  return img_url
end
-- Query URL template; placeholders are the wiki language code and the
-- '|'-separated list of page titles.
local WIKIPEDIA_API_URL = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=pageimages&format=json&piprop=original&redirects=1'

--- Look up the main image of one or more Wikipedia pages.
-- Results are stored under the topic as it was requested: titles reported
-- under `query.normalized` / `query.redirects` are mapped back to the name
-- they came from. Topics that already have a URL in `topic_to_image_url` are
-- left untouched; pages without an image are recorded as `false`.
-- @tparam string|table topics A single topic or a list of topics.
-- @tparam[opt='en'] string language Wikipedia language subdomain.
-- @tparam[opt] table topic_to_image_url Mapping to fill in; a new table is
--   created when omitted.
-- @treturn table The (possibly updated) topic -> URL-or-false mapping.
-- @treturn[opt] string Message when the response could not be used; the
--   mapping is then returned unchanged.
-- Raises an error if the HTTP request itself yields no body.
local function search_wikipedia_for_images (topics, language, topic_to_image_url)
  if type(topics) == 'string' then topics = { topics } end
  language = language or 'en'
  topic_to_image_url = topic_to_image_url or {}
  --
  -- Parentheses drop gsub's replacement count.
  local titles_field = (table.concat(topics, '|'):gsub('%s+', '%%20'))
  local body, code = https.request(WIKIPEDIA_API_URL:format(language, titles_field))
  if not body then error(code) end
  local data = json.decode(body)
  if type(data) ~= 'table' then
    return topic_to_image_url, 'JSON could not decode data from wikipedia for '..titles_field
  end
  local query = data.query
  if type(query) ~= 'table' then
    return topic_to_image_url, 'Wikipedia returned no query result for '..titles_field
  end
  -- Map each final page title back to the topic it was requested as.
  -- Normalizations are processed before redirects so that a redirect starting
  -- from a normalized title still resolves to the original topic.
  local redirected_topics = {}
  for _, redirect in pairs(query.normalized or {}) do
    redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
  end
  for _, redirect in pairs(query.redirects or {}) do
    redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
  end
  -- Determine topic to image
  for _, page in pairs(query.pages or {}) do
    -- A title that was neither normalized nor redirected is its own topic;
    -- without this fallback the key would be nil and the store would raise.
    local orig_title = redirected_topics[ page.title ] or page.title
    if orig_title ~= nil and not topic_to_image_url[orig_title] then
      topic_to_image_url[orig_title] = (page.original and page.original.source) or false
    end
  end
  ---
  return topic_to_image_url
end
--- Check that every entry of a topic -> image mapping holds an image URL.
-- @tparam table topic_to_image_url Mapping whose values are URLs or false.
-- @treturn boolean false as soon as a falsy value is met; true otherwise,
--   which includes the empty table.
local function all_topics_has_image (topic_to_image_url)
  local topic, image_url = next(topic_to_image_url)
  while topic ~= nil do
    if not image_url then return false end
    topic, image_url = next(topic_to_image_url, topic)
  end
  return true
end
--- Find an image URL for each topic, trying several sources in turn.
-- Order: Danish Wikipedia, English Wikipedia, Clearbit logos, Splashbase
-- stock photos. Each later source is only consulted for topics that are still
-- missing an image.
-- @tparam table topics List of topic strings.
-- @treturn table Mapping from topic to image URL, or to `false` when no source
--   produced an image for that topic. Empty when `topics` is empty.
function internet.search_images (topics)
  assert(type(topics) == 'table')
  if #topics == 0 then return {} end
  local topic_to_image_url = {}
  -- Seed every topic as unresolved, so the completeness check and the
  -- fallbacks below see it even when Wikipedia reports nothing for it.
  for _, topic in ipairs(topics) do
    if type(topic) == 'string' then topic_to_image_url[topic] = false end
  end
  -- Wikipedia
  search_wikipedia_for_images(topics, 'da', topic_to_image_url)
  -- Stop early only when nothing is missing; otherwise keep looking.
  if all_topics_has_image(topic_to_image_url) then return topic_to_image_url end
  search_wikipedia_for_images(topics, 'en', topic_to_image_url)
  -- Logos
  for topic, val in pairs(topic_to_image_url) do
    if not val then
      -- `or false` keeps the key alive: assigning nil would delete it and hide
      -- the topic from the stock photo pass.
      topic_to_image_url[topic] = search_clearbit_for_logo(topic:lower()) or false
    end
  end
  -- Stock photos
  for topic, val in pairs(topic_to_image_url) do
    if not val then
      topic_to_image_url[topic] = search_splashbase_for_image_topic(topic:lower()) or false
    end
  end
  -- Ret
  return topic_to_image_url
end
--------------------------------------------------------------------------------
--- Download the content of `url` and store it in the file `filename`.
-- @tparam string url Address to fetch.
-- @tparam string filename Path of the file to (over)write.
-- Raises an error if the request yields no body, if the file cannot be opened,
-- or if writing to it fails.
function internet.download_file (url, filename)
  -- retrieve the content of a URL
  local body, code = https.request(url)
  if not body then error(code) end
  -- save the content to a file
  local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
  local ok, write_err = f:write(body)
  -- Close before reporting, so the handle is released on failure too.
  f:close()
  if not ok then error(write_err) end
end
--------------------------------------------------------------------------------
return internet

View File

@ -28,7 +28,6 @@ local FARVEL_INTERVAL = 90
local imlib = require 'imlib2' local imlib = require 'imlib2'
require 'socket' require 'socket'
local https = require 'ssl.https'
local json = require 'json' local json = require 'json'
@ -41,6 +40,8 @@ local signal do
if a then signal = b end if a then signal = b end
end end
local internet = require 'internet'
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Meme utils -- Meme utils
@ -61,55 +62,11 @@ local function flatten_onto(target_img, other_img, x0, y0)
return return
end end
local function search_clearbit_for_logo (topic)
if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
return nil, 'Bad topic: '..tostring(topic)
elseif topic:match 'æ' or topic:match 'ø' or topic:match 'å' then
return nil, 'Splashbase does not like æøå: '..tostring(topic)
end
--
for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
local _, code, headers, status = https.request { url = search_url, method = 'HEAD' }
if code == 200 then return search_url end
end
end
local function search_splashbase_for_image_topic (topic)
if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
return nil, 'Bad topic: '..tostring(topic)
elseif topic:match 'æ' or topic:match 'ø' or topic:match 'å' then
return nil, 'Splashbase does not like æøå: '..tostring(topic)
end
local search_url = string.format('http://www.splashbase.co/api/v1/images/search?query=%s', topic:gsub('%s', '%%20'))
local body, code, headers, status = https.request(search_url)
if not body then error(code) end
local data = json.decode(body)
if not data then return nil, 'JSON could not decode data for '..topic end
if #data.images <= 0 then return nil, 'Query returned no data for '..topic end
local img_url = data.images[math.random(#data.images)].url
assert(type(img_url) == 'string')
return img_url
end
local function download_file (url, filename)
-- retrieve the content of a URL
local body, code = https.request(url)
if not body then error(code) end
-- save the content to a file
local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
f:write(body)
f:close()
end
local function clean_text (text) local function clean_text (text)
return text:gsub('%s+', ' '):match('^%s*(.-)%s*$') return text:gsub('%s+', ' '):match('^%s*(.-)%s*$')
end end
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-- Meme creations -- Meme creations
@ -160,37 +117,16 @@ local function load_random_font ()
return font_name return font_name
end end
local CHANCE_OF_GUARENTEED_IMAGE_SEARCH = 0.05
local IMAGE_CHANCE = 0.5
local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }
local function should_look_for_images (topic)
assert(type(topic) == 'string')
if math.random() < CHANCE_OF_GUARENTEED_IMAGE_SEARCH then return true end
--
if #topic < 2 then return false end
--
for _, symbol in ipairs(SCANDI_SYMBOLS) do
if topic:match(symbol) then return false end
end
--
return math.random() < IMAGE_CHANCE
end
local function fill_in_topics_information (topics) local function fill_in_topics_information (topics)
assert(type(topics) == 'table') assert(type(topics) == 'table')
-- --
local topic_to_image_url = internet.search_images(topics)
--
local new_topics = {} local new_topics = {}
for i, topic in ipairs(topics) do for i, topic in ipairs(topics) do
assert(type(topic) == 'string') assert(type(topic) == 'string')
local topic_l = topic:lower()
local url, msg local url = topic_to_image_url[topic]
if should_look_for_images(topic_l) then
if not url then url, msg = search_clearbit_for_logo (topic_l) end
if not url then url, msg = search_splashbase_for_image_topic(topic_l) end
end
if url then new_topics[i] = { topic = topic, type = 'image', url = url } if url then new_topics[i] = { topic = topic, type = 'image', url = url }
else new_topics[i] = { topic = topic, type = 'text', text = topic } else new_topics[i] = { topic = topic, type = 'text', text = topic }
@ -206,7 +142,7 @@ local function paste_topic_onto_image (target, topic, x, y, w, h, bg_color, font
-- Download and paste found image -- Download and paste found image
if topic.type == 'image' then if topic.type == 'image' then
local url, filename = topic.url, CONFIG.IMGGEN_PATH_OUTPUT..'topic_'..topic.topic..'.png' local url, filename = topic.url, CONFIG.IMGGEN_PATH_OUTPUT..'topic_'..topic.topic..'.png'
download_file(url, filename) internet.download_file(url, filename)
local found_img = imlib.image.load(filename) local found_img = imlib.image.load(filename)
found_img:crop_and_scale(0, 0, found_img:get_width(), found_img:get_height(), w, h) found_img:crop_and_scale(0, 0, found_img:get_width(), found_img:get_height(), w, h)
flatten_onto (target, found_img, x, y) flatten_onto (target, found_img, x, y)