-- memebot/internet.lua

local https = require 'ssl.https'
local json  = require 'dkjson' -- assumption: any JSON module exposing json.decode (dkjson, cjson, ...) works here
local internet = {}
--------------------------------------------------------------------------------
-- Util
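-- Helpers for rejecting topics containing Danish letters, which the image APIs
-- used further down do not accept.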
local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }
local function string_contains_scandi (str)
  for _, symbol in ipairs(SCANDI_SYMBOLS) do
    if str:match(symbol) then return true end
  end
  return false
end
--------------------------------------------------------------------------------
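-- search_clearbit_for_logo: probes Clearbit's logo endpoint with a HEAD request
-- for <topic>.org/.com/.net/.dk and returns the first URL that answers 200,
-- or nil plus an error message. Expects a non-empty, lowercase topic string.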
local function search_clearbit_for_logo (topic)
  if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
    return nil, 'Bad topic: '..tostring(topic)
  elseif string_contains_scandi(topic) then
    return nil, 'Clearbit does not like æøå: '..tostring(topic)
  end
  --
  for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
    local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
    local _, code = https.request { url = search_url, method = 'HEAD' }
    if code == 200 then return search_url end
  end
  return nil, 'No logo found for '..topic
end
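-- search_splashbase_for_image_topic: queries the Splashbase image search API
-- for the topic and returns the URL of a randomly chosen result, or nil plus
-- an error message when nothing decodes or matches.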
local function search_splashbase_for_image_topic (topic)
  if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
    return nil, 'Bad topic: '..tostring(topic)
  elseif string_contains_scandi(topic) then
    return nil, 'Splashbase does not like æøå: '..tostring(topic)
  end
  -- Assumes the Splashbase API is reachable over HTTPS (ssl.https cannot fetch plain http URLs)
  local search_url = string.format('https://www.splashbase.co/api/v1/images/search?query=%s', topic:gsub('%s', '%%20'))
  local body, code, headers, status = https.request(search_url)
  if not body then error(code) end
  local data = json.decode(body)
  if not data then return nil, 'JSON could not decode data for '..topic end
  if not data.images or #data.images == 0 then return nil, 'Query returned no data for '..topic end
  local img_url = data.images[math.random(#data.images)].url
  assert(type(img_url) == 'string')
  return img_url
end
local WIKIPEDIA_API_URL = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=pageimages&format=json&piprop=original&redirects=1'
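-- search_wikipedia_for_images: asks the MediaWiki 'pageimages' API for the lead
-- image of each topic on the given language wiki (default 'en'), maps
-- normalized/redirected titles back to the requested topic names, and fills
-- topic_to_image_url with an image URL per topic (false when the page has none).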
local function search_wikipedia_for_images (topics, language, topic_to_image_url)
  if type(topics) == 'string' then topics = { topics } end
  local language = language or 'en'
  local topic_to_image_url = topic_to_image_url or {}
  --
  local titles_field = table.concat(topics, '|'):gsub('%s+', '%%20')
  local body, code, headers, status = https.request(WIKIPEDIA_API_URL:format(language, titles_field))
  if not body then error(code) end
  local data = json.decode(body)
  if not data then return topic_to_image_url, 'JSON could not decode data from wikipedia for '..titles_field end
  if not (data.query and data.query.pages) then return topic_to_image_url, 'Unexpected wikipedia response for '..titles_field end
  -- Map normalized and redirected titles back to the originally requested topics
  local redirected_topics = {}
  for _, redirect in pairs(data.query.normalized or {}) do
    redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
  end
  for _, redirect in pairs(data.query.redirects or {}) do
    redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
  end
  -- Determine topic to image
  for _, page in pairs(data.query.pages) do
    local orig_title = redirected_topics[ page.title ] or page.title
    if not topic_to_image_url[orig_title] then
      local found_url = false
      if page.original then found_url = page.original.source end
      topic_to_image_url[orig_title] = found_url
    end
  end
  ---
  return topic_to_image_url
end
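-- True when every topic in the map already has a (truthy) image URL.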
local function all_topics_has_image (topic_to_image_url)
  for _, url in pairs(topic_to_image_url) do
    if not url then return false end
  end
  return true
end
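-- Public entry point: resolves each topic to an image URL by trying Danish
-- Wikipedia, then English Wikipedia, then Clearbit logos, then Splashbase stock
-- photos. The returned table maps topic -> URL, or false when nothing was found.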
function internet.search_images (topics)
  assert(type(topics) == 'table')
  if #topics == 0 then return {} end
  local topic_to_image_url = {}
  -- Wikipedia
  search_wikipedia_for_images(topics, 'da', topic_to_image_url)
  if all_topics_has_image(topic_to_image_url) then return topic_to_image_url end
  search_wikipedia_for_images(topics, 'en', topic_to_image_url)
  -- Logos
  for topic, val in pairs(topic_to_image_url) do
    if not val then
      -- 'or false' keeps the key in the table when the lookup fails
      topic_to_image_url[topic] = search_clearbit_for_logo(topic:lower()) or false
    end
  end
  -- Stock photos
  for topic, val in pairs(topic_to_image_url) do
    if not val then
      topic_to_image_url[topic] = search_splashbase_for_image_topic(topic:lower()) or false
    end
  end
  -- Return
  return topic_to_image_url
end
--------------------------------------------------------------------------------
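-- download_file: fetches url over HTTPS and writes the raw body to filename;
-- raises an error if the request or the file write fails.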
function internet.download_file (url, filename)
  -- retrieve the content of a URL
  local body, code = https.request(url)
  if not body then error(code) end
  -- save the content to a file
  local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
  f:write(body)
  f:close()
end
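-- Illustrative usage sketch (not part of the module). It assumes this file is
-- reachable on package.path as 'memebot.internet' and that outbound HTTPS works:
--
--   local internet = require 'memebot.internet'
--   local urls = internet.search_images { 'lego', 'carlsberg' }
--   for topic, url in pairs(urls) do
--     if url then internet.download_file(url, topic..'.png') end
--   end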
--------------------------------------------------------------------------------
return internet