139 lines
5.0 KiB
Lua
139 lines
5.0 KiB
Lua
|
|
local https = require 'ssl.https'
|
|
|
|
-- Module table: the public functions defined below are attached to it.
local internet = {}
|
|
|
|
--------------------------------------------------------------------------------
|
|
-- Util
|
|
|
|
-- Letters that the logo and stock-photo lookups in this file refuse to query.
local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }

--- Reports whether a string contains any of the letters in SCANDI_SYMBOLS.
-- @param str  the string to inspect
-- @return true when at least one of the letters occurs in `str`, else false
local function string_contains_scandi (str)
    for _, symbol in ipairs(SCANDI_SYMBOLS) do
        -- Plain-text find: the symbols are literal text, not Lua patterns.
        if str:find(symbol, 1, true) then return true end
    end
    return false
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
--- Looks for a logo of `topic` on Clearbit.
-- Probes `<topic>.<tld>` for a fixed list of top-level domains with HEAD
-- requests and stops at the first one answered with status 200.
-- @param topic  non-empty, all-lowercase string without æ/ø/å
-- @return the logo URL on success; otherwise nil plus a message saying why
local function search_clearbit_for_logo (topic)
    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
        return nil, 'Bad topic: '..tostring(topic)
    elseif string_contains_scandi(topic) then
        return nil, 'Clearbit does not like æøå: '..tostring(topic)
    end
    --
    for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
        local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
        -- Only the status code is of interest, so the other results are dropped.
        local _, code = https.request { url = search_url, method = 'HEAD' }
        if code == 200 then return search_url end
    end
    -- No domain matched: report it the same way as the other failure paths.
    return nil, 'Clearbit has no logo for '..topic
end
|
|
|
|
--- Picks a random stock photo for `topic` from the Splashbase search API.
-- @param topic  non-empty, all-lowercase string without æ/ø/å
-- @return the image URL on success; otherwise nil plus a message saying why
-- Raises (via `error`) when the HTTP request itself yields no body, and
-- asserts that the chosen image entry carries a string `url`.
local function search_splashbase_for_image_topic (topic)
    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
        return nil, 'Bad topic: '..tostring(topic)
    elseif string_contains_scandi(topic) then
        return nil, 'Splashbase does not like æøå: '..tostring(topic)
    end

    -- gsub also returns a substitution count; the parentheses keep it out of
    -- the format arguments.
    local query = (topic:gsub('%s', '%%20'))
    local search_url = string.format('http://www.splashbase.co/api/v1/images/search?query=%s', query)
    local body, code = https.request(search_url)
    if not body then error(code) end
    -- NOTE(review): `json` is not declared in the visible part of this file;
    -- presumably a global provided elsewhere - confirm.
    local data = json.decode(body)

    if type(data) ~= 'table' then return nil, 'JSON could not decode data for '..topic end
    -- A response without an `images` list is treated like an empty result.
    local images = data.images
    if type(images) ~= 'table' or #images <= 0 then
        return nil, 'Query returned no data for '..topic
    end

    local img_url = images[math.random(#images)].url
    assert(type(img_url) == 'string')
    return img_url
end
|
|
|
|
-- Query template; the two %s slots take the language subdomain and the
-- '|'-separated list of titles.
local WIKIPEDIA_API_URL = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=pageimages&format=json&piprop=original&redirects=1'

--- Looks up the original page image for one or more Wikipedia titles.
-- @param topics  a single title string or a list of title strings
-- @param language  Wikipedia language subdomain; defaults to 'en'
-- @param topic_to_image_url  optional table to fill in; entries that already
--   hold a URL are left untouched
-- @return the table mapping each reported title (as originally requested) to
--   an image URL, or to false when the page has no image. When the response
--   cannot be used, returns an empty table plus a message.
-- Raises (via `error`) when the HTTP request itself yields no body.
local function search_wikipedia_for_images (topics, language, topic_to_image_url)
    if type(topics) == 'string' then topics = { topics } end
    language = language or 'en'
    topic_to_image_url = topic_to_image_url or {}
    --
    -- Parentheses drop gsub's second result (the substitution count).
    local titles_field = (table.concat(topics, '|'):gsub('%s+', '%%20'))

    local body, code = https.request(WIKIPEDIA_API_URL:format(language, titles_field))
    if not body then error(code) end
    -- NOTE(review): `json` is not declared in the visible part of this file;
    -- presumably a global provided elsewhere - confirm.
    local data = json.decode(body)

    if type(data) ~= 'table' then
        return {}, 'JSON could not decode data from wikipedia for '..titles_field
    end
    local query = data.query
    if type(query) ~= 'table' then
        return {}, 'Wikipedia returned no query result for '..titles_field
    end

    -- Map every rewritten title back to the title that was originally asked
    -- for. Normalizations are recorded first so that a later redirect of a
    -- normalized title still resolves to the original request.
    local redirected_topics = {}
    for _, redirect in pairs(query.normalized or {}) do
        redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
    end
    for _, redirect in pairs(query.redirects or {}) do
        redirected_topics[ redirect.to ] = redirected_topics[ redirect.from ] or redirect.from
    end

    -- Determine topic to image
    for _, page in pairs(query.pages or {}) do
        -- A title that was neither normalized nor redirected has no mapping;
        -- it already is the requested title.
        local orig_title = redirected_topics[ page.title ] or page.title
        if orig_title ~= nil and not topic_to_image_url[orig_title] then
            -- false (not nil) keeps the topic in the table as "no image yet".
            topic_to_image_url[orig_title] = page.original and page.original.source or false
        end
    end
    ---
    return topic_to_image_url
end
|
|
|
|
--- Checks whether every entry of a topic-to-URL table holds a URL.
-- @param topic_to_image_url  table whose values are image URLs or false
-- @return false as soon as one value is falsy, true otherwise (also for an
--   empty table)
local function all_topics_has_image (topic_to_image_url)
    local complete = true
    for _, image_url in pairs(topic_to_image_url) do
        if not image_url then
            complete = false
            break
        end
    end
    return complete
end
|
|
|
|
--- Finds an image URL for each topic, trying progressively broader sources:
-- Danish Wikipedia, English Wikipedia, Clearbit logos, Splashbase stock photos.
-- @param topics  list of topic strings
-- @return table mapping each topic reported by Wikipedia to an image URL, or
--   to false when no source produced one. Empty when `topics` is empty.
-- Raises when `topics` is not a table, and propagates errors raised by the
-- individual lookups.
function internet.search_images (topics)
    assert(type(topics) == 'table')
    if #topics == 0 then return {} end
    local topic_to_image_url = {}
    -- Wikipedia
    search_wikipedia_for_images(topics, 'da', topic_to_image_url)
    -- Stop as soon as every topic is covered; otherwise keep falling back.
    if all_topics_has_image(topic_to_image_url) then return topic_to_image_url end
    search_wikipedia_for_images(topics, 'en', topic_to_image_url)
    -- Logos, then stock photos, for whatever is still missing.
    -- Only existing keys are assigned, which is permitted while traversing
    -- with pairs.
    for topic, val in pairs(topic_to_image_url) do
        if not val then
            local query = topic:lower()
            -- `or false` keeps the topic in the table when a lookup finds
            -- nothing; assigning nil would delete the entry.
            topic_to_image_url[topic] = search_clearbit_for_logo(query)
                or search_splashbase_for_image_topic(query)
                or false
        end
    end
    -- Ret
    return topic_to_image_url
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
--- Downloads `url` and stores the response body in `filename`.
-- @param url  address to fetch
-- @param filename  path of the file to (over)write
-- Raises when the request yields no body, when the file cannot be opened,
-- or when writing fails.
function internet.download_file (url, filename)
    -- retrieve the content of a URL
    -- NOTE(review): the status code is only used as the error value when no
    -- body came back; a non-200 response body is still saved - confirm that
    -- this is intended.
    local body, code = https.request(url)
    if not body then error(code) end

    -- save the content to a file
    local f = assert(io.open(filename, 'wb')) -- open in "binary" mode
    local written, write_err = f:write(body)
    -- Close before reporting so the handle is released on failure too.
    f:close()
    if not written then error(write_err) end
end
|
|
|
|
--------------------------------------------------------------------------------
|
|
|
|
-- Hand the module table back to whoever loaded this chunk.
return internet
|
|
|