local http = require 'socket.http'
local https = require 'ssl.https'
local md5 = require 'md5'
local json = require 'json'
local htmlparser = require 'htmlparser'

local internet = {}

--------------------------------------------------------------------------------
-- Util

local function assert_equal (a, b)
    if a ~= b then
        error(('Assertion failed!\n\tThis : %s\n\tShould be : %s'):format(a, b))
    end
    return true
end

local SCANDI_SYMBOLS = { 'æ', 'Æ', 'ø', 'Ø', 'å', 'Å' }

local function string_contains_scandi (str)
    assert(type(str) == 'string')
    for _, symbol in ipairs(SCANDI_SYMBOLS) do
        if str:match(symbol) then return true end
    end
    return false
end

local function escape_url (url, non_standard)
    local non_standard = non_standard or {}
    return (url:gsub(' ', non_standard[' '] or '%%20'):gsub(',', non_standard[','] or '%%2C'))
end

-- Escape the pattern magic characters +, -, ? and * so the text can be used in string.match
local function escape_pattern (text)
    return (text:gsub('[%+%-%?%*]', '%%%1'))
end

-- Follow a path of keys into a nested table; returns nil and the missing key on failure
local function safe_access (base, path)
    for i = 1, #path do
        local item = base[path[i]]
        if not item then return nil, path[i] end
        base = item
    end
    return base
end

local function generic_request (...)
    --print('Request', ...)
    local output, code, headers, status = https.request(...)
    --print('Https', output, code, headers, status)
    if code ~= nil and status ~= 'connection refused' then
        return output, code, headers, status
    end
    output, code, headers, status = http.request(...)
    --print('Http', output, code, headers, status)
    return output, code, headers, status
end

local function report_https_request_error (status, code)
    local f = io.stdout
    f:write 'Error when attempting request:\n'
    f:write(' Status: '..tostring(status)..'\n')
    f:write(' Code: '..tostring(code)..'\n')
    --f:write(' Headers:\n ')
end

--------------------------------------------------------------------------------
-- Searching Clearbit for logos
-- Contains logos

local function search_clearbit_for_logo (topic)
    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
        return nil, 'Bad topic: '..tostring(topic)
    elseif string_contains_scandi(topic) then
        return nil, 'Clearbit does not like æøå: '..tostring(topic)
    end
    --
    for _, domain in ipairs { 'org', 'com', 'net', 'dk' } do
        local search_url = ('https://logo-core.clearbit.com/%s.%s'):format(topic, domain)
        local _, code, headers, status = https.request { url = search_url, method = 'HEAD' }
        if code == 200 then return search_url end
    end
end

--------------------------------------------------------------------------------
-- Searching Shutterstock for stock photos

local function search_shutterstock_for_stock_photoes (topic)
    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
        return nil, 'Bad topic: '..tostring(topic)
    elseif string_contains_scandi(topic) then
        return nil, 'Shutterstock does not like æøå: '..tostring(topic)
    end
    local search_url = 'https://www.shutterstock.com/search/'..escape_url(topic)
    local body, code, headers, status = https.request(search_url)
    if not body then error(code) end
    local html = htmlparser.parse(body, 10000)
    if not html then return nil, 'HTML could not decode data for '..topic end
    local img_elems = html:select 'img.z_g_i'
    if #img_elems == 0 then return nil, 'No image elements found for '..topic end
    local img_url = img_elems[math.random(#img_elems)].attributes.src
    assert(type(img_url) == 'string')
    return img_url
end
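-- Illustrative sketch (kept commented out so nothing runs at load time) of how
-- the local provider helpers above behave. The concrete URLs are hypothetical
-- examples of the shape of a successful result, not recorded responses.
--[[
local logo_url, why = search_clearbit_for_logo('github')
-- On success: the first of github.org/.com/.net/.dk that answers 200 to a HEAD
-- request, e.g. 'https://logo-core.clearbit.com/github.com'.
-- On rejection: logo_url is nil and `why` explains the reason.

local photo_url = search_shutterstock_for_stock_photoes('horse')
-- On success: the src attribute of a randomly chosen 'img.z_g_i' element from
-- the Shutterstock search results page.
--]]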
--------------------------------------------------------------------------------
-- Searching Splashbase for fairly-licensed stock photos

local function search_splashbase_for_stock_photoes (topic)
    if not (type(topic) == 'string' and topic == topic:lower() and #topic > 0) then
        return nil, 'Bad topic: '..tostring(topic)
    elseif string_contains_scandi(topic) then
        return nil, 'Splashbase does not like æøå: '..tostring(topic)
    end
    local search_url = escape_url('http://www.splashbase.co/api/v1/images/search?query='..topic)
    local body, code, headers, status = https.request(search_url)
    if not body then error(code) end
    local data = json.decode(body)
    if not data then return nil, 'JSON could not decode data for '..topic end
    if not data.images or #data.images == 0 then return nil, 'Query returned no data for '..topic end
    local img_url = data.images[math.random(#data.images)].url
    assert(type(img_url) == 'string')
    return img_url
end

--------------------------------------------------------------------------------
-- Search Wikipedia for images on pages

local WIKIPEDIA_API_URL = 'https://%s.wikipedia.org/w/api.php?action=query&titles=%s&prop=pageimages&format=json&piprop=original&redirects=1&prop=categories&prop=links'
--[[
local WIKIPEDIA_DISAMBIGUATION_CATEGORIES_FOR_LANG = {
    da = 'Kategori:Flertydig',
    en = 'Category:All disambiguation pages',
}
--]]
local WIKIPEDIA_CONTENT_NAMESPACE = 0

-- Collect links on a disambiguation page whose titles contain the page's own
-- title and that live in the main content namespace
local function get_disambiguation_links (page)
    assert(type(page) == 'table')
    --
    local pagename = escape_pattern(page.title:lower())
    --
    local links = {}
    for _, link_info in pairs(page.links or {}) do
        if link_info.title
            and link_info.title:lower():match(pagename)
            and link_info.ns == WIKIPEDIA_CONTENT_NAMESPACE
        then
            links[#links+1] = link_info.title
        end
    end
    --
    return links
end

local function get_wikipedia_pages (topics, language)
    assert(type(topics) == 'table')
    assert(type(language) == 'string')
    --
    local titles_field = escape_url(table.concat(topics, '|'))
    local body, code, headers, status = https.request(WIKIPEDIA_API_URL:format(language, titles_field))
    if not body then
        report_https_request_error(status, code)
        return {}
    end
    local data = json.decode(body)
    if not data then return nil, 'JSON could not decode data from wikipedia for '..titles_field end
    --if data.success ~= 1 then return nil, 'Query was incorrect in some way' end
    -- Index pages by title, and also under the titles they were normalized or redirected from
    local pages = {}
    for _, page in pairs(data.query.pages) do
        pages[page.title] = page
    end
    for _, normalization in pairs(data.query.normalized or {}) do
        pages[normalization.from] = pages[normalization.to]
    end
    for _, redirect in pairs(data.query.redirects or {}) do
        pages[redirect.from] = pages[redirect.to]
    end
    --
    return pages
end

--------------------------------------------------------------------------------
-- Search Wikidata for infobox images

local WIKIMEDIA_IMAGE_PATH = 'https://upload.wikimedia.org/wikipedia/commons/%s/%s/%s'
local WIKIDATA_API_URL = 'https://www.wikidata.org/w/api.php?action=wbgetentities&sites=%s&titles=%s&languages=en&props=claims&format=json&redirects=yes&normalize=yes'

-- Q4167410 is Wikidata's "disambiguation page" item; an entity that is an
-- instance of it (P31) represents a disambiguation page
local function is_disambiguation_entity (entity)
    local is_a_list = safe_access(entity, {'claims', 'P31'})
    if not is_a_list then return false end
    for _, is_a_attr in ipairs(is_a_list) do
        if safe_access(is_a_attr, {'mainsnak', 'datavalue', 'value', 'id'}) == 'Q4167410' then
            return true
        end
    end
    return false
end

-- Prefer a flag image (P41), then a logo image (P154), then a plain image (P18)
local IMAGE_CLAIM_PRIORITY = { 'P41', 'P154', 'P18' }

local function select_image_filename_from_entity (entity)
    if not entity.claims then return nil end
    for _, image_property_name in ipairs(IMAGE_CLAIM_PRIORITY) do
        local claim = entity.claims[image_property_name]
        for _, subclaim in pairs(claim or {}) do
            local filename = safe_access(subclaim, { 'mainsnak', 'datavalue', 'value' })
            if filename then return filename end
        end
    end
end
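-- For orientation: an abbreviated sketch of the wbgetentities response shape
-- that safe_access walks in the two helpers above. Only the fields this module
-- actually reads are shown; the entity key, item id and filename are placeholders.
--[[
{
    success = 1,
    entities = {
        Q12345 = {
            claims = {
                P31 = {  -- "instance of"; an id of Q4167410 marks a disambiguation page
                    { mainsnak = { datavalue = { value = { id = 'Q5' } } } },
                },
                P18 = {  -- "image"; the value is a Wikimedia Commons filename
                    { mainsnak = { datavalue = { value = 'Some image.jpg' } } },
                },
            },
        },
    },
}
--]]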
local function search_wikidata_for_image (topic, language)
    -- Assert and correction
    assert(type(topic) == 'string' or topic == nil)
    assert(type(language) == 'string' or language == nil)
    local language = language or 'en'
    local site = language..'wiki'
    -- Download and parse
    local body, code, headers, status = https.request(WIKIDATA_API_URL:format(site, escape_url(topic)))
    if not body then
        report_https_request_error(status, code)
        return nil
    end
    local data = json.decode(body)
    if not data then return nil, 'JSON could not decode data from wikidata for '..topic end
    if data.success ~= 1 then return nil, 'Query was incorrect in some way' end
    -- Find entity
    local entity_key = next(data.entities)
    if not entity_key then return end
    assert(next(data.entities, entity_key) == nil)
    local entity = data.entities[entity_key]
    -- If we hit a disambiguation entity, pick a random link from the matching
    -- Wikipedia disambiguation page and try again
    if is_disambiguation_entity(entity) then
        local wikipedia_pages = get_wikipedia_pages({topic}, language)
        local links = get_disambiguation_links(wikipedia_pages[topic])
        if #links == 0 then
            return nil, 'Hit a disambiguation page on '..language..' wikipedia for "'..topic..'", but could not find any links!'
        end
        return search_wikidata_for_image(links[math.random(#links)], language)
    end
    -- Find image, if any
    local filename = select_image_filename_from_entity(entity)
    if not filename then return end
    filename = filename:gsub(' ', '_')
    local hex = md5.sumhexa(filename)
    local url = WIKIMEDIA_IMAGE_PATH:format(hex:sub(1, 1), hex:sub(1, 2), filename)
    return escape_url(url)
end

-- Smoke tests; these run at load time and require network access
assert_equal(
    search_wikidata_for_image('Java', 'en'),
    'https://upload.wikimedia.org/wikipedia/commons/0/0b/Gunung_Merapi_2006-05-14%2C_MODIS.jpg'
)
assert_equal(
    search_wikidata_for_image('poop emoji', 'en'),
    'https://upload.wikimedia.org/wikipedia/commons/6/6a/Emoji_u1f4a9.svg'
)

--------------------------------------------------------------------------------
-- General search for images

function internet.search_images (topics)
    assert(type(topics) == 'table')
    if #topics == 0 then return {} end
    -- Init
    local topic_to_image_url = {}
    for _, topic in ipairs(topics) do
        local val
        -- Wikidata
        if not val then val = search_wikidata_for_image(topic, 'da') end
        if not val then val = search_wikidata_for_image(topic, 'en') end
        -- Logos
        if not val then val = search_clearbit_for_logo(topic:lower()) end
        -- Stock photos
        if not val then val = search_shutterstock_for_stock_photoes(topic) end
        if not val then val = search_splashbase_for_stock_photoes(topic:lower()) end
        topic_to_image_url[topic] = val
    end
    -- Return
    return topic_to_image_url
end

--------------------------------------------------------------------------------
-- Find images on reddit

function internet.find_reddit_memes (subreddit, filter)
    -- Error check
    assert(type(subreddit) == 'string')
    filter = filter or function () return true end
    assert(type(filter) == 'function')
    --
    local search_url = escape_url('https://www.reddit.com/r/'..subreddit..'/new.json')
    local body, code, headers, status = https.request(search_url)
    if not body then
        report_https_request_error(status, code)
        return {}
    end
    local data = json.decode(body)
    if not (data and data.data and data.data.children) then return {} end
    local memes = {}
    for _, meme_data in pairs(data.data.children) do
        meme_data = meme_data.data
        local success = filter(meme_data)
        if success then memes[#memes+1] = meme_data end
    end
    return memes
end
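-- Sketch of how a caller might use find_reddit_memes with a filter. The fields
-- read from meme_data (url, over_18, title) come from reddit's listing JSON and
-- are assumptions about what a caller cares about; kept commented out so the
-- module does not hit reddit at load time.
--[[
local image_posts = internet.find_reddit_memes('dankmemes', function (meme_data)
    return not meme_data.over_18
        and type(meme_data.url) == 'string'
        and meme_data.url:match '%.jpe?g$' ~= nil
end)
for _, post in ipairs(image_posts) do
    print(post.title, post.url)
end
--]]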
--------------------------------------------------------------------------------
-- Download file

-- Retrieve the content of a URL and store it in filename
function internet.download_file (url, filename)
    assert(type(url) == 'string')
    assert(type(filename) == 'string')
    if url:match '^file://' then
        local path = url:match '^file://(.+)$'
        os.execute('cp "'..path..'" "'..filename..'"')
        return true
    end
    --local body, code, headers, status = generic_request(url)
    local body, code, headers, status = https.request(url)
    if code ~= 200 then
        return false, code
        --error(('Connection to "%s" failed, with error "%s"'):format(url, status))
    end
    assert(type(body) == 'string')
    -- Save the content to a file, opened in "binary" mode
    local f = assert(io.open(filename, 'wb'))
    f:write(body)
    f:close()
    return true
end

function internet.download_video (url)
    assert(type(url) == 'string')
    local video_filename = os.tmpname()
    -- Note: comparing the status against 0 assumes Lua 5.1 / LuaJIT, where
    -- os.execute returns the exit status as a number
    local status = os.execute(('youtube-dl "%s" -o "%s"'):format(url, video_filename))
    assert(status == 0)
    return video_filename..'.mkv'
end

function internet.download_headers (url)
    assert(type(url) == 'string')
    --
    local _, code, headers, status = generic_request { url = url, method = 'HEAD' }
    --
    return headers
end

--------------------------------------------------------------------------------

return internet
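-- Usage sketch for the module as a whole (commented out; the name passed to
-- require depends on this file's name and is an assumption here, as are the
-- example topics and output filenames):
--[[
local internet = require 'internet'
local urls = internet.search_images { 'Danmark', 'Java' }
for topic, url in pairs(urls) do
    if url then internet.download_file(url, topic..'.img') end
end
--]]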