imdb_scraper/parser.rb

62 lines
1.6 KiB
Ruby
Raw Normal View History

2018-08-06 17:34:01 +00:00
require 'rest-client'
require 'json'
require 'optparse'
require 'nokogiri'
# Command-line IMDb lookup.
#
# Queries IMDb's suggestion endpoint for titles beginning with the given
# name, picks one entry (the first one, or the first one matching --year),
# then loads that title's page and prints its rating and summary.
#
# Usage: ruby parser.rb --movie MOVIE [--year YEAR]
# MOVIE is written with underscores in place of spaces (they are turned back
# into spaces only for display).

# Parses the command-line flags.
#
# @param argv [Array<String>] arguments to parse; recognised flags are removed
# @return [Hash] :movie (String) and :year (Integer), each only when given
# @raise [OptionParser::ParseError] on unknown flags or a non-numeric year
def parse_movie_options(argv)
  options = {}
  OptionParser.new do |opt|
    opt.on('--movie MOVIE') { |o| options[:movie] = o }
    # DecimalInteger rejects junk such as "abc" instead of letting it turn
    # into 0 and accidentally match entries that carry no year at all.
    opt.on('--year YEAR', OptionParser::DecimalInteger) { |o| options[:year] = o }
  end.parse!(argv)
  options
end

# Strips a JSONP wrapper ("callback( ... )") from a response body.
#
# Only the outermost wrapper is removed, so a "})" occurring inside the JSON
# itself is left intact, and the callback name does not need to be known.
#
# @param body [String] raw response text
# @return [String] the text between the first "(" and the last ")", or the
#   body unchanged when it is not wrapped
def extract_jsonp_payload(body)
  body[/\A[^(]*\((.*)\)\s*\z/m, 1] || body
end

# Fetches the suggestion entries for a movie name.
#
# @param movie [String] non-empty movie name
# @return [Array<Hash>] entries under the "d" key; empty when there are none
# @raise [RestClient::Exception] when the HTTP request fails
# @raise [JSON::ParserError] when the payload is not valid JSON
def fetch_suggestions(movie)
  uri = "https://v2.sg.media-imdb.com/suggests/#{movie[0]}/#{movie}.json"
  payload = extract_jsonp_payload(RestClient.get(uri).body)
  JSON.parse(payload)['d'] || []
end

# Chooses the entry to report on.
#
# @param candidates [Array<Hash>] suggestion entries
# @param year [Integer, nil] release year to match, or nil for "first entry"
# @return [Hash, nil] the chosen entry, or nil when nothing matches
def pick_movie(candidates, year)
  return candidates.first unless year

  candidates.find { |candidate| candidate['y'].to_i == year }
end

# Prints the details of the chosen entry, including the rating and summary
# scraped from its title page.
#
# @param picked_movie [Hash] a suggestion entry
# @return [void]
def print_movie_details(picked_movie)
  puts "found '#{picked_movie['l']}' from year #{picked_movie['y']}"
  puts "the film stars #{picked_movie['s']}"
  # Not every entry carries an image, so only print the poster when present.
  images = picked_movie['i']
  puts "The poster for the movie can be found at:\n#{images[0]}" if images

  title_url = "https://www.imdb.com/title/#{picked_movie['id']}/?ref_=nv_sr_1"
  movie_page = Nokogiri::HTML(RestClient.get(title_url).body)
  rating = movie_page.css('span[itemprop=ratingValue]').text
  puts "The rating of the movie is #{rating} out of 10"
  puts "\nThe summary of the movie is:\n#{movie_page.css('.summary_text').text.strip}"
rescue RestClient::Exception => e
  warn "Couldn't load the IMDb title page: #{e.message}"
end

separator = '---------------------------------------'

begin
  movie_options = parse_movie_options(ARGV)
rescue OptionParser::ParseError => e
  abort e.message
end

movie = movie_options[:movie]
# Without a name there is nothing to build the request URL from.
abort "Usage: #{$PROGRAM_NAME} --movie MOVIE [--year YEAR]" if movie.nil? || movie.empty?

puts separator
puts "searching for movie beginning with '#{movie.tr('_', ' ')}'\n\n"

begin
  candidates = fetch_suggestions(movie)
rescue RestClient::Exception, JSON::ParserError => e
  abort "Couldn't fetch suggestions for '#{movie}': #{e.message}"
end

unless movie_options[:year]
  puts "No specific year was given via '--year' argument, just picking the first movie which matches\n\n"
end
picked_movie = pick_movie(candidates, movie_options[:year])

if picked_movie
  print_movie_details(picked_movie)
else
  puts "Couldn't find a matching movie"
end
puts separator