intprog_hackery/pdf-parser.rb
2019-10-23 16:07:47 +02:00

57 lines
1.4 KiB
Ruby

require 'pdf-reader'
require_relative 'Method_desc'
# Extracts a class/container description from the first page of a PDF.
#
# The page text is split into lines. Up to 15 lines, starting at the fourth
# one, are tokenised on whitespace; tokens of length 1 (and a padded "*") are
# dropped, and lines left without any token are discarded. The surviving rows
# are then read by position (row 0 = primary class, rows 2.. = members, ...).
#
# NOTE(review): the fixed row positions presumably mirror one specific exam
# template -- confirm before feeding other documents through this method.
#
# The five extracted values are also printed to stdout.
#
# @param exam_text [Object] handed unchanged to PDF::Reader.new; the script
#   in this file passes a file name
# @return [Array] prim_class, container, class_fields, container_params,
#   container_methods -- in that order
# @raise [ArgumentError] if the PDF has no pages, if fewer than five usable
#   rows are found, or if a detected method token does not look like
#   "<type> <name>(<params>)"
def parse(exam_text)
  first_page = PDF::Reader.new(exam_text).pages[0]
  raise ArgumentError, "PDF has no pages: #{exam_text.inspect}" if first_page.nil?

  lines = first_page.text.split("\n")
  # Array#[] returns nil when the start index lies past the end, hence `|| []`.
  rows = (lines[3, 15] || [])
         .map { |line| line.split(" ").select { |token| token.length > 1 && token.strip != "*" } }
         .reject(&:empty?)

  # Everything below indexes rows 0..4 unconditionally, so fail early with a
  # readable message instead of a NoMethodError on nil further down.
  if rows.length < 5
    raise ArgumentError,
          "expected at least 5 non-empty rows on the first page, found #{rows.length}"
  end

  prim_class = rows[0][0]
  # The container name either shares the first row or sits alone on the second.
  container = (rows[0].length == 1 ? rows[1][0] : rows[0][1]).strip
  # Same idea for the container parameter: row 2 if it has company, else row 3.
  container_params = [(rows[2].length > 1 ? rows[2][0] : rows[3][0]).strip]

  # A row whose first token contains "(" is treated as a method.
  container_methods = rows.drop(2)
                          .select { |row| row[0].include?("(") }
                          .map { |row| row[0].strip }

  # A row's second token is a field name unless it contains "(".
  class_fields = rows.drop(3)
                     .select { |row| row.length > 1 && !row[1].include?("(") }
                     .map { |row| row[1].strip }
  # A single-token fifth row is taken as a field name and goes first.
  class_fields.unshift(rows[4][0].strip) if rows[4].length == 1

  # NOTE(review): method tokens come from a whitespace split and therefore
  # contain no space, while this pattern requires one between type and name.
  # Such tokens cannot match; confirm the intended input before relaxing it.
  signature = /\A(\S+) (\w+)\((.*)\)\Z/
  container_methods.map! do |raw|
    match = raw.match(signature)
    raise ArgumentError, "cannot parse method signature #{raw.inspect}" if match.nil?

    Method_desc.new(*match.captures)
  end

  puts "prim #{prim_class}"
  puts "container #{container}"
  puts "class_f #{class_fields}"
  puts "container_p #{container_params}"
  puts "container_m #{container_methods}"

  [prim_class, container, class_fields, container_params, container_methods]
end
# Runs the parser on the hard-coded file name "flower.pdf" whenever this file
# is executed or loaded.
sample_pdf = "flower.pdf"
parse(sample_pdf)