# intprog_hackery/pdf-parser.rb

require 'pdf-reader'
require_relative 'Method_desc'
# Reads the table printed on the first page of a PDF and picks names, fields,
# parameters and method signatures out of it by position.
#
# Only lines 3..17 of the page text are examined. Each line is cut into cells
# at runs of two or more spaces; cells are stripped, cells of one character or
# less (for example a lone "*") are dropped, and rows left without cells are
# discarded. At least five rows must remain.
#
# NOTE(review): the row/column positions used below are taken as-is from the
# previous implementation; the layout of the source PDF was not available, so
# confirm them against a real document.
#
# @param exam_text passed unchanged to PDF::Reader.new (e.g. a file path)
# @return [Array] prim_class (String), container (String),
#   class_fields (Array<String>), container_params (one-element Array<String>),
#   container_methods (Array<Method_desc>)
# @raise [ArgumentError] if the PDF has no pages, fewer than five usable rows
#   are found, or a cell containing "(" is not of the form "type name(args)"
def parse(exam_text)
  page = PDF::Reader.new(exam_text).pages.first
  raise ArgumentError, 'PDF contains no pages' if page.nil?

  # Skip the first three lines of the page and look at no more than the next 15.
  table_lines = page.text.split("\n").drop(3).first(15)

  # Cells are separated by runs of two or more spaces. Splitting on every
  # whitespace character would break a signature such as "void add(Flower f)"
  # apart, and the signature pattern below (which requires an inner space)
  # could then never match.
  column_gap = / {2,}/
  content = table_lines
            .map { |line| line.split(column_gap).map(&:strip).select { |cell| cell.length > 1 } }
            .reject(&:empty?)
  if content.length < 5
    raise ArgumentError, "expected at least 5 table rows, found #{content.length}"
  end

  prim_class = content[0][0]
  # The second name is either the second cell of row 0 or, when row 0 has a
  # single cell, the first cell of row 1.
  container = content[0].length == 1 ? content[1][0] : content[0][1]

  # The parameter cell is the first cell of row 2 when that row has more than
  # one cell, otherwise the first cell of row 3.
  container_params = [content[2].length > 1 ? content[2][0] : content[3][0]]

  # From row 2 on, every first cell containing "(" is a method signature.
  signatures = content.drop(2).map(&:first).select { |cell| cell.include?('(') }

  # From row 3 on, a second cell without "(" is a field.
  class_fields = content.drop(3)
                        .select { |row| row.length > 1 && !row[1].include?('(') }
                        .map { |row| row[1] }
  # A single-cell row 4 is put in front of the fields.
  class_fields.unshift(content[4][0]) if content[4].length == 1

  signature = /\A(\S+) (\w+)\((.*)\)\z/
  container_methods = signatures.map do |cell|
    match = cell.match(signature)
    raise ArgumentError, "unrecognised method signature: #{cell.inspect}" if match.nil?

    # Captures are: return type, method name, raw argument list.
    Method_desc.new(*match.captures)
  end

  puts "prim #{prim_class}"
  puts "container #{container}"
  puts "class_f #{class_fields}"
  puts "container_p #{container_params}"
  puts "container_m #{container_methods}"

  [prim_class, container, class_fields, container_params, container_methods]
end
# Script entry point. Parses the PDF named by the first command-line argument,
# falling back to "flower.pdf". The guard keeps the parse from running when
# this file is loaded by another file instead of being executed directly.
parse(ARGV.fetch(0, "flower.pdf")) if __FILE__ == $PROGRAM_NAME