Added a check to prevent crashing on malformed HTML, i.e. when the expected results table is missing from the page.

This commit is contained in:
Alexander Munch-Hansen 2018-10-20 12:00:11 +02:00
parent 8ff786bdcb
commit f2ac03ed5b

View File

@ -89,6 +89,10 @@ def do_taeker_things():
def do_find_bolig_things(): def do_find_bolig_things():
def already_seen(already_seens, spec_currently_found) -> bool:
return spec_currently_found.id in already_seens
find_bolig_type = namedtuple("Apartment_find_bolig", "link id") find_bolig_type = namedtuple("Apartment_find_bolig", "link id")
seen_apartments_file_find_bolig = 'seen_apartments_find_bolig.json' seen_apartments_file_find_bolig = 'seen_apartments_find_bolig.json'
@ -98,6 +102,10 @@ def do_find_bolig_things():
r = requests.get("https://www.findbolig.nu/ledigeboliger/liste.aspx?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0&focus=ctl00_placeholdersidebar_0_txt_where") r = requests.get("https://www.findbolig.nu/ledigeboliger/liste.aspx?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0&focus=ctl00_placeholdersidebar_0_txt_where")
soup = BeautifulSoup(r.text, "html5lib") soup = BeautifulSoup(r.text, "html5lib")
table = soup.find(id="GridView_Results")
if table is not None:
table_body = soup.find(id="GridView_Results").find("tbody") table_body = soup.find(id="GridView_Results").find("tbody")
all_apartments = [] all_apartments = []
@ -114,8 +122,7 @@ def do_find_bolig_things():
tmp = find_bolig_type(link, id) tmp = find_bolig_type(link, id)
all_apartments.append(tmp) all_apartments.append(tmp)
def already_seen(already_seens, spec_currently_found) -> bool:
return spec_currently_found.id in already_seens
already_seen_locations = [tmp.id for tmp in previous_seen] already_seen_locations = [tmp.id for tmp in previous_seen]
for apartment in all_apartments: for apartment in all_apartments:
@ -136,12 +143,19 @@ def do_find_bolig_things():
def do_hestia_things(): def do_hestia_things():
def already_seen(already_seens, spec_currently_found) -> bool:
return spec_currently_found.location in already_seens
seen_apartments_file_hestia = 'seen_apartments_hestia.json' seen_apartments_file_hestia = 'seen_apartments_hestia.json'
previous_seen = do_old_file_things(seen_apartments_file_hestia, hestia_apartment_type) previous_seen = do_old_file_things(seen_apartments_file_hestia, hestia_apartment_type)
r = requests.get("https://www.hestia.as/ledige-lejligheder/?area=266&max=7200") r = requests.get("https://www.hestia.as/ledige-lejligheder/?area=266&max=7200")
soup = BeautifulSoup(r.text, "html5lib") soup = BeautifulSoup(r.text, "html5lib")
table = soup.find(id="sortTable")
if table is not None:
table_body = soup.find(id="sortTable").find("tbody") table_body = soup.find(id="sortTable").find("tbody")
all_apartments = [] all_apartments = []
@ -153,11 +167,6 @@ def do_hestia_things():
text = [col.get_text() for col in cols] text = [col.get_text() for col in cols]
all_apartments.append(hestia_apartment_type(link[:link.find("\'")], *text)) all_apartments.append(hestia_apartment_type(link[:link.find("\'")], *text))
def already_seen(already_seens, spec_currently_found) -> bool:
return spec_currently_found.location in already_seens
already_seen_locations = [tmp.location for tmp in previous_seen] already_seen_locations = [tmp.location for tmp in previous_seen]
for apartment in all_apartments: for apartment in all_apartments:
if not already_seen(already_seen_locations, apartment): if not already_seen(already_seen_locations, apartment):
@ -168,8 +177,6 @@ def do_hestia_things():
else: else:
print("I've already seen this") print("I've already seen this")
with open(seen_apartments_file_hestia, 'w', encoding="utf8") as outfile: with open(seen_apartments_file_hestia, 'w', encoding="utf8") as outfile:
json.dump(previous_seen, outfile) json.dump(previous_seen, outfile)