Added a check to prevent failing on poor html code.
This commit is contained in:
parent
8ff786bdcb
commit
f2ac03ed5b
|
@ -89,6 +89,10 @@ def do_taeker_things():
|
||||||
|
|
||||||
|
|
||||||
def do_find_bolig_things():
|
def do_find_bolig_things():
|
||||||
|
|
||||||
|
def already_seen(already_seens, spec_currently_found) -> bool:
|
||||||
|
return spec_currently_found.id in already_seens
|
||||||
|
|
||||||
find_bolig_type = namedtuple("Apartment_find_bolig", "link id")
|
find_bolig_type = namedtuple("Apartment_find_bolig", "link id")
|
||||||
|
|
||||||
seen_apartments_file_find_bolig = 'seen_apartments_find_bolig.json'
|
seen_apartments_file_find_bolig = 'seen_apartments_find_bolig.json'
|
||||||
|
@ -98,80 +102,83 @@ def do_find_bolig_things():
|
||||||
r = requests.get("https://www.findbolig.nu/ledigeboliger/liste.aspx?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0&focus=ctl00_placeholdersidebar_0_txt_where")
|
r = requests.get("https://www.findbolig.nu/ledigeboliger/liste.aspx?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0&focus=ctl00_placeholdersidebar_0_txt_where")
|
||||||
|
|
||||||
soup = BeautifulSoup(r.text, "html5lib")
|
soup = BeautifulSoup(r.text, "html5lib")
|
||||||
table_body = soup.find(id="GridView_Results").find("tbody")
|
|
||||||
|
|
||||||
all_apartments = []
|
table = soup.find(id="GridView_Results")
|
||||||
rows = table_body.find_all('tr')
|
|
||||||
|
|
||||||
concatable_string = "https://www.findbolig.nu/Findbolig-nu/Find%20bolig/Ledige%20boliger/Boligpraesentation"
|
if table is not None:
|
||||||
for row in rows[1:]:
|
table_body = soup.find(id="GridView_Results").find("tbody")
|
||||||
cols = row.find_all('td')
|
|
||||||
aid = re.search('(aid.+)', cols[0].find('a')['href']).group(0)
|
|
||||||
# Hacky :(
|
|
||||||
id = aid.split("=")[1].split("&")[0]
|
|
||||||
link = concatable_string + "/Boligen.aspx?" + aid
|
|
||||||
|
|
||||||
tmp = find_bolig_type(link, id)
|
all_apartments = []
|
||||||
all_apartments.append(tmp)
|
rows = table_body.find_all('tr')
|
||||||
|
|
||||||
def already_seen(already_seens, spec_currently_found) -> bool:
|
concatable_string = "https://www.findbolig.nu/Findbolig-nu/Find%20bolig/Ledige%20boliger/Boligpraesentation"
|
||||||
return spec_currently_found.id in already_seens
|
for row in rows[1:]:
|
||||||
|
cols = row.find_all('td')
|
||||||
|
aid = re.search('(aid.+)', cols[0].find('a')['href']).group(0)
|
||||||
|
# Hacky :(
|
||||||
|
id = aid.split("=")[1].split("&")[0]
|
||||||
|
link = concatable_string + "/Boligen.aspx?" + aid
|
||||||
|
|
||||||
already_seen_locations = [tmp.id for tmp in previous_seen]
|
tmp = find_bolig_type(link, id)
|
||||||
for apartment in all_apartments:
|
all_apartments.append(tmp)
|
||||||
if not already_seen(already_seen_locations, apartment):
|
|
||||||
print("I've found a new apartment!")
|
|
||||||
if not args.populate_lists:
|
|
||||||
mail_handler.handle(apartment.link)
|
|
||||||
previous_seen.append(apartment)
|
|
||||||
else:
|
|
||||||
print("I've already seen this")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
with open(seen_apartments_file_find_bolig, 'w', encoding="utf8") as outfile:
|
already_seen_locations = [tmp.id for tmp in previous_seen]
|
||||||
json.dump(previous_seen, outfile)
|
for apartment in all_apartments:
|
||||||
|
if not already_seen(already_seen_locations, apartment):
|
||||||
|
print("I've found a new apartment!")
|
||||||
|
if not args.populate_lists:
|
||||||
|
mail_handler.handle(apartment.link)
|
||||||
|
previous_seen.append(apartment)
|
||||||
|
else:
|
||||||
|
print("I've already seen this")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
with open(seen_apartments_file_find_bolig, 'w', encoding="utf8") as outfile:
|
||||||
|
json.dump(previous_seen, outfile)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def do_hestia_things():
|
def do_hestia_things():
|
||||||
|
|
||||||
|
def already_seen(already_seens, spec_currently_found) -> bool:
|
||||||
|
return spec_currently_found.location in already_seens
|
||||||
|
|
||||||
seen_apartments_file_hestia = 'seen_apartments_hestia.json'
|
seen_apartments_file_hestia = 'seen_apartments_hestia.json'
|
||||||
|
|
||||||
previous_seen = do_old_file_things(seen_apartments_file_hestia, hestia_apartment_type)
|
previous_seen = do_old_file_things(seen_apartments_file_hestia, hestia_apartment_type)
|
||||||
|
|
||||||
r = requests.get("https://www.hestia.as/ledige-lejligheder/?area=266&max=7200")
|
r = requests.get("https://www.hestia.as/ledige-lejligheder/?area=266&max=7200")
|
||||||
soup = BeautifulSoup(r.text, "html5lib")
|
soup = BeautifulSoup(r.text, "html5lib")
|
||||||
table_body = soup.find(id="sortTable").find("tbody")
|
|
||||||
|
|
||||||
all_apartments = []
|
table = soup.find(id="sortTable")
|
||||||
|
|
||||||
rows = table_body.find_all('tr')
|
if table is not None:
|
||||||
for row in rows:
|
table_body = soup.find(id="sortTable").find("tbody")
|
||||||
link = re.search("(https.+|http.+)", row.get("onclick")).group(0)
|
|
||||||
cols = row.find_all('td')
|
|
||||||
text = [col.get_text() for col in cols]
|
|
||||||
all_apartments.append(hestia_apartment_type(link[:link.find("\'")], *text))
|
|
||||||
|
|
||||||
|
all_apartments = []
|
||||||
|
|
||||||
def already_seen(already_seens, spec_currently_found) -> bool:
|
rows = table_body.find_all('tr')
|
||||||
return spec_currently_found.location in already_seens
|
for row in rows:
|
||||||
|
link = re.search("(https.+|http.+)", row.get("onclick")).group(0)
|
||||||
|
cols = row.find_all('td')
|
||||||
|
text = [col.get_text() for col in cols]
|
||||||
|
all_apartments.append(hestia_apartment_type(link[:link.find("\'")], *text))
|
||||||
|
|
||||||
|
already_seen_locations = [tmp.location for tmp in previous_seen]
|
||||||
|
for apartment in all_apartments:
|
||||||
|
if not already_seen(already_seen_locations, apartment):
|
||||||
|
print("I've found a new apartment!")
|
||||||
|
if not args.populate_lists:
|
||||||
|
mail_handler.handle(apartment.link)
|
||||||
|
previous_seen.append(apartment)
|
||||||
|
else:
|
||||||
|
print("I've already seen this")
|
||||||
|
|
||||||
already_seen_locations = [tmp.location for tmp in previous_seen]
|
with open(seen_apartments_file_hestia, 'w', encoding="utf8") as outfile:
|
||||||
for apartment in all_apartments:
|
json.dump(previous_seen, outfile)
|
||||||
if not already_seen(already_seen_locations, apartment):
|
|
||||||
print("I've found a new apartment!")
|
|
||||||
if not args.populate_lists:
|
|
||||||
mail_handler.handle(apartment.link)
|
|
||||||
previous_seen.append(apartment)
|
|
||||||
else:
|
|
||||||
print("I've already seen this")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
with open(seen_apartments_file_hestia, 'w', encoding="utf8") as outfile:
|
|
||||||
json.dump(previous_seen, outfile)
|
|
||||||
|
|
||||||
|
|
||||||
do_find_bolig_things()
|
do_find_bolig_things()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user