# Reconstructed from a git format-patch e-mail whose line breaks had been
# collapsed:
#   Commit:  0bae9f45874fe5f97c51d40ab61c2e2c258bb0e9
#   From:    Alexander Munch-Hansen
#   Date:    Tue, 9 Oct 2018 21:57:47 +0200
#   Subject: [PATCH] Initial commit
# The patch creates two new files, apartment_looker.py and mail_handler.py.
# Both are reproduced below as plain Python, one after the other.

# =============================================================================
# apartment_looker.py
# =============================================================================
# Scrapes apartment listing pages and e-mails a link for every listing that was
# not recorded by an earlier run. Recorded listings are kept in JSON files in
# the current working directory.

import argparse
import json
import os.path
import re
from collections import namedtuple

import requests
from bs4 import BeautifulSoup

import mail_handler

parser = argparse.ArgumentParser(description="Scraper for apartments")
parser.add_argument('--populate_lists', action='store_true',
                    help='populate all json files without sending emails')

# Record types. json.dump() writes namedtuples as plain JSON arrays, which is
# why _load_seen() rebuilds them with record_type(*fields).
apartment_type = namedtuple(
    "Apartment",
    "link location date size shareable rooms price deposit prerent",
)
find_bolig_type = namedtuple("Apartment_find_bolig", "link id")

FIND_BOLIG_SEEN_FILE = 'seen_apartments_find_bolig.json'
FIND_BOLIG_URL = (
    "https://www.findbolig.nu/ledigeboliger/liste.aspx"
    "?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1"
    "&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0"
    "&focus=ctl00_placeholdersidebar_0_txt_where"
)
FIND_BOLIG_LINK_PREFIX = (
    "https://www.findbolig.nu/Findbolig-nu/Find%20bolig/Ledige%20boliger"
    "/Boligpraesentation"
)

HESTIA_SEEN_FILE = 'seen_apartments_hestia.json'
HESTIA_URL = "https://www.hestia.as/ledige-lejligheder/?area=266&max=7200"

# Seconds to wait for a listing page before giving up; without a timeout
# requests.get() may block indefinitely.
REQUEST_TIMEOUT = 30


def _load_seen(path, record_type):
    """Return the records stored in the JSON file *path*.

    Each stored entry is unpacked into *record_type*. A missing or empty file
    yields an empty list; the file is not created here, it is written by
    _save_seen() at the end of a run.
    """
    if not os.path.isfile(path):
        return []
    with open(path, encoding="utf8") as json_file:
        text = json_file.read()
    if not text:
        return []
    return [record_type(*fields) for fields in json.loads(text)]


def _save_seen(path, records):
    """Overwrite the JSON file *path* with *records*."""
    with open(path, 'w', encoding="utf8") as outfile:
        json.dump(records, outfile)


def _fetch_table_body(url, table_id):
    """Download *url* and return the <tbody> of the element with id *table_id*.

    Raises requests.HTTPError on a 4xx/5xx response and AttributeError when
    the page has no element with that id.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html5lib")
    return soup.find(id=table_id).find("tbody")


def _report_new(found, previous_seen, key, populate_lists):
    """Announce every apartment in *found* that is not yet in *previous_seen*.

    Apartments are compared on the attribute named *key*. Each new apartment
    is mailed (unless *populate_lists* is true) and then appended to
    *previous_seen*, which is modified in place. An apartment is appended only
    after its mail was handed to mail_handler without an exception.
    """
    seen_keys = {getattr(apartment, key) for apartment in previous_seen}
    for apartment in found:
        apartment_key = getattr(apartment, key)
        if apartment_key in seen_keys:
            print("I've already seen this")
            continue
        print("I've found a new apartment!")
        if not populate_lists:
            mail_handler.handle(apartment.link)
        # Remember the key immediately so a listing that occurs twice in the
        # same scrape is only reported once.
        seen_keys.add(apartment_key)
        previous_seen.append(apartment)


def _find_bolig_apartment(href):
    """Build a find_bolig_type record from a result-row link *href*.

    Everything from the first "aid" onwards is reused as the query string of
    the presentation link; the id is the text between the first "=" and the
    following "&". Raises AttributeError when *href* contains no "aid".
    """
    query = re.search(r'(aid.+)', href).group(0)
    apartment_id = query.split("=")[1].split("&")[0]
    link = FIND_BOLIG_LINK_PREFIX + "/Boligen.aspx?" + query
    return find_bolig_type(link, apartment_id)


def _hestia_apartment(row):
    """Build an apartment_type record from one <tr> of the Hestia table.

    The link is taken from the row's onclick attribute, cut at the first
    single quote; the remaining fields are the texts of the row's <td> cells
    in order, so the row must have as many cells as apartment_type has fields
    after "link".
    """
    target = re.search(r"(https.+|http.+)", row.get("onclick")).group(0)
    # split() keeps the whole match when no quote is present, whereas slicing
    # with str.find() would drop the last character (find() returns -1).
    link = target.split("'", 1)[0]
    cell_texts = [col.get_text() for col in row.find_all('td')]
    return apartment_type(link, *cell_texts)


def do_find_bolig_things(populate_lists=False):
    """Scrape findbolig.nu and report apartments not recorded before.

    Apartments are identified by their id. When *populate_lists* is true the
    seen-file is updated but no e-mail is sent. The seen-file is written even
    if sending a mail fails, so apartments already mailed are not mailed again
    on the next run.
    """
    previous_seen = _load_seen(FIND_BOLIG_SEEN_FILE, find_bolig_type)
    table_body = _fetch_table_body(FIND_BOLIG_URL, "GridView_Results")

    # The first row is skipped -- presumably the table header; confirm against
    # the live page.
    all_apartments = [
        _find_bolig_apartment(row.find_all('td')[0].find('a')['href'])
        for row in table_body.find_all('tr')[1:]
    ]

    try:
        _report_new(all_apartments, previous_seen, "id", populate_lists)
    finally:
        _save_seen(FIND_BOLIG_SEEN_FILE, previous_seen)


def do_hestia_things(populate_lists=False):
    """Scrape hestia.as and report apartments not recorded before.

    Apartments are identified by their location field. When *populate_lists*
    is true the seen-file is updated but no e-mail is sent. The seen-file is
    written even if sending a mail fails.
    """
    previous_seen = _load_seen(HESTIA_SEEN_FILE, apartment_type)
    table_body = _fetch_table_body(HESTIA_URL, "sortTable")

    all_apartments = [
        _hestia_apartment(row) for row in table_body.find_all('tr')
    ]

    try:
        _report_new(all_apartments, previous_seen, "location", populate_lists)
    finally:
        _save_seen(HESTIA_SEEN_FILE, previous_seen)


def main():
    """Parse the command line and run the scraper."""
    args = parser.parse_args()
    # Only the findbolig.nu scraper is run; the original script defined
    # do_hestia_things() but never called it, and that is kept as is.
    do_find_bolig_things(populate_lists=args.populate_lists)


if __name__ == "__main__":
    main()


# =============================================================================
# mail_handler.py
# =============================================================================
# Sends a notification e-mail containing a link to an apartment.

import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import constants

username = constants.EMAIL_USERNAME
password = constants.EMAIL_PASSWORD


def handle(link_to_apartment, to_addr="alex@pwnh.io"):
    """Mail *link_to_apartment* to *to_addr* via smtp.gmail.com:587.

    The sender address and the SMTP login are the module-level username and
    password taken from constants. The link is sent as the only part of a
    multipart/alternative message. The SMTP connection is closed even when
    STARTTLS, login or sending raises.
    """
    from_addr = username

    msg = MIMEMultipart('alternative')
    msg['Subject'] = 'Fundet ny lejlighed'
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg.attach(MIMEText(link_to_apartment.encode('utf-8'), 'html', 'utf-8'))

    # The context manager issues QUIT and closes the socket on exit.
    with smtplib.SMTP('smtp.gmail.com:587') as server:
        server.starttls()
        server.login(username, password)
        server.sendmail(from_addr, to_addr, msg.as_string())