Initial commit

This commit is contained in:
Alexander Munch-Hansen 2018-10-09 21:57:47 +02:00
commit 0bae9f4587
2 changed files with 170 additions and 0 deletions

142
apartment_looker.py Normal file
View File

@ -0,0 +1,142 @@
import requests
from bs4 import BeautifulSoup
from collections import namedtuple
import re
import json
import os.path
import mail_handler
import argparse
# Command-line interface. The only option is a switch that records every
# currently listed apartment in the JSON state files without mailing anything.
parser = argparse.ArgumentParser(description="Scraper for apartments")
parser.add_argument(
    '--populate_lists',
    help='populate all json files without sending emails',
    action='store_true',
)
# NOTE: the command line is parsed at import time; `args` is a module-level
# global that the scraper functions read.
args = parser.parse_args()

# Record for one Hestia listing: the link followed by one field per table cell.
apartment_type = namedtuple(
    "Apartment",
    "link location date size shareable rooms price deposit prerent",
)
def do_find_bolig_things():
    """Scrape findbolig.nu for Aarhus listings and mail every new one.

    Listing ids already recorded in ``seen_apartments_find_bolig.json`` are
    skipped. Each listing not seen before is appended to that file, and its
    link is passed to ``mail_handler.handle`` unless ``--populate_lists`` was
    given on the command line.

    Raises:
        requests.RequestException: if the results page cannot be fetched
            (timeout, connection error or HTTP error status).
    """
    find_bolig_type = namedtuple("Apartment_find_bolig", "link id")
    seen_apartments_file_find_bolig = 'seen_apartments_find_bolig.json'

    # A missing state file simply means nothing has been seen yet. Reading it
    # under `with` avoids leaving an unclosed handle behind; the file itself
    # is (re)written at the end of the run.
    try:
        with open(seen_apartments_file_find_bolig, encoding="utf8") as json_file:
            text = json_file.read()
    except FileNotFoundError:
        text = ""

    # json stores the namedtuples as plain lists, so rebuild them on load.
    previous_seen = [find_bolig_type(*entry) for entry in json.loads(text)] if text else []

    # Without a timeout a stalled connection would hang the scraper forever.
    r = requests.get("https://www.findbolig.nu/ledigeboliger/liste.aspx?where=Aarhus%208000&rentmax=7000&showrented=1&showyouth=1&showlimitedperiod=1&showunlimitedperiod=1&showOpenDay=0&focus=ctl00_placeholdersidebar_0_txt_where", timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html5lib")
    table_body = soup.find(id="GridView_Results").find("tbody")

    all_apartments = []
    rows = table_body.find_all('tr')
    concatable_string = "https://www.findbolig.nu/Findbolig-nu/Find%20bolig/Ledige%20boliger/Boligpraesentation"
    # The first row is skipped (presumably the table header -- confirm against the page).
    for row in rows[1:]:
        cols = row.find_all('td')
        # Everything from "aid" onwards in the href is the query string of the listing page.
        aid = re.search('(aid.+)', cols[0].find('a')['href']).group(0)
        # Hacky :( -- the id is the value of the first query parameter.
        apartment_id = aid.split("=")[1].split("&")[0]
        link = concatable_string + "/Boligen.aspx?" + aid
        all_apartments.append(find_bolig_type(link, apartment_id))

    # A set gives O(1) membership tests, and it is updated as we go so a
    # listing that appears twice in one scrape is only reported once.
    seen_ids = {seen.id for seen in previous_seen}
    try:
        for apartment in all_apartments:
            if apartment.id in seen_ids:
                print("I've already seen this")
                continue
            print("I've found a new apartment!")
            if not args.populate_lists:
                mail_handler.handle(apartment.link)
            previous_seen.append(apartment)
            seen_ids.add(apartment.id)
    finally:
        # Persist even if a mail fails part-way through, so listings that
        # were already mailed are not mailed again on the next run.
        with open(seen_apartments_file_find_bolig, 'w', encoding="utf8") as outfile:
            json.dump(previous_seen, outfile)
def do_hestia_things():
    """Scrape hestia.as for available apartments and mail every new one.

    Listings whose location is already recorded in
    ``seen_apartments_hestia.json`` are skipped. Each listing not seen before
    is appended to that file, and its link is passed to
    ``mail_handler.handle`` unless ``--populate_lists`` was given on the
    command line.

    Raises:
        requests.RequestException: if the listing page cannot be fetched
            (timeout, connection error or HTTP error status).
    """
    seen_apartments_file_hestia = 'seen_apartments_hestia.json'

    # A missing state file simply means nothing has been seen yet. Reading it
    # under `with` avoids leaving an unclosed handle behind; the file itself
    # is (re)written at the end of the run.
    try:
        with open(seen_apartments_file_hestia, encoding="utf8") as json_file:
            text = json_file.read()
    except FileNotFoundError:
        text = ""

    # json stores the namedtuples as plain lists, so rebuild them on load.
    previous_seen = [apartment_type(*entry) for entry in json.loads(text)] if text else []

    # Without a timeout a stalled connection would hang the scraper forever.
    r = requests.get("https://www.hestia.as/ledige-lejligheder/?area=266&max=7200", timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html5lib")
    table_body = soup.find(id="sortTable").find("tbody")

    all_apartments = []
    for row in table_body.find_all('tr'):
        # The listing URL is embedded in the row's onclick handler.
        link = re.search("(https.+|http.+)", row.get("onclick")).group(0)
        # Cut at the first quote. partition() leaves the link intact when no
        # quote is present, whereas slicing with find() == -1 would silently
        # drop the last character.
        link = link.partition("'")[0]
        cell_texts = [col.get_text() for col in row.find_all('td')]
        # The cells must line up with the non-link fields of apartment_type.
        all_apartments.append(apartment_type(link, *cell_texts))

    # A set gives O(1) membership tests, and it is updated as we go so a
    # location that appears twice in one scrape is only reported once.
    seen_locations = {seen.location for seen in previous_seen}
    try:
        for apartment in all_apartments:
            if apartment.location in seen_locations:
                print("I've already seen this")
                continue
            print("I've found a new apartment!")
            if not args.populate_lists:
                mail_handler.handle(apartment.link)
            previous_seen.append(apartment)
            seen_locations.add(apartment.location)
    finally:
        # Persist even if a mail fails part-way through, so listings that
        # were already mailed are not mailed again on the next run.
        with open(seen_apartments_file_hestia, 'w', encoding="utf8") as outfile:
            json.dump(previous_seen, outfile)
# Script entry point: this runs whenever the module is executed or imported.
# Only the findbolig.nu scraper is invoked; do_hestia_things() is defined in
# this file but is not called anywhere in the visible source.
do_find_bolig_things()

28
mail_handler.py Normal file
View File

@ -0,0 +1,28 @@
import smtplib
import constants
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
# SMTP credentials, read once at import time from the project-local
# `constants` module.
username = constants.EMAIL_USERNAME  # also used as the sender address by handle()
password = constants.EMAIL_PASSWORD
def handle(link_to_apartment, to_addr="alex@pwnh.io"):
    """Email a link to a newly found apartment through Gmail's SMTP server.

    Args:
        link_to_apartment: URL of the listing; sent as the HTML body of the mail.
        to_addr: Recipient address. Defaults to the address that was
            previously hard-coded, so existing callers are unaffected.

    Raises:
        smtplib.SMTPException: if the TLS upgrade, login or delivery fails.
        OSError: if the connection to the SMTP server cannot be established.
    """
    from_addr = username

    # Build the complete message before connecting, so a failure here never
    # leaves an open SMTP session behind.
    body = MIMEText(link_to_apartment, 'html', 'utf-8')
    msg = MIMEMultipart('alternative')
    msg['Subject'] = 'Fundet ny lejlighed'
    msg['From'] = from_addr
    msg['To'] = to_addr
    msg.attach(body)

    # The context manager issues QUIT and closes the socket even when
    # starttls/login/sendmail raises.
    with smtplib.SMTP('smtp.gmail.com:587') as server:
        server.starttls()
        server.login(username, password)
        server.sendmail(from_addr, to_addr, msg.as_string())