# commit c395a1ef32d6e7cabd7e0e2325d6021060b9b071
# Author: Pownie
# Date:   Sun Apr 16 21:06:02 2017 +0200
#
#     Had to recreate project
#
# The commit adds four new files; each one follows under its own banner.

# ===========================================================================
# diff --git a/RequestCrawler.py b/RequestCrawler.py  (new file mode 100644)
# ===========================================================================
#!/usr/bin/python3
import sys
from bs4 import BeautifulSoup
import requests
from requests.auth import HTTPBasicAuth
import io
import os
import time
from twitter import *
import tweepy
import difflib
# `constants` is a local module; constants_template.py (added in this same
# commit) lists the names it has to define.
import constants
import mail_handler

# URLs used during the login hand-shake and for the results page.
STADS_RESULTS_URL = 'https://sbstads.au.dk/sb_STAP/sb/resultater/studresultater.jsp'
WAYF_ACS_URL = 'https://wayf.wayf.dk/module.php/saml/sp/saml2-acs.php/wayf.wayf.dk'
STADS_SAML_URL = 'https://sbstads.au.dk/sb_STAP/saml/SAMLAssertionConsumer'

# Snapshot written on every run, and the snapshot of the previous run.
NEW_GRADES_FILE = './temp_new_grades.log'
OLD_GRADES_FILE = './old_grades.log'

# Open a session for requests, which will be used throughout
session = requests.session()

# The config for being able to utilize twitter
cfg = {
    "consumer_key": constants.cons_key,
    "consumer_secret": constants.cons_secret,
    "access_token": constants.access_token,
    "access_token_secret": constants.access_token_secret,
}


def get_api(cfg):
    """Build an authenticated tweepy API object.

    `cfg` must provide the keys 'consumer_key', 'consumer_secret',
    'access_token' and 'access_token_secret'.
    """
    auth = tweepy.OAuthHandler(cfg['consumer_key'], cfg['consumer_secret'])
    auth.set_access_token(cfg['access_token'], cfg['access_token_secret'])
    return tweepy.API(auth)


api = get_api(cfg)


def _hidden_input_value(soup, name):
    """Return the 'value' attribute of the <input> called `name`.

    Raises RuntimeError when the page has no such input, instead of the
    opaque "'NoneType' object is not subscriptable" the bare lookup gives.
    """
    tag = soup.find('input', {'name': name})
    if tag is None:
        raise RuntimeError(
            'Expected an <input name="%s"> in the response but found none; '
            'the previous login step probably failed' % name)
    return tag['value']


def findGrades():
    """Log in to Stads through WAYF and return the parsed results page.

    Every request goes through the module-level `session`. The status code
    of each step is printed; each one is expected to be 200.

    Returns:
        BeautifulSoup: the results page parsed with 'html5lib'.

    Raises:
        RuntimeError: if a SAML form field is missing from a response.
    """
    # The first response carries a <meta> tag whose content holds the link to
    # follow.
    stads = session.get(STADS_RESULTS_URL)
    soup = BeautifulSoup(stads.text, 'html5lib')

    # NOTE(review): [6:] presumably strips a '0;url='-style prefix from the
    # meta refresh content -- confirm against a live response.
    meta_tag_redirect = session.get(soup.find('meta')['content'][6:])
    print(meta_tag_redirect.status_code)

    # Post the credentials to wherever the redirect ended up (the WAYF login).
    wayf_login = session.post(
        meta_tag_redirect.url,
        data={'username': constants.USERNAME, 'password': constants.PASSWORD})
    print(wayf_login.status_code)

    # Hand the SAMLResponse from the login page on to WAYF.
    soup = BeautifulSoup(wayf_login.text, 'html5lib')
    saml_response = _hidden_input_value(soup, 'SAMLResponse')
    wayf = session.post(WAYF_ACS_URL, data={'SAMLResponse': saml_response})
    print(wayf.status_code)

    # WAYF answers with a new SAMLResponse plus a RelayState for Stads.
    soup = BeautifulSoup(wayf.text, 'html5lib')
    saml_response = _hidden_input_value(soup, 'SAMLResponse')
    relay_state = _hidden_input_value(soup, 'RelayState')
    saml_assertion = session.post(
        STADS_SAML_URL,
        data={'SAMLResponse': saml_response, 'RelayState': relay_state})
    print(saml_assertion.status_code)

    # The session should now be allowed to read the results page.
    resultater = session.get(STADS_RESULTS_URL)
    print(resultater.status_code)

    return BeautifulSoup(resultater.text, 'html5lib')


def createGradeFile():
    """Scrape the results page and write one course per line to the snapshot.

    The text of the first <td> of every <tr class="DataSelect"> row is
    written, stripped, followed by ' \\n' (the format old_grades.log uses).
    The file is always rewritten from scratch, so an empty scrape produces an
    empty snapshot instead of leaving the previous one behind.
    """
    soup = findGrades()
    newest_grades = soup.find_all('tr', {'class': 'DataSelect'})

    # UTF-8 is set explicitly so the result does not depend on the locale.
    with open(NEW_GRADES_FILE, 'w', encoding='utf-8') as grade_file:
        for grade in newest_grades:
            grade_file.write('%s \n' % grade.find_all('td')[0].getText().strip())


def _read_lines(path):
    """Return the lines of `path`, or an empty list if it does not exist."""
    try:
        with open(path, 'r', encoding='utf-8') as handle:
            return handle.readlines()
    except FileNotFoundError:
        return []


def diffGradeLists():
    """Refresh the snapshot and return the lines that are new since last run.

    A line counts as new when the unified diff marks it as added and it is
    not also marked as removed (i.e. it did not merely change position).
    A missing old_grades.log is treated as empty, so on the very first run
    every scraped course is reported.

    Returns:
        list[str]: the new lines, each still ending in ' \\n'.
    """
    createGradeFile()

    old_lines = _read_lines(OLD_GRADES_FILE)
    new_lines = _read_lines(NEW_GRADES_FILE)

    diff = difflib.unified_diff(old_lines, new_lines, fromfile='file1',
                                tofile='file2', lineterm="\n", n=0)
    # The first two entries are the '---' / '+++' file headers.
    lines = list(diff)[2:]
    added = [line[1:] for line in lines if line.startswith('+')]
    removed = {line[1:] for line in lines if line.startswith('-')}

    return [line for line in added if line not in removed]


def checker():
    """Run one check: on new grades, store the snapshot and send notices.

    When there is something new, old_grades.log is replaced with the current
    snapshot first; afterwards the message is printed, mailed and tweeted.
    """
    new_grades = diffGradeLists()

    if not new_grades:
        print('There are no new grades')
        return

    # Opening with 'w' truncates, so no tail of a longer old file survives.
    with open(NEW_GRADES_FILE, 'r', encoding='utf-8') as new_grade_file:
        snapshot = new_grade_file.read()
    with open(OLD_GRADES_FILE, 'w', encoding='utf-8') as old_grade_file:
        old_grade_file.write(snapshot)

    grades_string = "New grade(s) in the following course(s):\n" + ''.join(new_grades)

    print(grades_string)
    mail_handler.handle(grades_string)
    tweeter(grades_string)


def tweeter(grades):
    """Post `grades` (a str) as a status update through the global `api`."""
    # NOTE(review): the text is sent unshortened; a long course list may be
    # rejected for exceeding the status length limit -- confirm and truncate
    # if needed.
    api.update_status(status=grades)


# Runs a single check whenever this file is executed (or imported), exactly as
# before.
checker()


# ===========================================================================
# diff --git a/Web/WebSubscription.py b/Web/WebSubscription.py  (new file mode 100644)
# ===========================================================================
#!/bin/python3
import sqlite3
from contextlib import closing
from flask import Flask, session, redirect, url_for, escape, request
app = Flask(__name__)

# SQLite file holding the `mails` table. A connection is opened per request:
# a single module-level connection cannot be closed after one request and
# still serve the next one.
DATABASE = '../mailing_list'


@app.route('/subscribe')
def login():
    """Serve the subscription page."""
    # NOTE(review): only the text of this page is visible in the reviewed
    # copy; the form markup (it has to post 'email' and 'studie' to
    # /added_email) must be restored from the original file.
    return '''


Enter information and click submit, in order to subscribe to the mailing list



Pick study



    '''


@app.route('/added_email', methods=['POST'])
def hello():
    """Insert the posted 'email' and 'studie' into `mails` and confirm it."""
    email = request.form['email']
    studie = request.form['studie']

    query = "insert into mails (email, studie) values (?, ?)"
    with closing(sqlite3.connect(DATABASE)) as conn:
        # `with conn` commits on success and rolls back on an exception.
        with conn:
            conn.execute(query, (email, studie))

    # The values come straight from the client, so escape them before they
    # are echoed back inside the response body.
    return 'I inserted: %s and %s, into the database' % (escape(email), escape(studie))


if __name__ == '__main__':
    app.run(port=2047)


# ===========================================================================
# diff --git a/constants_template.py b/constants_template.py  (new file mode 100644)
# ===========================================================================
#!/usr/bin/python3

# Username for STADS
USERNAME = 'username'

# Password for STADS
PASSWORD = 'password'

# Consumer Key (API Key)
cons_key = 'insert consumer key here'
# Consumer Secret (API Secret)
cons_secret = 'insert consumer secret here'
# Access Token
access_token = 'insert access token here'
# Access Token Secret
access_token_secret = 'insert access token secret here'

# Login for the account the notification mails are sent from
EMAIL_USERNAME = 'username'
EMAIL_PASSWORD = 'password'


# ===========================================================================
# diff --git a/mail_handler.py b/mail_handler.py  (new file mode 100644)
# ===========================================================================
#!/bin/python3
import sqlite3
import smtplib
from contextlib import closing
from email.mime.text import MIMEText
import constants

username = constants.EMAIL_USERNAME
password = constants.EMAIL_PASSWORD

# SQLite file holding the `mails` table with the subscribers.
DATABASE = 'mailing_list'
SMTP_SERVER = 'smtp.gmail.com:587'
SUBJECT = 'New grade(s)'


def _subscriber_emails():
    """Return every address stored in the `mails` table as a list of str."""
    with closing(sqlite3.connect(DATABASE)) as conn:
        # Each row is a 1-tuple; unpack it to the plain address.
        return [row[0] for row in conn.execute('SELECT email FROM mails')]


def handle(new_grades):
    """Mail `new_grades` (the message text) to every subscriber.

    The recipient list is read on each call, so the function can be called
    more than once. The SMTP session is closed even if sending fails.
    """
    recipients = _subscriber_emails()
    fromaddr = username

    with smtplib.SMTP(SMTP_SERVER) as server:
        server.starttls()
        server.login(username, password)

        for toaddr in recipients:
            # sendmail() encodes a plain str as ASCII, so the text is wrapped
            # in a UTF-8 MIME message to survive non-ASCII course names.
            msg = MIMEText(new_grades, 'plain', 'utf-8')
            msg['Subject'] = SUBJECT
            msg['From'] = fromaddr
            msg['To'] = toaddr
            server.sendmail(fromaddr, [toaddr], msg.as_string())