#!/usr/bin/python3
import os
import difflib

import requests
from bs4 import BeautifulSoup
import tweepy

# This imports from a file called 'constants'; at the moment that is the file called 'constants_template'
import constants
import mail_handler

# Open a requests session, which will be used throughout
session = requests.Session()

# The config needed to be able to use Twitter
cfg = {
    'consumer_key': constants.cons_key,
    'consumer_secret': constants.cons_secret,
    'access_token': constants.access_token,
    'access_token_secret': constants.access_token_secret,
}


# Set up the Twitter connection
def get_api(cfg):
    auth = tweepy.OAuthHandler(cfg['consumer_key'], cfg['consumer_secret'])
    auth.set_access_token(cfg['access_token'], cfg['access_token_secret'])
    return tweepy.API(auth)


api = get_api(cfg)


# The main method for scraping Stads
def findGrades():
    # The first get to Stads, such that I can get the correct link to follow
    stads = session.get('https://sbstads.au.dk/sb_STAP/sb/resultater/studresultater.jsp')
    # The response contains a meta refresh tag that I can then follow
    soup = BeautifulSoup(stads.text, 'html5lib')
    # Follow said meta tag; [6:] strips the leading 'N;URL=' part of its content attribute
    meta_tag_redirect = session.get(soup.find('meta')['content'][6:])
    # This should return 200, since I hopefully found the correct meta tag
    print(meta_tag_redirect.status_code)
    # The url the meta tag redirected to
    meta_tag_url = meta_tag_redirect.url
    # Try to log in to WAYF
    wayf_login = session.post(meta_tag_url, data={'username': constants.USERNAME, 'password': constants.PASSWORD})
    # Should return 200
    print(wayf_login.status_code)
    soup = BeautifulSoup(wayf_login.text, 'html5lib')
    # Find the SAMLResponse, such that I can pass it as a parameter, so WAYF will like me
    SAMLResponse = soup.find('input', {'name': 'SAMLResponse'})['value']
    # Hopefully WAYF does in fact like me
    wayf = session.post('https://wayf.wayf.dk/module.php/saml/sp/saml2-acs.php/wayf.wayf.dk',
                        data={'SAMLResponse': SAMLResponse})
    # If this returns 200, it does \o/
    print(wayf.status_code)
    # After concluding that WAYF liked me, we look at its response
    soup = BeautifulSoup(wayf.text, 'html5lib')
    # We then find the new SAMLResponse as well as the 'RelayState' string
    SAMLResponse = soup.find('input', {'name': 'SAMLResponse'})['value']
    RelayState = soup.find('input', {'name': 'RelayState'})['value']
    # We then do the last post, and after this we can hopefully 'get' Stads
    SAMLAssertion = session.post('https://sbstads.au.dk/sb_STAP/saml/SAMLAssertionConsumer',
                                 data={'SAMLResponse': SAMLResponse, 'RelayState': RelayState})
    # If this returns 200, it's go time!
    print(SAMLAssertion.status_code)
    # Given that the previous print returned 200, we can now get the source code of Stads
    resultater = session.get('https://sbstads.au.dk/sb_STAP/sb/resultater/studresultater.jsp')
    # Just to check that it returns 200, so we have access
    print(resultater.status_code)
    # Given that it returned 200, we can parse the page and continue our adventure to find the grades
    soup = BeautifulSoup(resultater.text, 'html5lib')
    return soup


def createGradeFile():
    soup = findGrades()
    newest_grades = soup.find_all('tr', {'class': 'DataSelect'})
    # 'w' both creates the file if it is missing and truncates any old content
    with open('./temp_new_grades.log', 'w') as grade_file:
        for grade in newest_grades:
            grade_file.write('%s \n' % grade.find_all('td')[0].getText().strip())


def diffGradeLists():
    createGradeFile()
    # Make sure old_grades.log exists, so the very first run does not crash
    if not os.path.isfile('./old_grades.log'):
        open('./old_grades.log', 'w').close()
    with open('./temp_new_grades.log', 'r') as new_grade_file, open('./old_grades.log', 'r') as old_grade_file:
        diff = difflib.unified_diff(old_grade_file.readlines(), new_grade_file.readlines(),
                                    fromfile='file1', tofile='file2', lineterm='\n', n=0)
    # Skip the '---'/'+++' header lines of the unified diff
    lines = list(diff)[2:]
    added = [line[1:] for line in lines if line[0] == '+']
    removed = [line[1:] for line in lines if line[0] == '-']
    # A course is new if it was added without also being removed (i.e. not just moved around)
    new_courses = [line for line in added if line not in removed]
    return new_courses


def checker():
    new_grades = diffGradeLists()
    grades_string = 'New grade(s) in the following course(s):\n'
    if not new_grades:
        print('There are no new grades')
    else:
        # Overwrite the old grade list with the new one, so these grades
        # are not reported again on the next run
        with open('./temp_new_grades.log', 'r') as new_grade_file, open('./old_grades.log', 'w') as old_grade_file:
            for line in new_grade_file:
                old_grade_file.write(line)
        grades_string += ''.join(new_grades)
        print(grades_string)
        mail_handler.handle(grades_string)
        tweeter(grades_string)


def tweeter(grades):
    # Post the grade notification as a tweet
    api.update_status(status=grades)


if __name__ == '__main__':
    checker()