api.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. """API module"""
  2. import re
  3. from datetime import datetime, timedelta
  4. import requests
  5. from bs4 import BeautifulSoup
  6. from app import BASE_URL, HEADERS
  7. def get_professors(state_id, department_type, start_date):
  8. """Download list of professors"""
  9. professors = []
  10. not_reached_date = True
  11. page = 0
  12. while not_reached_date:
  13. # tmp_professors = download_department(state_id, department_type, page)
  14. tmp_professors = read_department()
  15. for professor in tmp_professors:
  16. if start_date is not None: # and start_date >=:
  17. not_reached_date = False
  18. break
  19. professors.append(professor)
  20. page += 1
  21. break
  22. return professors
  23. def download_department(state_id, department_type, page):
  24. """Download the department"""
  25. response = requests.get(
  26. '{}listed/professors/{}/{}/{}'.format(BASE_URL, department_type, state_id, page*25),
  27. headers=HEADERS
  28. )
  29. return parse_department(response.text)
  30. def read_department():
  31. """Read from department file"""
  32. with open('department.html') as file:
  33. return parse_department(file)
  34. def parse_department(html):
  35. """Parse html return professors"""
  36. soup = BeautifulSoup(html, 'html.parser')
  37. professors_tree = soup.find_all(class_='list_link')
  38. professors = []
  39. today = datetime.strftime(datetime.now(), '%-d %B %Y')
  40. yesterday = datetime.strftime(datetime.now() - timedelta(1), '%-d %B %Y')
  41. for professor_tree in professors_tree:
  42. columns = professor_tree.find_all('td')
  43. date = columns[3].string
  44. date = date.replace('Today ', today)
  45. date = date.replace('Yesterday ', yesterday)
  46. datetime.strptime('Jun 1 2005 1:33PM', '%b %d %Y %I:%M%p')
  47. professors.append(
  48. {
  49. 'id': int(professor_tree['user']),
  50. 'name': re.sub(r'\s\(.*$', '', columns[1].string),
  51. 'points': int(re.sub(r'^.*\(\+|\)$', '', columns[1].string)),
  52. 'date': datetime.strptime(date, '%d %B %Y %H:%M'),
  53. }
  54. )
  55. return professors