api.py 1.8 KB

  1. """API module"""
  2. import re
  3. import requests
  4. from bs4 import BeautifulSoup
  5. from app import BASE_URL, HEADERS


def get_professors(state_id, department_type, start_date):
    """Download the list of professors"""
    professors = []
    not_reached_date = True
    page = 0
    while not_reached_date:
        # tmp_professors = download_department(state_id, department_type, page)
        tmp_professors = read_department()
        for professor in tmp_professors:
            if start_date is not None:  # and start_date >=:
                not_reached_date = False
                break
            professors.append(professor)
        page += 1
        break  # stop after a single page for now
    return professors


def download_department(state_id, department_type, page):
    """Download one page of the department listing"""
    # the last URL segment is the listing offset (page * 25)
    response = requests.get(
        '{}listed/professors/{}/{}/{}'.format(BASE_URL, department_type, state_id, page * 25),
        headers=HEADERS
    )
    return parse_department(response.text)


def read_department():
    """Read the department listing from a local file"""
    with open('department.html') as file:
        return parse_department(file.read())


def parse_department(html):
    """Parse the department HTML and return the professors"""
    soup = BeautifulSoup(html, 'html.parser')
    professors_tree = soup.find_all(class_='list_link')
    professors = []
    for professor_tree in professors_tree:
        columns = professor_tree.find_all('td')
        professors.append(
            {
                'id': int(professor_tree['user']),
                # strip the trailing "(+points)" part from the name column
                'name': re.sub(r'\s\(.*$', '', columns[1].string),
                # keep only the number inside "(+...)"
                'points': int(re.sub(r'^.*\(\+|\)$', '', columns[1].string)),
                'date': columns[3].string,
            }
        )
    return professors
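

# Usage sketch (illustrative only): with the download call commented out above,
# get_professors() reads the locally saved 'department.html' via
# read_department(), so the state_id/department_type values here are
# placeholders and start_date=None keeps every parsed row.
if __name__ == '__main__':
    for entry in get_professors(state_id=1, department_type='math', start_date=None):
        print('{id} {name} (+{points}) {date}'.format(**entry))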