12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061 |
- """API module"""
- import re
- import requests
- from bs4 import BeautifulSoup
- from app import BASE_URL, HEADERS
def get_professors(state_id, department_type, start_date):
    """Return the professors found on a single department page.

    The page is currently obtained through ``read_department()`` (a local
    file), so ``state_id`` and ``department_type`` are accepted but not
    used here, and only one page is ever processed.

    Args:
        state_id: State identifier (unused while the local file is read).
        department_type: Department identifier (unused, see above).
        start_date: Cut-off marker. When it is not ``None`` nothing is
            collected; the actual date comparison is not implemented yet.

    Returns:
        A list with every parsed professor when ``start_date`` is ``None``,
        otherwise an empty list.
    """
    # The page is always read, even when the result ends up empty.
    page_entries = read_department()

    if start_date is not None:
        # Placeholder cut-off: any start date stops collection immediately.
        return []

    return [entry for entry in page_entries]
def download_department(state_id, department_type, page):
    """Download one page of a department's professor list and parse it.

    Args:
        state_id: State identifier inserted into the request URL.
        department_type: Department identifier inserted into the request URL.
        page: Page index; it is multiplied by 25 to form the last URL segment.

    Returns:
        The result of ``parse_department`` applied to the response body.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    url = '{}listed/professors/{}/{}/{}'.format(
        BASE_URL, department_type, state_id, page * 25
    )
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the whole scrape.
    response = requests.get(url, headers=HEADERS, timeout=30)
    return parse_department(response.text)
def read_department():
    """Parse the professors stored in the local ``department.html`` file.

    Returns:
        The result of ``parse_department`` for the opened file.
    """
    with open('department.html') as department_file:
        # The open handle itself is handed to the parser, not its contents.
        parsed = parse_department(department_file)
    return parsed
def parse_department(html):
    """Parse a department page and return its professors.

    Every element with the CSS class ``list_link`` is treated as one
    professor row.

    Args:
        html: Markup (string or open file) passed straight to BeautifulSoup.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``points`` and
        ``date``, one per row, in document order.

    Raises:
        KeyError: If a row has no ``user`` attribute.
        IndexError: If a row has fewer than four ``td`` cells.
        ValueError: If the id or the points are not integers.
    """
    soup = BeautifulSoup(html, 'html.parser')
    professors = []
    for professor_tree in soup.find_all(class_='list_link'):
        columns = professor_tree.find_all('td')
        # The second cell holds the name followed by a parenthesised score;
        # presumably something like "Name (+123)" - confirm against a real page.
        label = columns[1].string
        professors.append(
            {
                'id': int(professor_tree['user']),
                # Drop the trailing " (...)" part to keep only the name.
                'name': re.sub(r'\s\(.*$', '', label),
                # Drop everything up to "(+" and the closing ")".
                'points': int(re.sub(r'^.*\(\+|\)$', '', label)),
                'date': columns[3].string,
            }
        )
    return professors
|