Browse Source

Fix issue with articles that have no newspaper

JoostSijm 4 years ago
parent
commit
ae10cbabd6

+ 15 - 5
src/rival_regions_wrapper/api_wrapper/article.py

@@ -18,17 +18,20 @@ class Article(object):
         soup = BeautifulSoup(response, 'html.parser')
 
         links = soup.select('.newspaper_links')
-        newspaper = links[0]
-        author = links[1]
-        region = links[2]
+        if len(links) >= 3:
+            newspaper = links[0]
+            author = links[1]
+            region = links[2]
+        else:
+            author = links[0]
+            region = links[1]
+            newspaper = None
 
         news_content = soup.select_one('.news_content')
 
         article_info = {
             'article_id': article_id,
             'article_title': unicodedata.normalize("NFKD", soup.select_one('.title_totr').text),
-            'newspaper_id': int(newspaper['action'].replace('newspaper/show/', '')),
-            'newspaper_name': newspaper.text,
             'author_name': re.sub(r',\s\skarma.*$', '', author.text),
             'author_id': int(author['action'].replace('slide/profile/', '')),
             'region_name': region.text,
@@ -37,6 +40,13 @@ class Article(object):
             'content_html': news_content.prettify(),
         }
 
+        if newspaper:
+            article_info['newspaper_id'] = int(newspaper['action'].replace('newspaper/show/', ''))
+            article_info['newspaper_name'] = newspaper.text
+        else:
+            article_info['newspaper_id'] = None
+            article_info['newspaper_name'] = None
+
         result = re.search(r'.+(\s.+,)', soup.select_one('.tc.small').text)
         try:
             article_info['language'] = re.sub(r'\s|,', '', result[1].strip())

+ 20 - 1
tests/test_rival_regions_wrapper.py

@@ -200,7 +200,7 @@ def test_work_info(api_wrapper):
     assert isinstance(response['work_exp'], dict), "Work exp should be a dict"
 
 @pytest.mark.vcr()
-def test_article_info(api_wrapper):
+def test_article_info_one(api_wrapper):
     """Test article info"""
     article_id = 2708696
     response = Article(api_wrapper).info(article_id)
@@ -217,3 +217,22 @@ def test_article_info(api_wrapper):
     assert isinstance(response['content_text'], str), "Content text should be a string"
     assert isinstance(response['content_html'], str), "Content html should be a string"
     assert isinstance(response['language'], str), "Language should be a string"
+
+@pytest.mark.vcr()
+def test_article_info_two(api_wrapper):
+    """Test article info for an article that has no newspaper"""
+    article_id = 2862982
+    response = Article(api_wrapper).info(article_id)
+
+    assert isinstance(response, dict), "The resonse should be a dict"
+    assert isinstance(response['article_id'], int), "Article id should be an integer"
+    assert isinstance(response['article_title'], str), "Article title should be a str"
+    assert response['newspaper_id'] is None, "Newspaper id should be none"
+    assert response['newspaper_name'] is None, "Newspaper name should be none"
+    assert isinstance(response['author_name'], str), "Author name should be a string"
+    assert isinstance(response['author_id'], int), "Author id should be an integer"
+    assert isinstance(response['region_name'], str), "Region name should be a string"
+    assert isinstance(response['region_id'], int), "Region id should be an integer"
+    assert isinstance(response['content_text'], str), "Content text should be a string"
+    assert isinstance(response['content_html'], str), "Content html should be a string"
+    assert isinstance(response['language'], str), "Language should be a string"