Merge commit 'c46cd09bc37604b95eec59cc0b8d90cd2eacab5b'

Cdaprod · Mar 14, 2024 · c1a670e · c1a670e
2 parents 50ae038 + c46cd09
commit c1a670e
Showing 1 changed file with 5 additions and 7 deletions.
diff --git a/app.py b/app.py
@@ -16,27 +16,25 @@ def fetch_and_parse_articles():
         author = 'David Cannan'
         summary = article.select_one('div.post__content > p').text.strip() if article.select_one('div.post__content > p') else ''
         date = article.find('time').text.strip() if article.find('time') else ''
-        link = article.find('a', class_='post__more')['href'] if article.find('a', class_='post__more') else ''
+        link = article.find('h2').find('a')['href'] if article.find('h2').find('a') else ''
         articles.append((title, author, summary, date, link))
 
     return pd.DataFrame(articles, columns=['title', 'author', 'summary', 'date', 'url'])
-
+ 
 def extract_article_content(url):
-    base_url = 'https://blog.min.io'
-    full_url = base_url + url
     try:
-        response = requests.get(full_url)
+        response = requests.get(url)
         response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes
         soup = BeautifulSoup(response.text, 'html.parser')
         article_content = soup.find('section', class_='post-full-content')
 
         if article_content:
             return article_content.get_text(separator='\n', strip=True)
         else:
-            print(f"Article content not found for: {full_url}")
+            print(f"Article content not found for: {url}")
             return None
     except requests.exceptions.RequestException as e:
-        print(f"Error fetching article content: {full_url}")
+        print(f"Error fetching article content: {url}")
         print(f"Error details: {str(e)}")
         return None