Skip to content

Commit

Permalink
Merge ; commit 'c46cd09bc37604b95eec59cc0b8d90cd2eacab5b'
Browse files (browse the repository at this point in the history)
  • Loading branch information
Cdaprod committed Mar 14, 2024
2 parents 50ae038 + c46cd09 commit c1a670e
Showing 1 changed file with 5 additions and 7 deletions.
12 changes: 5 additions & 7 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,25 @@ def fetch_and_parse_articles():
author = 'David Cannan'
summary = article.select_one('div.post__content > p').text.strip() if article.select_one('div.post__content > p') else ''
date = article.find('time').text.strip() if article.find('time') else ''
link = article.find('a', class_='post__more')['href'] if article.find('a', class_='post__more') else ''
link = article.find('h2').find('a')['href'] if article.find('h2').find('a') else ''
articles.append((title, author, summary, date, link))

return pd.DataFrame(articles, columns=['title', 'author', 'summary', 'date', 'url'])

def extract_article_content(url, timeout=10):
    """Fetch an article page and return its main text content.

    The page at ``url`` is downloaded, parsed with BeautifulSoup's
    ``html.parser``, and the text of the first
    ``<section class="post-full-content">`` element is extracted.

    Args:
        url: Address of the article page. It is passed to ``requests.get``
            unchanged (no base URL is prepended).
            NOTE(review): assumes callers pass absolute URLs -- confirm
            against the links collected by ``fetch_and_parse_articles``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The section's text with fragments stripped and joined by newlines,
        or ``None`` when the request fails or the section is not present.
        Failures are reported on stdout rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx / 5xx responses into an exception handled below.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching article content: {url}")
        print(f"Error details: {e}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    article_content = soup.find('section', class_='post-full-content')

    if article_content is None:
        print(f"Article content not found for: {url}")
        return None

    return article_content.get_text(separator='\n', strip=True)

Expand Down

0 comments on commit c1a670e

Please sign in to comment.