-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdao_word_cloud.py
103 lines (82 loc) · 3.5 KB
/
dao_word_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import logging
import os
import pandas as pd
from components.utils import get_db_connection, save_to_csv
from datetime import datetime
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re
# Root logger setup: emit INFO and above, each record prefixed with its
# timestamp and severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
def fetch_dao_references(connection_factory=None):
    """Fetch all records from the database whose headline mentions 'DAO'.

    The filter is a case-insensitive substring match on ``headline``, so any
    headline containing the letters "dao" is returned.

    Args:
        connection_factory: Optional zero-argument callable returning a
            DB-API style connection, or ``None`` when no connection is
            available. Defaults to ``get_db_connection``.

    Returns:
        A DataFrame with the columns Date, Source, Headline, Sentiment,
        Sentiment Score, Label and Link. An empty DataFrame without columns
        is returned when no connection is available or when any error
        occurs; errors are logged, never raised.
    """
    columns = ['Date', 'Source', 'Headline', 'Sentiment', 'Sentiment Score', 'Label', 'Link']
    query = """
        SELECT date, source, headline, sentiment, sentiment_score, label, link
        FROM crypto_news
        WHERE LOWER(headline) LIKE '%dao%';
    """
    conn = None
    try:
        if connection_factory is None:
            connection_factory = get_db_connection
        conn = connection_factory()
        if conn is None:
            return pd.DataFrame()
        cur = conn.cursor()
        try:
            cur.execute(query)
            rows = cur.fetchall()
        finally:
            # Release the cursor even when the query itself fails.
            cur.close()
        return pd.DataFrame(rows, columns=columns)
    except Exception as e:
        logging.error(f"Error fetching DAO references: {e}")
        return pd.DataFrame()
    finally:
        # Always hand the connection back, on success and on failure alike.
        if conn is not None:
            try:
                conn.close()
            except Exception as e:
                # A failed close must not discard rows that were already read.
                logging.warning(f"Error closing database connection: {e}")
def preprocess_text(text):
    """Normalize text for word cloud generation.

    Every character that is neither an ASCII letter nor whitespace is
    removed (not replaced by a space), and the result is lower-cased.
    Whitespace is kept exactly as it appears in the input.

    Args:
        text: The raw text. Non-string values, such as ``None`` or NaN
            coming from a missing field, are treated as empty text.

    Returns:
        The cleaned, lower-case string, or ``''`` for non-string input.
    """
    if not isinstance(text, str):
        # Missing values would otherwise make re.sub raise TypeError.
        return ''
    return re.sub(r'[^a-zA-Z\s]', '', text).lower()
def generate_dao_word_cloud(df, output_dir, *, max_words=15, show=True):
    """Generate and save a word cloud image built from DAO headlines.

    The image is written to ``output_dir`` as
    ``dao_wordcloud_<YYYYmmdd_HHMMSS>.png``. ``df`` is only read; no column
    is added to it.

    Args:
        df: DataFrame with a 'Headline' column.
        output_dir: Directory for the image; created if it does not exist.
        max_words: Maximum number of words drawn in the cloud.
        show: When true, also display the image with matplotlib. Pass
            ``False`` for unattended runs where no window should open.

    Returns:
        None. Nothing is written when there is no usable headline text.
    """
    if df.empty:
        logging.warning("No DAO references found; word cloud generation skipped.")
        return

    # Work on a separate Series so the caller's DataFrame is left untouched,
    # and drop missing headlines, which cannot be preprocessed.
    headlines = df['Headline'].dropna()
    if headlines.empty:
        logging.warning("No DAO headlines available; word cloud generation skipped.")
        return

    all_text = ' '.join(preprocess_text(str(headline)) for headline in headlines)
    if not all_text.strip():
        logging.warning("DAO headlines contain no usable words; word cloud generation skipped.")
        return

    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            max_words=max_words,
            background_color='white',
        ).generate(all_text)
    except ValueError as e:
        # WordCloud raises ValueError when the text yields no word to draw,
        # for example when only stop words remain.
        logging.warning(f"Word cloud generation skipped: {e}")
        return

    # Ensure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Timestamped name so successive runs do not overwrite each other
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_image = os.path.join(output_dir, f"dao_wordcloud_{timestamp}.png")

    wordcloud.to_file(output_image)
    logging.info(f"DAO word cloud image saved as {output_image}")

    if show:
        fig = plt.figure(figsize=(10, 5))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.show()
        # Free the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def generate_dao_report(df, output_dir):
    """Write the DAO references in ``df`` to a timestamped CSV report.

    The file is saved via ``save_to_csv`` as
    ``dao_report_<YYYYmmdd_HHMMSS>.csv`` inside ``output_dir``, which is
    created when missing. An empty ``df`` only produces a warning; no
    directory or file is created.
    """
    if df.empty:
        logging.warning("No DAO references found in the database.")
        return

    # Make sure the destination directory is there before writing.
    os.makedirs(output_dir, exist_ok=True)

    # The current date and time keep each report's filename unique.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    report_name = f"dao_report_{timestamp}.csv"
    output_file = os.path.join(output_dir, report_name)

    save_to_csv(df, output_file)
    logging.info(f"DAO report generated and saved to {output_file}.")
# Directory that receives both the CSV report and the word cloud image when
# no other location is given.
DEFAULT_OUTPUT_DIR = '/media/boilerrat/Bobby/CryptoData/BlockScent/csv'


def main(output_dir=DEFAULT_OUTPUT_DIR):
    """Fetch DAO references, then save the CSV report and the word cloud.

    Args:
        output_dir: Directory that receives both outputs. Defaults to the
            location this script has always written to.
    """
    df_dao = fetch_dao_references()
    if df_dao.empty:
        logging.warning("No DAO references found; report and word cloud generation skipped.")
        return

    generate_dao_report(df_dao, output_dir)
    generate_dao_word_cloud(df_dao, output_dir)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()