Skip to content

Commit

Permalink
Updated BERTrend default path: .../bertrend/data|config|logs etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
picaultj committed Feb 11, 2025
1 parent 829f71c commit 0beff96
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 39 deletions.
20 changes: 9 additions & 11 deletions bertrend/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,13 @@
)

# Base dirs
BASE_DATA_PATH = BASE_PATH / "data"
BASE_CACHE_PATH = BASE_PATH / "cache"
BASE_OUTPUT_PATH = BASE_PATH / "output"
DATA_PATH = BASE_PATH / "data"
CACHE_PATH = BASE_PATH / "cache"
OUTPUT_PATH = BASE_PATH / "output"
CONFIG_PATH = BASE_PATH / "config"

FEED_BASE_PATH = BASE_DATA_PATH / "bertrend" / "feeds"
BERTREND_LOG_PATH = BASE_PATH / "logs" / "bertrend"
BERTREND_LOG_PATH.mkdir(parents=True, exist_ok=True)

# Define directories
DATA_PATH = BASE_DATA_PATH / "bertrend"
OUTPUT_PATH = BASE_OUTPUT_PATH / "bertrend"
CACHE_PATH = BASE_CACHE_PATH / "bertrend"
FEED_BASE_PATH = DATA_PATH / "feeds"
BERTREND_LOG_PATH = BASE_PATH / "logs"

# Weak signals
MODELS_DIR = CACHE_PATH / "models"
Expand All @@ -62,3 +57,6 @@
DATA_PATH.mkdir(parents=True, exist_ok=True)
OUTPUT_PATH.mkdir(parents=True, exist_ok=True)
CACHE_PATH.mkdir(parents=True, exist_ok=True)
CONFIG_PATH.mkdir(parents=True, exist_ok=True)
BERTREND_LOG_PATH.mkdir(parents=True, exist_ok=True)
MODELS_DIR.mkdir(parents=True, exist_ok=True)
16 changes: 8 additions & 8 deletions bertrend_apps/common/crontab_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,14 @@ def remove_from_crontab(pattern: str) -> bool:
return False


def schedule_scrapping(feed_cfg: Path):
def schedule_scrapping(feed_cfg: Path, user: str = None):
"""Schedule data scrapping on the basis of a feed configuration file"""
data_feed_cfg = load_toml_config(feed_cfg)
schedule = data_feed_cfg["data-feed"]["update_frequency"]
id = data_feed_cfg["data-feed"]["id"]
command = f"{sys.prefix}/bin/python -m bertrend_apps.data_provider scrape-feed {feed_cfg.resolve()} > {BERTREND_LOG_PATH}/cron_feed_{id}.log 2>&1"
log_path = BERTREND_LOG_PATH if not user else BERTREND_LOG_PATH / "users" / user
log_path.mkdir(parents=True, exist_ok=True)
command = f"{sys.prefix}/bin/python -m bertrend_apps.data_provider scrape-feed {feed_cfg.resolve()} > {log_path}/cron_feed_{id}.log 2>&1"
add_job_to_crontab(schedule, command, "")


Expand All @@ -98,16 +100,14 @@ def schedule_newsletter(
def check_if_scrapping_active_for_user(feed_id: str, user: str = None) -> bool:
"""Checks if a given scrapping feed is active (registered in the crontab)"""
if user:
return check_cron_job(rf"scrape-feed.*/feeds/users/{user}/{feed_id}_feed.toml")
return check_cron_job(rf"scrape-feed.*/users/{user}/{feed_id}_feed.toml")
else:
return check_cron_job(rf"scrape-feed.*/feeds/{feed_id}_feed.toml")
return check_cron_job(rf"scrape-feed.*/{feed_id}_feed.toml")


def remove_scrapping_for_user(feed_id: str, user: str = None):
"""Removes from the crontab the job matching the provided feed_id"""
if user:
return remove_from_crontab(
rf"scrape-feed.*/feeds/users/{user}/{feed_id}_feed.toml"
)
return remove_from_crontab(rf"scrape-feed.*/users/{user}/{feed_id}_feed.toml")
else:
return remove_from_crontab(rf"scrape-feed.*/feeds/{feed_id}_feed.toml")
return remove_from_crontab(rf"scrape-feed.*/{feed_id}_feed.toml")
4 changes: 2 additions & 2 deletions bertrend_apps/common/mail_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
from googleapiclient.errors import HttpError
from loguru import logger

from bertrend import BASE_DATA_PATH
from bertrend import BASE_PATH

SCOPES = ["https://mail.google.com/"] # full access to mail API
FROM = "wattelse.ai@gmail.com"
TOKEN_PATH = BASE_DATA_PATH / "gmail_token.json"
TOKEN_PATH = BASE_PATH / "gmail_token.json"
DEFAULT_GMAIL_CREDENTIALS_PATH = (
Path(__file__).parent.parent / "config" / "gmail_credentials.json"
)
Expand Down
17 changes: 7 additions & 10 deletions bertrend_apps/prospective_demo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
# This file is part of BERTrend.
from pathlib import Path

from bertrend import MODELS_DIR, FEED_BASE_PATH
from bertrend import MODELS_DIR, FEED_BASE_PATH, CONFIG_PATH

# Feed config path
USER_FEEDS_BASE_PATH = FEED_BASE_PATH / "users"
USER_FEEDS_BASE_PATH.mkdir(parents=True, exist_ok=True)
# Config path for users
CONFIG_FEEDS_BASE_PATH = CONFIG_PATH / "users"
CONFIG_FEEDS_BASE_PATH.mkdir(parents=True, exist_ok=True)

# Models config path
BASE_MODELS_DIR = MODELS_DIR / "prospective_demo" / "users"
BASE_MODELS_DIR = MODELS_DIR / "users"
INTERPRETATION_PATH = "interpretation"

# some identifiers
Expand Down Expand Up @@ -39,7 +39,7 @@


def get_user_feed_path(user_name: str, feed_id: str) -> Path:
feed_path = USER_FEEDS_BASE_PATH / user_name / f"{feed_id}_feed.toml"
feed_path = CONFIG_FEEDS_BASE_PATH / user_name / f"{feed_id}_feed.toml"
return feed_path


Expand All @@ -51,8 +51,5 @@ def get_user_models_path(user_name: str, model_id: str) -> Path:


def get_model_cfg_path(user_name: str, model_id: str) -> Path:
model_cfg_path = (
get_user_models_path(user_name=user_name, model_id=model_id)
/ f"analysis_{model_id}.toml"
)
model_cfg_path = CONFIG_FEEDS_BASE_PATH / user_name / f"{model_id}_analysis.toml"
return model_cfg_path
4 changes: 2 additions & 2 deletions bertrend_apps/prospective_demo/feeds_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from loguru import logger

from bertrend import FEED_BASE_PATH, load_toml_config
from bertrend_apps.prospective_demo import USER_FEEDS_BASE_PATH
from bertrend_apps.prospective_demo import CONFIG_FEEDS_BASE_PATH


def read_user_feeds(username: str) -> tuple[dict[str, dict], dict[str, Path]]:
"""Read user feed config files"""
user_feed_dir = USER_FEEDS_BASE_PATH / username
user_feed_dir = CONFIG_FEEDS_BASE_PATH / username
user_feed_dir.mkdir(parents=True, exist_ok=True)
logger.debug(f"Reading user feeds from: {user_feed_dir}")
matching_files = user_feed_dir.rglob("*_feed.toml")
Expand Down
14 changes: 9 additions & 5 deletions bertrend_apps/prospective_demo/feeds_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from bertrend_apps.prospective_demo.feeds_common import (
read_user_feeds,
)
from bertrend_apps.prospective_demo import USER_FEEDS_BASE_PATH
from bertrend_apps.prospective_demo import CONFIG_FEEDS_BASE_PATH
from bertrend_apps.prospective_demo.models_info import (
remove_scheduled_training_for_user,
)
Expand Down Expand Up @@ -113,7 +113,9 @@ def edit_feed_monitoring(config: dict | None = None):
if not config:
config = {}
config["id"] = "feed_" + chosen_id
config["feed_dir_path"] = st.session_state.username + "/feed_" + chosen_id
config["feed_dir_path"] = (
"users/" + st.session_state.username + "/feed_" + chosen_id
)
config["query"] = query
config["provider"] = provider
if not config.get("max_results"):
Expand Down Expand Up @@ -144,13 +146,13 @@ def edit_feed_monitoring(config: dict | None = None):
def save_feed_config(chosen_id, feed_config: dict):
"""Save the feed configuration to disk as a TOML file."""
feed_path = (
USER_FEEDS_BASE_PATH / st.session_state.username / f"{chosen_id}_feed.toml"
CONFIG_FEEDS_BASE_PATH / st.session_state.username / f"{chosen_id}_feed.toml"
)
# Save the dictionary to a TOML file
with open(feed_path, "w") as toml_file:
toml.dump({"data-feed": feed_config}, toml_file)
logger.debug(f"Saved feed config {feed_config} to {feed_path}")
schedule_scrapping(feed_path)
schedule_scrapping(feed_path, user=st.session_state.username)
st.rerun()


Expand Down Expand Up @@ -216,7 +218,9 @@ def toggle_feed(cfg: dict):
st.toast(f"Le flux **{feed_id}** est déactivé !", icon=INFO_ICON)
logger.info(f"Flux {feed_id} désactivé !")
else:
schedule_scrapping(st.session_state.feed_files[feed_id])
schedule_scrapping(
st.session_state.feed_files[feed_id], user=st.session_state.username
)
st.toast(f"Le flux **{feed_id}** est activé !", icon=WARNING_ICON)
logger.info(f"Flux {feed_id} activé !")
time.sleep(0.2)
Expand Down
2 changes: 1 addition & 1 deletion bertrend_apps/prospective_demo/models_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def schedule_training_for_user(model_id: str, user: str):
schedule = generate_crontab_expression(
st.session_state.model_analysis_cfg[model_id]["model_config"]["granularity"]
)
logpath = BERTREND_LOG_PATH / user / model_id
logpath = BERTREND_LOG_PATH / "users" / user
logpath.mkdir(parents=True, exist_ok=True)
command = (
f"{sys.prefix}/bin/python -m bertrend_apps.prospective_demo.process_new_data {user} {model_id} "
Expand Down

0 comments on commit 0beff96

Please sign in to comment.