Skip to content

Commit

Permalink
Merge pull request #17 from voynow/1-aggregate-multi-run-days-into-single-daily-activity
Browse files Browse the repository at this point in the history

grouping on date, adding progress bar to email
  • Loading branch information
voynow authored Aug 21, 2024
2 parents 7708677 + 0109283 commit 5e28cfe
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 58 deletions.
92 changes: 62 additions & 30 deletions src/activities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import timedelta
from datetime import datetime, timedelta
from typing import Dict, List

import polars as pl
Expand Down Expand Up @@ -43,54 +43,83 @@ def activities_to_df(activities: List[Activity]) -> pl.DataFrame:
)


def preprocess_activities_df(df: pl.DataFrame) -> pl.DataFrame:
def add_missing_dates(
df: pl.DataFrame, start_date: datetime, end_date: datetime
) -> pl.DataFrame:
"""
Cleans and transforms the activities DataFrame by sorting, converting units,
and adding derived columns.
Ensures that the DataFrame contains all dates between the start and end date
:param df: The initial polars DataFrame containing activity data
:param start_date: The start date of the range
:param end_date: The end date of the range
"""
df_with_date = df.with_columns(df["start_date_local"].dt.date().alias("date")).drop(
"start_date_local"
)
n_days = (end_date.date() - start_date.date()).days + 1
date_range_df = pl.DataFrame(
{"date": [start_date.date() + timedelta(days=i) for i in range(n_days)]}
)
return date_range_df.join(df_with_date, on="date", how="left")


def preprocess(df: pl.DataFrame) -> pl.DataFrame:
"""
:param df: The initial polars DataFrame containing activity data
:return: A transformed polars DataFrame with cleansed data
"""
METERS_PER_MILE = 1609.34
FEET_PER_METER = 3.28084
MICROSECONDS_PER_MINUTE = 1_000_000 * 60

# Define transformation operations for each column
col_operations = [
pl.col("start_date_local")
pl.col("date")
.dt.strftime("%a")
.str.to_lowercase()
.first()
.alias("day_of_week"),
pl.col("start_date_local").dt.week().alias("week_of_year"),
pl.col("start_date_local").dt.year().alias("year"),
(pl.col("distance") / 1609.34).alias("distance_in_miles"),
(pl.col("total_elevation_gain") * 3.28084).alias("elevation_gain_in_feet"),
(pl.col("moving_time") / 1_000_000 / 60).alias("moving_time_in_minutes"),
pl.col("date").dt.week().first().alias("week_of_year"),
pl.col("date").dt.year().first().alias("year"),
pl.col("distance").sum().alias("distance_in_miles") / METERS_PER_MILE,
pl.col("total_elevation_gain").sum().alias("elevation_gain_in_feet")
* FEET_PER_METER,
(pl.col("moving_time").sum() / MICROSECONDS_PER_MINUTE).alias(
"moving_time_in_minutes"
),
(
(pl.col("moving_time") / 1_000_000 / 60) / (pl.col("distance") / 1609.34)
(pl.col("moving_time").sum() / MICROSECONDS_PER_MINUTE)
/ (pl.col("distance").sum() / METERS_PER_MILE)
).alias("pace_minutes_per_mile"),
]

# Apply transformations, sorting, column removals, and filtering
return (
df.sort("start_date_local")
.with_columns(col_operations)
.drop(["distance", "total_elevation_gain", "moving_time"])
df.groupby("date")
.agg(col_operations)
.sort("date")
# drop incomplete first week
.filter(pl.col("week_of_year") != pl.col("week_of_year").min())
)


def get_activities_df(strava_client: Client, num_weeks: int = 8) -> pl.DataFrame:
"""
Fetches and returns activities data for a given athlete ID as a DataFrame,
cleansed and processed
Fetches activities for a given athlete ID and returns a DataFrame with daily aggregated activities
:param athlete_id: The Strava athlete ID
:param num_weeks: The number of weeks to fetch activities for
:return: A cleaned and processed DataFrame of the athlete's activities
:param strava_client: The Strava client object to fetch data.
:param num_weeks: The number of weeks to fetch activities for.
:return: A cleaned and processed DataFrame of the athlete's daily aggregated activities.
"""
timedela_x_weeks = datetime_now_est() - timedelta(weeks=num_weeks)
activities = strava_client.get_activities(
after=timedela_x_weeks, before=datetime_now_est()
)
end_date = datetime_now_est()
start_date = end_date - timedelta(weeks=num_weeks)

activities = strava_client.get_activities(after=start_date, before=end_date)
raw_df = activities_to_df(activities)
return preprocess_activities_df(raw_df)
all_dates_df = add_missing_dates(
df=raw_df, start_date=start_date, end_date=end_date
)
return preprocess(all_dates_df)


def get_activity_summaries(strava_client, num_weeks=8) -> List[ActivitySummary]:
Expand All @@ -103,9 +132,9 @@ def get_activity_summaries(strava_client, num_weeks=8) -> List[ActivitySummary]:
"""
df = get_activities_df(strava_client, num_weeks)
concise_activities_df = df.with_columns(
pl.col("start_date_local")
.apply(lambda x: x.strftime("%A, %B %d, %Y %I:%M %p"), return_dtype=pl.Utf8)
.alias("date_and_time"),
pl.col("date").apply(
lambda x: x.strftime("%A, %B %d, %Y"), return_dtype=pl.Utf8
),
pl.col("distance_in_miles").apply(lambda x: round(x, 2)),
pl.col("elevation_gain_in_feet").apply(lambda x: round(x, 2)),
pl.col("pace_minutes_per_mile").apply(lambda x: round(x, 2)),
Expand All @@ -116,7 +145,6 @@ def get_activity_summaries(strava_client, num_weeks=8) -> List[ActivitySummary]:
"week_of_year",
"year",
"moving_time_in_minutes",
"start_date_local",
)
return [ActivitySummary(**row) for row in concise_activities_df.rows(named=True)]

Expand All @@ -136,9 +164,13 @@ def get_day_of_week_summaries(activities_df: pl.DataFrame) -> List[DayOfWeekSumm
activities_df.groupby("day_of_week")
.agg(
[
pl.col("id").count().alias("number_of_runs"),
pl.when(pl.col("distance_in_miles") > 0.25)
.then(pl.lit(1))
.otherwise(pl.lit(0))
.sum()
.alias("number_of_runs"),
pl.col("distance_in_miles").mean().alias("avg_miles"),
pl.col("pace_minutes_per_mile").mean().alias("avg_pace"),
pl.col("pace_minutes_per_mile").drop_nans().mean().alias("avg_pace"),
]
)
.with_columns(
Expand Down
40 changes: 29 additions & 11 deletions src/email_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
)


def space(n: int = 1):
return " " * n


def training_week_update_to_html(
mid_week_analysis: MidWeekAnalysis,
training_week_update_with_planning: TrainingWeekWithPlanning,
Expand All @@ -38,15 +34,15 @@ def training_week_update_to_html(

completed_sessions = {}
for activity in mid_week_analysis.activities:
activity_datetime = datetime.strptime(
activity.date_and_time, "%A, %B %d, %Y %I:%M %p"
)
activity_datetime = datetime.strptime(activity.date, "%A, %B %d, %Y")
completed_sessions[activity_datetime.strftime("%A").lower()] = activity

total_miles = round(mid_week_analysis.miles_target, 1)
miles_remaining = round(mid_week_analysis.miles_remaining, 1)
miles_ran = round(mid_week_analysis.miles_ran, 2)
miles_target = round(mid_week_analysis.miles_target, 2)
progress_percentage = (miles_ran / miles_target) * 100 if miles_target > 0 else 0

html_content = """
html_content = (
"""
<html>
<head>
<style>
Expand Down Expand Up @@ -129,6 +125,24 @@ def training_week_update_to_html(
color: #ffffff;
margin-bottom: 5px;
}
.progress-bar {
width: 100%;
background-color: #f3f3f3;
border-radius: 5px;
overflow: hidden;
margin-top: 10px;
}
.progress {
height: 20px;
width: """
+ f"{progress_percentage}%"
+ """;
background-color: #28a745;
text-align: center;
color: white;
line-height: 20px;
border-radius: 5px 0 0 5px;
}
.footer {
background-color: #f1f1f1;
text-align: center;
Expand All @@ -146,6 +160,7 @@ def training_week_update_to_html(
<div class="content">
<ul>
"""
)
# Add completed activities
for day, activity in completed_sessions.items():
html_content += f"""
Expand Down Expand Up @@ -175,7 +190,10 @@ def training_week_update_to_html(
</ul>
<div class="miles-summary" style="text-align: center;">
<div class="miles-info" style="margin: 0 auto;">
<span class="miles-label">Total Miles Planned: {total_miles}{space(8)}Miles Remaining: {miles_remaining}</span>
<span class="miles-label">Completed {miles_ran} out of {miles_target} miles</span>
<div class="progress-bar">
<div class="progress">{round(progress_percentage)}%</div>
</div>
</div>
</div>
</div>
Expand Down
4 changes: 2 additions & 2 deletions src/types/activity_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@


class ActivitySummary(BaseModel):
date_and_time: str
"""Datetime formatted as 'Monday, August 13, 2024 08:00 PM'"""
date: str
"""Date formatted as 'Tuesday, August 13, 2024'"""

distance_in_miles: float
elevation_gain_in_feet: float
Expand Down
38 changes: 23 additions & 15 deletions test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -57,17 +57,13 @@
"metadata": {},
"outputs": [],
"source": [
"@freeze_time(\"2024-08-11 20:00:00\")\n",
"def get_activities_df_wrapper(strava_client):\n",
" return get_activities_df(strava_client)\n",
"\n",
"client_preferences = \"A) Training for a marathon B) This will be my second marathon C) Prefer workouts on Wednesdays and long runs on Saturdays\"\n",
"sysmsg_base = f\"{COACH_ROLE}\\nYour client has included the following preferences: {client_preferences}\\n\"\n",
"\n",
"# activities setup\n",
"athlete_id = os.environ[\"JAMIES_ATHLETE_ID\"]\n",
"strava_client = get_strava_client(athlete_id)\n",
"activities_df = get_activities_df_wrapper(strava_client)\n",
"activities_df = get_activities_df(strava_client)\n",
"\n",
"# gen training week pipeline\n",
"day_of_week_summaries = get_day_of_week_summaries(activities_df)\n",
Expand Down Expand Up @@ -101,18 +97,14 @@
"metadata": {},
"outputs": [],
"source": [
"@freeze_time(\"2024-08-13 20:00:00\")\n",
"def mock_get_activity_summaries(strava_client, num_weeks=8):\n",
" return get_activity_summaries(strava_client, num_weeks)\n",
"\n",
"client_preferences = \"A) Training for a marathon B) This will be my second marathon C) Prefer workouts on Wednesdays and long runs on Saturdays\"\n",
"sysmsg_base = f\"{COACH_ROLE}\\nYour client has included the following preferences: {client_preferences}\\n\"\n",
"\n",
"athlete_id = os.environ[\"JAMIES_ATHLETE_ID\"]\n",
"strava_client = get_strava_client(athlete_id)\n",
"\n",
"training_week_with_coaching = get_training_week_with_coaching(athlete_id)\n",
"current_weeks_activity_summaries = mock_get_activity_summaries(strava_client, num_weeks=1)\n",
"current_weeks_activity_summaries = get_activity_summaries(strava_client, num_weeks=1)\n",
"mid_week_analysis = MidWeekAnalysis(\n",
" activities=current_weeks_activity_summaries,\n",
" training_week=training_week_with_coaching.training_week,\n",
Expand All @@ -137,19 +129,35 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# freeze time tests"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 120,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"athlete_id='98390356' token still valid until 2024-08-21 23:22:07+00:00\n",
"athlete_id='98390356' token still valid until 2024-08-21 23:22:07+00:00\n"
]
}
],
"source": [
"from src.lambda_function import lambda_handler\n",
"\n",
"@freeze_time(\"2024-08-18 20:00:00\")\n",
"def test_sunday_workflow():\n",
" lambda_handler({}, {})\n",
"\n",
"@freeze_time(\"2024-08-13 20:00:00\")\n",
"@freeze_time(\"2024-08-20 20:00:00\")\n",
"def test_mid_week_workflow():\n",
" lambda_handler({}, {})\n",
"\n",
Expand Down

0 comments on commit 5e28cfe

Please sign in to comment.