Skip to content

Commit

Permalink
Merge pull request #476 from openedx/eahmadjaved/ENT-9232
Browse files Browse the repository at this point in the history
feat: add endpoints to get completion data for an enterprise customer
  • Loading branch information
jajjibhai008 authored Aug 13, 2024
2 parents db59afd + a09356a commit 20aa76d
Show file tree
Hide file tree
Showing 11 changed files with 697 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ Unreleased

=========================

[8.7.0] - 2024-08-13
---------------------
* feat: add endpoints to get completion data for an enterprise customer

[8.6.1] - 2024-08-12
---------------------
* Dependency updates
Expand Down
2 changes: 1 addition & 1 deletion enterprise_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Enterprise data api application. This Django app exposes API endpoints used by enterprises.
"""

__version__ = "8.6.1"
__version__ = "8.7.0"
261 changes: 261 additions & 0 deletions enterprise_data/admin_analytics/completions_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
"""This module contains utility functions for completions analytics."""
from enterprise_data.utils import date_filter


def date_aggregation(level, group, date, df, type_="count"):
    """Perform date aggregation on a DataFrame.

    Aggregates data at the requested granularity by truncating the date
    column to the start of its period and then grouping.

    Args:
        level (str): The level of aggregation. Possible values are "Daily",
            "Weekly", "Monthly", and "Quarterly". Any other value returns
            ``df`` unchanged.
        group (list): Column names to group the data by. This list is not
            mutated by this function.
        date (str): The name of the date column in the DataFrame. For
            non-daily levels this column is truncated in place on ``df``.
        df (pandas.DataFrame): The DataFrame containing the data to be aggregated.
        type_ (str, optional): The type of aggregation to perform. Possible
            values are "count" and "sum". Defaults to "count". Any other
            value returns ``df`` unchanged.

    Returns:
        pandas.DataFrame: The aggregated data, with columns ``group + [type_]``.
    """
    if type_ not in ("count", "sum") or level not in ("Daily", "Weekly", "Monthly", "Quarterly"):
        # Unknown aggregation type or level: return the input unchanged,
        # matching the historical fall-through behavior.
        return df

    # Map each non-daily granularity to its pandas period alias; "Daily"
    # needs no truncation of the date column.
    period_alias = {"Weekly": "W", "Monthly": "M", "Quarterly": "Q"}.get(level)
    if period_alias is not None:
        # Truncate each timestamp to the first moment of its period so the
        # groupby collapses all rows within the same period.
        df[date] = df[date].dt.to_period(period_alias).dt.start_time

    grouped = df.groupby(group)
    df = (grouped.size() if type_ == "count" else grouped.sum()).reset_index()
    # reset_index leaves the aggregate column unnamed/0; rename it to the metric.
    df.columns = group + [type_]
    return df


def calculation(calc, df, type_="count"):
    """Perform a calculation on the given DataFrame based on the specified calculation type.

    Args:
        calc (str): The calculation type. Possible values are "Total", "Running Total",
            "Moving Average (3 Period)", and "Moving Average (7 Period)".
            "Total" (or any unrecognized value) leaves ``df`` unchanged.
        df (pandas.DataFrame): The filtered enrollments data; must contain an
            ``enroll_type`` column and a metric column named ``type_``.
        type_ (str, optional): Name of the metric column to transform,
            "count" or "sum". Default is "count". Any other value returns
            ``df`` unchanged.

    Returns:
        pandas.DataFrame: ``df`` with the metric column replaced by the
        calculated values.
    """
    if type_ not in ("count", "sum"):
        # Unknown metric column: preserve historical fall-through behavior.
        return df

    # Rolling-window sizes for the moving-average calculations.
    windows = {"Moving Average (3 Period)": 3, "Moving Average (7 Period)": 7}
    metric_by_type = df.groupby("enroll_type")[type_]

    if calc == "Running Total":
        df[type_] = metric_by_type.cumsum()
    elif calc in windows:
        # droplevel removes the enroll_type level so the rolling result
        # aligns with df's original index on assignment.
        df[type_] = metric_by_type.rolling(windows[calc]).mean().droplevel(level=[0])
    # "Total" and unrecognized calc values intentionally leave df as-is.
    return df


def get_completions_over_time(start_date, end_date, dff, date_agg, calc):
    """Get aggregated data for the completions-over-time graph.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        dff (pandas.DataFrame): Enrollments data.
        date_agg (str): Granularity of the aggregated date. One of Daily,
            Weekly, Monthly, Quarterly.
        calc (str): Calculation denoting the period for the running averages.
            One of Total, Running Total, Moving Average (3 Period),
            Moving Average (7 Period).
    """
    # Keep only enrollments that resulted in a completion.
    completions = dff[dff["has_passed"] == 1]

    # Restrict to the requested date window on the completion date.
    completions = date_filter(
        start=start_date,
        end=end_date,
        data_frame=completions,
        date_column="passed_date",
    )

    # Aggregate completion counts per period and enrollment type.
    completions = date_aggregation(
        level=date_agg,
        group=["passed_date", "enroll_type"],
        date="passed_date",
        df=completions,
    )

    # Apply the requested metric calculation and return the result.
    return calculation(calc=calc, df=completions)


def get_top_courses_by_completions(start_date, end_date, dff):
    """Get top 10 courses by completions.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        dff (pandas.DataFrame): Enrollments data.
    """
    # Keep only enrollments that resulted in a completion.
    passed = dff[dff["has_passed"] == 1]

    # Restrict to the requested date window on the completion date.
    passed = date_filter(start=start_date, end=end_date, data_frame=passed, date_column="passed_date")

    # Pick the 10 course keys with the most completions.
    completions_per_course = passed.groupby(["course_key"]).size()
    top_course_keys = list(completions_per_course.sort_values(ascending=False)[:10].index)

    # Count completions per (course, enrollment type) for those courses.
    result = (
        passed[passed.course_key.isin(top_course_keys)]
        .groupby(["course_key", "course_title", "enroll_type"])
        .size()
        .reset_index()
    )
    result.columns = ["course_key", "course_title", "enroll_type", "count"]
    return result


def get_top_subjects_by_completions(start_date, end_date, dff):
    """Get top 10 subjects by completions.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        dff (pandas.DataFrame): Enrollments data.
    """
    # Keep only enrollments that resulted in a completion.
    passed = dff[dff["has_passed"] == 1]

    # Restrict to the requested date window on the completion date.
    passed = date_filter(start=start_date, end=end_date, data_frame=passed, date_column="passed_date")

    # Pick the 10 subjects with the most completions.
    completions_per_subject = passed.groupby(["course_subject"]).size()
    top_subjects = list(completions_per_subject.sort_values(ascending=False)[:10].index)

    # Count completions per (subject, enrollment type) for those subjects.
    result = (
        passed[passed.course_subject.isin(top_subjects)]
        .groupby(["course_subject", "enroll_type"])
        .size()
        .reset_index()
    )
    result.columns = ["course_subject", "enroll_type", "count"]
    return result


def get_csv_data_for_completions_over_time(
    start_date, end_date, enrollments, date_agg, calc
):
    """Get csv data for the completions-over-time graph.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        enrollments (pandas.DataFrame): Filtered enrollments data.
        date_agg (str): Granularity of the aggregated date. One of Daily,
            Weekly, Monthly, Quarterly.
        calc (str): Calculation denoting the period for the running averages.
            One of Total, Running Total, Moving Average (3 Period),
            Moving Average (7 Period).

    Returns:
        dict: csv data
    """
    completions = get_completions_over_time(
        start_date, end_date, enrollments, date_agg, calc
    )
    # One row per aggregated date, one column per enrollment type.
    pivoted = completions.pivot(
        index="passed_date", columns="enroll_type", values="count"
    )
    return {
        "filename": f"Completions Timeseries, {start_date} - {end_date} ({date_agg} {calc}).csv",
        "data": pivoted,
    }


def get_csv_data_for_top_courses_by_completions(start_date, end_date, enrollments):
    """Get csv data for top 10 courses by completions.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        enrollments (pandas.DataFrame): Filtered enrollments data.

    Returns:
        dict: csv data
    """
    top_courses = get_top_courses_by_completions(start_date, end_date, enrollments)
    # One row per (course key, title), one column per enrollment type.
    pivoted = top_courses.pivot(
        index=["course_key", "course_title"], columns="enroll_type", values="count"
    )
    return {
        "filename": f"Top 10 Courses by Completions, {start_date} - {end_date}.csv",
        "data": pivoted,
    }


def get_csv_data_for_top_subjects_by_completions(start_date, end_date, enrollments):
    """Get csv data for top 10 subjects by completions.

    Args:
        start_date (datetime): The start date for the date filter.
        end_date (datetime): The end date for the date filter.
        enrollments (pandas.DataFrame): Filtered enrollments data.

    Returns:
        dict: csv data
    """
    top_subjects = get_top_subjects_by_completions(start_date, end_date, enrollments)
    # One row per subject, one column per enrollment type.
    pivoted = top_subjects.pivot(index="course_subject", columns="enroll_type", values="count")
    return {
        "filename": f"Top 10 Subjects by Completions, {start_date} - {end_date}.csv",
        "data": pivoted,
    }
5 changes: 4 additions & 1 deletion enterprise_data/admin_analytics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ class ChartType(Enum):
BUBBLE = 'bubble'
TOP_SKILLS_ENROLLMENT = 'top_skills_enrollment'
TOP_SKILLS_COMPLETION = 'top_skills_completion'
COMPLETIONS_OVER_TIME = 'completions_over_time'
TOP_COURSES_BY_COMPLETIONS = 'top_courses_by_completions'
TOP_SUBJECTS_BY_COMPLETIONS = 'top_subjects_by_completions'


def granularity_aggregation(level, group, date, data_frame, aggregation_type="count"):
Expand Down Expand Up @@ -172,7 +175,7 @@ def get_skills_bubble_chart_df(skills_filtered):
""" Get the skills data for the bubble chart.
Args:
skills_filtered (list): The skills data.
skills_filtered (pandas.DataFrame): The skills data.
Returns:
(pandas.DataFrame): The skills data for the bubble chart.
Expand Down
5 changes: 5 additions & 0 deletions enterprise_data/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,11 @@ class AdminAnalyticsAggregatesQueryParamsSerializer(serializers.Serializer): #
"""
start_date = serializers.DateField(required=False)
end_date = serializers.DateField(required=False)
granularity = serializers.CharField(required=False)
calculation = serializers.CharField(required=False)
response_type = serializers.CharField(required=False)
page = serializers.IntegerField(required=False)
chart_type = serializers.CharField(required=False)

def validate(self, attrs):
"""
Expand Down
11 changes: 11 additions & 0 deletions enterprise_data/api/v1/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.urls import re_path

from enterprise_data.api.v1.views import enterprise_admin as enterprise_admin_views
from enterprise_data.api.v1.views import enterprise_completions as enterprise_completions_views
from enterprise_data.api.v1.views import enterprise_learner as enterprise_learner_views
from enterprise_data.api.v1.views import enterprise_offers as enterprise_offers_views
from enterprise_data.api.v1.views.analytics_enrollments import (
Expand Down Expand Up @@ -71,6 +72,16 @@
enterprise_admin_views.EnterpriseAdminAnalyticsSkillsView.as_view(),
name='enterprise-admin-analytics-skills'
),
re_path(
fr'^admin/anlaytics/(?P<enterprise_id>{UUID4_REGEX})/completions/stats$',
enterprise_completions_views.EnterrpiseAdminCompletionsStatsView.as_view(),
name='enterprise-admin-analytics-completions-stats'
),
re_path(
fr'^admin/anlaytics/(?P<enterprise_id>{UUID4_REGEX})/completions$',
enterprise_completions_views.EnterrpiseAdminCompletionsView.as_view(),
name='enterprise-admin-analytics-completions'
),
]

urlpatterns += router.urls
14 changes: 9 additions & 5 deletions enterprise_data/api/v1/views/enterprise_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,17 +189,21 @@ def get(self, request, enterprise_id):
data=request.GET
)
serializer.is_valid(raise_exception=True)

start_date = serializer.data.get("start_date")
end_date = serializer.data.get("end_date", datetime.now())

last_updated_at = fetch_max_enrollment_datetime()
cache_expiry = (
last_updated_at + timedelta(days=1) if last_updated_at else datetime.now()
)

enrollment = fetch_and_cache_enrollments_data(
enterprise_id, cache_expiry
).copy()

start_date = serializer.data.get('start_date', enrollment.enterprise_enrollment_date.min())
end_date = serializer.data.get('end_date', datetime.now())

skills = fetch_and_cache_skills_data(enterprise_id, cache_expiry).copy()

if request.GET.get("format") == "csv":
if serializer.data.get('response_type') == 'csv':
csv_data = get_top_skills_csv_data(skills, start_date, end_date)
response = HttpResponse(content_type='text/csv')
filename = f"Skills by Enrollment and Completion, {start_date} - {end_date}.csv"
Expand Down
Loading

0 comments on commit 20aa76d

Please sign in to comment.