diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5b8e4d07..61343a7e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,10 @@ Unreleased ========================= +[9.3.0] - 2024-09-30 +--------------------- + * refactor: Further improvement in SQL queries for leaderboard API endpoint. + [9.2.2] - 2024-09-27 --------------------- * fix: remove the cache logging on EnterpriseLearnerEnrollmentViewSet. diff --git a/enterprise_data/__init__.py b/enterprise_data/__init__.py index 9d5b9e87..4d6cdd3e 100644 --- a/enterprise_data/__init__.py +++ b/enterprise_data/__init__.py @@ -2,4 +2,4 @@ Enterprise data api application. This Django app exposes API endpoints used by enterprises. """ -__version__ = "9.2.2" +__version__ = "9.3.0" diff --git a/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py b/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py index 1a1158aa..1e61b804 100644 --- a/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py +++ b/enterprise_data/admin_analytics/database/queries/fact_engagement_admin_dash.py @@ -111,59 +111,62 @@ def get_engagement_time_series_data_query(): """ @staticmethod - def get_all_leaderboard_data_query(): + def get_engagement_data_for_leaderboard_query(): """ - Get the query to fetch the leaderboard data. + Get the query to fetch the engagement data for leaderboard. - Query should fetch the leaderboard data for the enterprise customer to show in the data table. + Query should fetch the engagement data for like learning time, session length of + the enterprise learners to show in the leaderboard. Returns: - (str): Query to fetch the leaderboard data. + (str): Query to fetch the engagement data for leaderboard. """ return """ - WITH Engagement AS ( - SELECT - email, - ROUND(SUM(learning_time_seconds) / 3600, 1) as learning_time_hours, - SUM(is_engaged) as session_count, - CASE - WHEN SUM(is_engaged) = 0 THEN 0.0 - ELSE ROUND(SUM(learning_time_seconds) / 3600 / SUM(is_engaged), 1) - END AS average_session_length - FROM fact_enrollment_engagement_day_admin_dash - WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND - (activity_date BETWEEN %(start_date)s AND %(end_date)s) AND - is_engaged = 1 - GROUP BY email - ), - Completions AS ( - SELECT email, count(course_key) as course_completion_count - FROM fact_enrollment_admin_dash - WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND - (passed_date BETWEEN %(start_date)s AND %(end_date)s) AND - has_passed = 1 - GROUP BY email - ) SELECT - Engagement.email, - Engagement.learning_time_hours, - Engagement.session_count, - Engagement.average_session_length, - Completions.course_completion_count - FROM Engagement - LEFT JOIN Completions - ON Engagement.email = Completions.email - ORDER BY - Engagement.learning_time_hours DESC, - Engagement.session_count DESC, - Completions.course_completion_count DESC + email, + ROUND(SUM(learning_time_seconds) / 3600, 1) as learning_time_hours, + SUM(is_engaged) as session_count, + CASE + WHEN SUM(is_engaged) = 0 THEN 0.0 + ELSE ROUND(SUM(learning_time_seconds) / 3600 / SUM(is_engaged), 1) + END AS average_session_length + FROM fact_enrollment_engagement_day_admin_dash + WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND + (activity_date BETWEEN %(start_date)s AND %(end_date)s) AND + is_engaged = 1 + GROUP BY email + ORDER BY learning_time_hours DESC LIMIT %(limit)s OFFSET %(offset)s; """ + @staticmethod + def get_completion_data_for_leaderboard_query(email_list: list): + """ + Get the query to fetch the completions data for leaderboard. + + Query should fetch the completion data for like course completion count of + the enterprise learners to show in the leaderboard. + + Arguments: + email_list (str): List of emails to filter the completions data. + + Returns: + (list): Query to fetch the completions data for leaderboard. + """ + return f""" + SELECT email, count(course_key) as course_completion_count + FROM fact_enrollment_admin_dash + WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND + (passed_date BETWEEN %(start_date)s AND %(end_date)s) AND + has_passed = 1 AND + email IN {str(tuple(email_list))} + GROUP BY email; + """ + @staticmethod def get_leaderboard_data_count_query(): """ - Get the query to fetch the leaderboard row count. + Get the query to fetch the leaderboard row count and null email counter. Query should fetch the count of rows for the leaderboard data for the enterprise customer. @@ -171,26 +174,13 @@ def get_leaderboard_data_count_query(): (str): Query to fetch the leaderboard row count. """ return """ - WITH Engagement AS ( - SELECT - email - FROM fact_enrollment_engagement_day_admin_dash - WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND - (activity_date BETWEEN %(start_date)s AND %(end_date)s) AND - is_engaged = 1 - GROUP BY email - ), - Completions AS ( - SELECT email, count(course_key) as course_completion_count - FROM fact_enrollment_admin_dash - WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND - (passed_date BETWEEN %(start_date)s AND %(end_date)s) AND - has_passed = 1 - GROUP BY email - ) SELECT - count(*) - FROM Engagement - LEFT JOIN Completions - ON Engagement.email = Completions.email + COUNT(*) OVER () AS record_count, + sum(case when email is null then 1 else 0 end) null_count + FROM fact_enrollment_engagement_day_admin_dash + WHERE enterprise_customer_uuid=%(enterprise_customer_uuid)s AND + (activity_date BETWEEN %(start_date)s AND %(end_date)s) AND + is_engaged = 1 + GROUP BY email + LIMIT 1; """ diff --git a/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py b/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py index 5bc2f718..bd33aca3 100644 --- a/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py +++ b/enterprise_data/admin_analytics/database/tables/fact_engagement_admin_dash.py @@ -153,11 +153,14 @@ def get_engagement_time_series_data(self, enterprise_customer_uuid: UUID, start_ as_dict=True, ) - def get_all_leaderboard_data( + def _get_engagement_data_for_leaderboard( self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, limit: int, offset: int ): """ - Get the leaderboard data for the given enterprise customer. + Get the engagement data for leaderboard. + + The engagement data would include fields like learning time, session length of + the enterprise learners to show in the leaderboard. Arguments: enterprise_customer_uuid (UUID): The UUID of the enterprise customer. @@ -167,10 +170,10 @@ def get_all_leaderboard_data( offset (int): The number of records to skip. Returns: - list[dict]: The leaderboard data. + list[dict]: The engagement data for leaderboard. """ return run_query( - query=self.queries.get_all_leaderboard_data_query(), + query=self.queries.get_engagement_data_for_leaderboard_query(), params={ 'enterprise_customer_uuid': enterprise_customer_uuid, 'start_date': start_date, @@ -181,6 +184,70 @@ def get_all_leaderboard_data( as_dict=True, ) + def _get_completion_data_for_leaderboard_query( + self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, email_list: list + ): + """ + Get the completion data for leaderboard. + + The completion data would include fields like course completion count of enterprise learners of the + given enterprise customer. + + Arguments: + enterprise_customer_uuid (UUID): The UUID of the enterprise customer. + start_date (date): The start date. + end_date (date): The end date. + email_list (list): List of emails of the enterprise learners. + + Returns: + list[dict]: The engagement data for leaderboard. + """ + return run_query( + query=self.queries.get_completion_data_for_leaderboard_query(email_list), + params={ + 'enterprise_customer_uuid': enterprise_customer_uuid, + 'start_date': start_date, + 'end_date': end_date, + }, + as_dict=True, + ) + + def get_all_leaderboard_data( + self, enterprise_customer_uuid: UUID, start_date: date, end_date: date, limit: int, offset: int + ): + """ + Get the leaderboard data for the given enterprise customer. + + Arguments: + enterprise_customer_uuid (UUID): The UUID of the enterprise customer. + start_date (date): The start date. + end_date (date): The end date. + limit (int): The maximum number of records to return. + offset (int): The number of records to skip. + + Returns: + list[dict]: The leaderboard data. + """ + engagement_data = self._get_engagement_data_for_leaderboard( + enterprise_customer_uuid=enterprise_customer_uuid, + start_date=start_date, + end_date=end_date, + limit=limit, + offset=offset, + ) + engagement_data_dict = {engagement['email']: engagement for engagement in engagement_data} + completion_data = self._get_completion_data_for_leaderboard_query( + enterprise_customer_uuid=enterprise_customer_uuid, + start_date=start_date, + end_date=end_date, + email_list=list(engagement_data_dict.keys()), + ) + for completion in completion_data: + email = completion['email'] + engagement_data_dict[email]['course_completion_count'] = completion['course_completion_count'] + + return list(engagement_data_dict.values()) + def get_leaderboard_data_count(self, enterprise_customer_uuid: UUID, start_date: date, end_date: date): """ Get the total number of leaderboard records for the given enterprise customer.