Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: stream csv downloads #428

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ Unreleased
----------

=========================
[6.0.0] - 2024-02-13
---------------------
* Add streaming csv support
* Add support for skipping the LMS call when filtering enrollments

[5.5.1] - 2024-01-10
---------------------
* Added retry mechanism for failed report deliveries.
Expand Down
2 changes: 1 addition & 1 deletion enterprise_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
Enterprise data api application. This Django app exposes API endpoints used by enterprises.
"""

__version__ = "5.5.1"
__version__ = "6.0.0"
2 changes: 1 addition & 1 deletion enterprise_data/api/v1/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Meta:
model = EnterpriseLearnerEnrollment
# Do not change the order of fields below. Ordering is important because `progress_v3`
# csv generated in `enterprise_reporting` should be same as csv generated on `admin-portal`
# Order and field names below should match with `EnterpriseLearnerEnrollmentViewSet.header`
# Order and field names below should match with `EnrollmentsCSVRenderer.header`
fields = (
'enrollment_id', 'enterprise_enrollment_id', 'is_consent_granted', 'paid_by',
'user_current_enrollment_mode', 'enrollment_date', 'unenrollment_date',
Expand Down
41 changes: 30 additions & 11 deletions enterprise_data/api/v1/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@
from rest_framework.status import HTTP_200_OK, HTTP_404_NOT_FOUND
from rest_framework.views import APIView

from django.conf import settings
from django.core.paginator import Paginator
from django.db.models import Count, Max, OuterRef, Prefetch, Q, Subquery, Value
from django.db.models.fields import IntegerField
from django.db.models.functions import Coalesce
from django.http import StreamingHttpResponse
from django.utils import timezone

from enterprise_data.api.v1 import serializers
Expand All @@ -34,6 +37,7 @@
EnterpriseOffer,
)
from enterprise_data.paginators import EnterpriseEnrollmentsPagination
from enterprise_data.renderers import EnrollmentsCSVRenderer
from enterprise_data.utils import get_cache_key

LOGGER = getLogger(__name__)
Expand Down Expand Up @@ -81,6 +85,7 @@ class EnterpriseLearnerEnrollmentViewSet(EnterpriseViewSetMixin, viewsets.ReadOn
ENROLLMENT_MODE_FILTER = 'user_current_enrollment_mode'
COUPON_CODE_FILTER = 'coupon_code'
OFFER_FILTER = 'offer_type'
# TODO: Remove after we release the streaming csv changes
# This will be used as CSV header for csv generated from `admin-portal`.
# Do not change the order of fields below. Ordering is important because csv generated
# on `admin-portal` should match `progress_v3` csv generated in `enterprise_reporting`
Expand All @@ -101,6 +106,7 @@ class EnterpriseLearnerEnrollmentViewSet(EnterpriseViewSetMixin, viewsets.ReadOn
'course_product_line', 'budget_id'
]

# TODO: Remove after we release the streaming csv changes
def get_renderer_context(self):
renderer_context = super().get_renderer_context()
renderer_context['header'] = self.header
Expand All @@ -124,22 +130,35 @@ def get_queryset(self):
if cached_response.is_found:
return cached_response.value
else:
enterprise = EnterpriseLearner.objects.filter(enterprise_customer_uuid=enterprise_customer_uuid).exists()

if not enterprise:
LOGGER.warning(
"[Data Overview Failure] Wrong Enterprise UUID. UUID [%s], Endpoint ['%s'], User: [%s]",
enterprise_customer_uuid,
self.request.get_full_path(),
self.request.user.username,
)

enrollments = EnterpriseLearnerEnrollment.objects.filter(enterprise_customer_uuid=enterprise_customer_uuid)

enrollments = self.apply_filters(enrollments)
TieredCache.set_all_tiers(cache_key, enrollments, DEFAULT_LEARNER_CACHE_TIMEOUT)
return enrollments

def list(self, request, *args, **kwargs):
"""
Override the list method to handle streaming CSV download.
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One idea for rollout: introduce a feature flag, where if the flag is off, this method can probably just return super().list(...). And if it's on, it can do all the new stuff you've introduced.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, here's an example from SO that might give you an idea for how to structure this code a little differently: https://stackoverflow.com/a/65564367 It might help you simplify this a little bit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you say if we enable/disable the old/new functionality based on a query param passed from admin-portal?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, sure, that's a good idea too.

if self.request.query_params.get('streaming_csv_enabled') == 'true':
if request.accepted_renderer.format == 'csv':
return StreamingHttpResponse(
EnrollmentsCSVRenderer().render(self._stream_serialized_data()),
content_type="text/csv",
headers={"Content-Disposition": 'attachment; filename="learner_progress_report.csv"'},
)

return super().list(request, *args, **kwargs)

def _stream_serialized_data(self):
    """
    Yield serialized enrollment rows, serializing one page at a time.

    The filtered queryset is split with Django's ``Paginator`` using
    ``settings.ENROLLMENTS_PAGE_SIZE`` rows per page. Each page is run through
    the view's serializer class and its rows are yielded individually, so the
    caller can feed them straight into a streaming renderer.

    Yields:
        Each item of ``serializer_class(page_rows, many=True).data``, page by page.
    """
    queryset = self.filter_queryset(self.get_queryset())

    # Paginating an unordered queryset can repeat or skip rows between pages
    # (Django emits UnorderedObjectListWarning for this case). Fall back to the
    # primary key only when nothing upstream has already applied an ordering,
    # so any existing ordering is left untouched.
    if not getattr(queryset, 'ordered', True):
        queryset = queryset.order_by('pk')

    serializer_class = self.get_serializer_class()
    paginator = Paginator(queryset, per_page=settings.ENROLLMENTS_PAGE_SIZE)
    for page_number in paginator.page_range:
        page_rows = paginator.page(page_number).object_list
        yield from serializer_class(page_rows, many=True).data

def apply_filters(self, queryset):
"""
Filters enrollments based on query params.
Expand Down
17 changes: 14 additions & 3 deletions enterprise_data/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,25 @@ class AuditEnrollmentsFilterBackend(filters.BaseFilterBackend, FiltersMixin):
`user_current_enrollment_mode` field.
"""

def filter_queryset(self, request, queryset, view):
def exclude_audit_enrollments(self, view):
"""
Filter out queryset for results where enrollment mode is `audit`.
Determine if audit enrollments should be excluded.
"""
# this will be passed from admin-portal to avoid api call to lms
# When the `audit_enrollments` query param is present and non-empty it decides the result
# on its own: only the exact string 'false' means "exclude"; any other value means "keep".
audit_enrollments = view.request.query_params.get('audit_enrollments')
if audit_enrollments:
return audit_enrollments == 'false'

# No usable query param: fall back to looking up the enterprise customer record.
enterprise_uuid = view.kwargs['enterprise_id']
enterprise_customer = self.get_enterprise_customer(enterprise_uuid)
# NOTE(review): `is False` excludes audit enrollments only when the flag is explicitly False;
# a missing or None `enable_audit_data_reporting` value keeps them. A plain `not ...` check
# would treat those cases as "exclude" -- confirm the stricter comparison is intended.
return enterprise_customer.get('enable_audit_data_reporting') is False

if not enterprise_customer.get('enable_audit_data_reporting'):
def filter_queryset(self, request, queryset, view):
"""
Filter out queryset for results where enrollment mode is `audit`.
"""
if self.exclude_audit_enrollments(view):
enterprise_uuid = view.kwargs['enterprise_id']
LOGGER.info(f'[AuditEnrollmentsFilterBackend] excluding audit enrollments for: {enterprise_uuid}')
# Filter out enrollments that have audit mode and do not have a coupon code or an offer.
filter_query = {
Expand Down
25 changes: 25 additions & 0 deletions enterprise_data/migrations/0039_auto_20240212_1403.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 3.2.23 on 2024-02-12 14:03

from django.db import migrations, models


# Schema migration for the `enterprise_data` app: adds three composite indexes to the
# `enterpriselearnerenrollment` model. Every index leads with `enterprise_customer_uuid`.
class Migration(migrations.Migration):

# Depends on migration 0038 of the same app.
dependencies = [
('enterprise_data', '0038_enterpriseoffer_export_timestamp'),
]

operations = [
# Index on (enterprise_customer_uuid, enterprise_user_id, user_current_enrollment_mode).
migrations.AddIndex(
model_name='enterpriselearnerenrollment',
index=models.Index(fields=['enterprise_customer_uuid', 'enterprise_user_id', 'user_current_enrollment_mode'], name='enterprise__enterpr_6b0be8_idx'),
),
# Index on (enterprise_customer_uuid, offer_id, budget_id).
migrations.AddIndex(
model_name='enterpriselearnerenrollment',
index=models.Index(fields=['enterprise_customer_uuid', 'offer_id', 'budget_id'], name='enterprise__enterpr_66e37f_idx'),
),
# Index on (enterprise_customer_uuid, user_current_enrollment_mode, coupon_code, offer_type).
migrations.AddIndex(
model_name='enterpriselearnerenrollment',
index=models.Index(fields=['enterprise_customer_uuid', 'user_current_enrollment_mode', 'coupon_code', 'offer_type'], name='enterprise__enterpr_1e8e98_idx'),
),
]
7 changes: 7 additions & 0 deletions enterprise_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ class Meta:
db_table = 'enterprise_learner_enrollment'
verbose_name = _("Enterprise Learner Enrollment")
verbose_name_plural = _("Enterprise Learner Enrollments")
indexes = [
models.Index(fields=['enterprise_customer_uuid', 'enterprise_user_id', 'user_current_enrollment_mode']),
models.Index(fields=['enterprise_customer_uuid', 'offer_id', 'budget_id']),
models.Index(fields=[
'enterprise_customer_uuid', 'user_current_enrollment_mode', 'coupon_code', 'offer_type'
]),
]

enterprise_enrollment_id = models.PositiveIntegerField(primary_key=True)
enrollment_id = models.PositiveIntegerField(null=True)
Expand Down
31 changes: 31 additions & 0 deletions enterprise_data/renderers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""
Renderers for enterprise data views.
"""

from rest_framework_csv.renderers import CSVStreamingRenderer


class EnrollmentsCSVRenderer(CSVStreamingRenderer):
"""
Custom streaming csv renderer for EnterpriseLearnerEnrollment data.
"""

# This will be used as CSV header for csv generated from `admin-portal`.
# Do not change the order of fields below. Ordering is important because csv generated
# on `admin-portal` should match `progress_v3` csv generated in `enterprise_reporting`
# Order and field names below should match with `EnterpriseLearnerEnrollmentSerializer.fields`
header = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could probably do something like

header = [field.name for field in EnterpriseLearnerEnrollment._meta.get_fields()]

'enrollment_id', 'enterprise_enrollment_id', 'is_consent_granted', 'paid_by',
'user_current_enrollment_mode', 'enrollment_date', 'unenrollment_date',
'unenrollment_end_within_date', 'is_refunded', 'seat_delivery_method',
'offer_id', 'offer_name', 'offer_type', 'coupon_code', 'coupon_name', 'contract_id',
'course_list_price', 'amount_learner_paid', 'course_key', 'courserun_key',
'course_title', 'course_pacing_type', 'course_start_date', 'course_end_date',
'course_duration_weeks', 'course_max_effort', 'course_min_effort',
'course_primary_program', 'primary_program_type', 'course_primary_subject', 'has_passed',
'last_activity_date', 'progress_status', 'passed_date', 'current_grade',
'letter_grade', 'enterprise_user_id', 'user_email', 'user_account_creation_date',
'user_country_code', 'user_username', 'enterprise_name', 'enterprise_customer_uuid',
'enterprise_sso_uid', 'created', 'course_api_url', 'total_learning_time_hours', 'is_subsidy',
'course_product_line', 'budget_id'
]
1 change: 1 addition & 0 deletions enterprise_data/settings/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def root(*args):
SITE_NAME = 'analytics-data-api'

ENTERPRISE_REPORTING_DB_ALIAS = 'default'
ENROLLMENTS_PAGE_SIZE = 10000

# Required for use with edx-drf-extensions JWT functionality:
# USER_SETTINGS overrides for djangorestframework-jwt APISettings class
Expand Down
1 change: 1 addition & 0 deletions requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ edx-drf-extensions
edx-opaque-keys
Django
django-fernet-fields-v2
djangorestframework-csv
django-filter
django-model-utils
edx-rbac
Expand Down
21 changes: 12 additions & 9 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,22 @@ asgiref==3.7.2
# via django
asn1crypto==1.5.1
# via snowflake-connector-python
awscli==1.32.24
awscli==1.32.35
# via -r requirements/reporting.in
bcrypt==4.1.2
# via paramiko
billiard==3.6.4.0
# via celery
boto3==1.34.24
boto3==1.34.35
# via -r requirements/reporting.in
botocore==1.34.24
botocore==1.34.35
# via
# awscli
# boto3
# s3transfer
celery==4.4.7
# via -r requirements/reporting.in
certifi==2023.11.17
certifi==2024.2.2
# via
# py2neo
# requests
Expand Down Expand Up @@ -84,8 +84,11 @@ django-waffle==4.1.0
# edx-drf-extensions
djangorestframework==3.14.0
# via
# djangorestframework-csv
# drf-jwt
# edx-drf-extensions
djangorestframework-csv==3.0.2
# via -r requirements/base.in
docutils==0.16
# via awscli
drf-jwt==1.19.2
Expand All @@ -95,7 +98,7 @@ edx-django-utils==5.10.1
# -r requirements/base.in
# edx-drf-extensions
# edx-rest-api-client
edx-drf-extensions==9.1.2
edx-drf-extensions==10.2.0
# via
# -r requirements/base.in
# edx-rbac
Expand All @@ -109,7 +112,7 @@ edx-rest-api-client==5.6.1
# via -r requirements/base.in
factory-boy==3.3.0
# via -r requirements/base.in
faker==22.5.0
faker==22.7.0
# via factory-boy
filelock==3.13.1
# via snowflake-connector-python
Expand All @@ -127,7 +130,7 @@ kombu==4.6.11
# via celery
monotonic==1.6
# via py2neo
newrelic==9.5.0
newrelic==9.6.0
# via edx-django-utils
packaging==23.2
# via
Expand Down Expand Up @@ -176,7 +179,7 @@ python-dateutil==2.8.2
# botocore
# faker
# vertica-python
pytz==2023.3.post1
pytz==2024.1
# via
# celery
# django
Expand Down Expand Up @@ -212,7 +215,7 @@ six==1.16.0
# vertica-python
slumber==0.7.1
# via edx-rest-api-client
snowflake-connector-python==3.6.0
snowflake-connector-python==3.7.0
# via -r requirements/reporting.in
sortedcontainers==2.4.0
# via snowflake-connector-python
Expand Down
6 changes: 3 additions & 3 deletions requirements/ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# make upgrade
#
coverage==7.4.0
coverage==7.4.1
# via -r requirements/ci.in
distlib==0.3.8
# via virtualenv
Expand All @@ -14,9 +14,9 @@ filelock==3.13.1
# virtualenv
packaging==23.2
# via tox
platformdirs==4.1.0
platformdirs==4.2.0
# via virtualenv
pluggy==1.3.0
pluggy==1.4.0
# via tox
py==1.11.0
# via tox
Expand Down
Loading
Loading