Skip to content

Commit

Permalink
feat: indexing bayesian average ranking for courses in algolia
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-sheehan-edx committed Jan 25, 2024
1 parent 25cb40c commit 2f42a38
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def enterprise_catalog(self):

def get_permission_object(self):
"""
Retrieves the apporpriate object to use during edx-rbac's permission checks.
Retrieves the appropriate object to use during edx-rbac's permission checks.
This object is passed to the rule predicate(s).
"""
Expand Down
1 change: 1 addition & 0 deletions enterprise_catalog/apps/api_client/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
DISCOVERY_COURSE_REVIEWS_ENDPOINT = urljoin(settings.DISCOVERY_SERVICE_API_URL, 'course_review/')
DISCOVERY_OFFSET_SIZE = 200
DISCOVERY_CATALOG_QUERY_CACHE_KEY_TPL = 'catalog_query:{id}'
# Cache key under which the average rating across fetched course reviews is stored.
DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_KEY = 'average_course_review'

# Enterprise API Client Constants
ENTERPRISE_API_URL = urljoin(settings.LMS_BASE_URL, '/enterprise/api/v1/')
Expand Down
13 changes: 12 additions & 1 deletion enterprise_catalog/apps/api_client/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import requests
from celery.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from django.core.cache import cache

from .base_oauth import BaseOAuthClient
from .constants import (
DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_KEY,
DISCOVERY_COURSE_REVIEWS_ENDPOINT,
DISCOVERY_COURSES_ENDPOINT,
DISCOVERY_OFFSET_SIZE,
Expand Down Expand Up @@ -122,7 +124,7 @@ def _retrieve_course_reviews(self, request_params):
break
return response.json()

def get_course_reviews(self, course_keys=None):
def get_course_reviews(self, course_keys=None, set_average_course_rating=False):
"""
Return results from the discovery service's /course_review endpoint as an object of key = course key, value =
course review. If course_keys is specified, only return results for those course keys.
Expand All @@ -149,6 +151,15 @@ def get_course_reviews(self, course_keys=None):
)
}

if set_average_course_rating:
rolling_rating_sum = 0.0

Check warning on line 155 in enterprise_catalog/apps/api_client/discovery.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/api_client/discovery.py#L155

Added line #L155 was not covered by tests
for value in results.values():
rolling_rating_sum += float(value.get('avg_course_rating'))

Check warning on line 157 in enterprise_catalog/apps/api_client/discovery.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/api_client/discovery.py#L157

Added line #L157 was not covered by tests

total_average_course_rating = rolling_rating_sum / len(results)
cache_key = DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_KEY
cache.set(cache_key, total_average_course_rating, settings.DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_TIMEOUT)

Check warning on line 161 in enterprise_catalog/apps/api_client/discovery.py

View check run for this annotation

Codecov / codecov/patch

enterprise_catalog/apps/api_client/discovery.py#L159-L161

Added lines #L159 - L161 were not covered by tests

return results

def get_metadata_by_query(self, catalog_query):
Expand Down
22 changes: 22 additions & 0 deletions enterprise_catalog/apps/catalog/algolia_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@
import time

from dateutil import parser
from django.core.cache import cache
from django.utils.translation import gettext as _

from enterprise_catalog.apps.api.v1.utils import is_course_run_active
from enterprise_catalog.apps.api_client.algolia import AlgoliaSearchClient
from enterprise_catalog.apps.api_client.constants import (
DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_KEY,
)
from enterprise_catalog.apps.catalog.constants import (
COURSE,
EXEC_ED_2U_COURSE_TYPE,
Expand Down Expand Up @@ -85,6 +89,7 @@
'normalized_metadata',
'reviews_count',
'avg_course_rating',
'course_bayesian_average',
]

# default configuration for the index
Expand Down Expand Up @@ -137,6 +142,7 @@
'customRanking': [
'asc(visible_via_association)',
'asc(created)',
'desc(course_bayesian_average)',
'desc(recent_enrollment_count)',
],
}
Expand Down Expand Up @@ -331,6 +337,21 @@ def get_algolia_object_id(content_type, uuid):
return None


def get_course_bayesian_average(course):
    """
    Compute the Bayesian average rating of a course, indexed in Algolia for custom ranking.

    The course's own average rating is blended with the overall average course rating read from the
    cache, weighted as though the course had received ``confidence_number`` extra reviews at that
    overall average.
    https://www.algolia.com/doc/guides/managing-results/must-do/custom-ranking/how-to/bayesian-average/

    Arguments:
        course (dict): course metadata; the ``avg_course_rating`` and ``reviews_count`` keys are read.

    Returns:
        The Bayesian average rating, or 0 when the course has no rating data or when the overall
        average is not currently available in the cache.
    """
    # Number of "virtual" reviews at the overall average that are mixed into every course's rating.
    confidence_number = 15

    avg_review = course.get('avg_course_rating')
    ratings_count = course.get('reviews_count')
    if avg_review is None or ratings_count is None:
        return 0

    # Only hit the cache once we know the course actually has rating data to smooth.
    total_average_rating = cache.get(DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_KEY)
    if total_average_rating is None:
        # Cache miss (never populated, or expired): there is no prior to smooth against, so fall back
        # to the same "no data" value instead of raising a TypeError in the arithmetic below.
        return 0

    weighted_rating_sum = (avg_review * ratings_count) + (total_average_rating * confidence_number)
    return weighted_rating_sum / (ratings_count + confidence_number)


def get_course_language(course):
"""
Gets the human-readable language name associated with the advertised course run. Used for
Expand Down Expand Up @@ -1204,6 +1225,7 @@ def _algolia_object_from_product(product, algolia_fields):
'learning_type_v2': get_learning_type_v2(searchable_product),
'reviews_count': get_reviews_count(searchable_product),
'avg_course_rating': get_avg_course_rating(searchable_product),
'course_bayesian_average': get_course_bayesian_average(searchable_product),
})
elif searchable_product.get('content_type') == PROGRAM:
searchable_product.update({
Expand Down
2 changes: 2 additions & 0 deletions enterprise_catalog/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,11 @@

# How long we keep API Client data in cache. (seconds)
ONE_HOUR = 60 * 60
ONE_DAY = ONE_HOUR * 24
ENTERPRISE_CUSTOMER_CACHE_TIMEOUT = ONE_HOUR
DISCOVERY_CATALOG_QUERY_CACHE_TIMEOUT = ONE_HOUR
DISCOVERY_COURSE_DATA_CACHE_TIMEOUT = ONE_HOUR
# The cached average course rating is kept for a full day rather than an hour.
DISCOVERY_AVERAGE_COURSE_REVIEW_CACHE_TIMEOUT = ONE_DAY

# URLs
LMS_BASE_URL = os.environ.get('LMS_BASE_URL', '')
Expand Down

0 comments on commit 2f42a38

Please sign in to comment.