From bbc60062ae19955e0770faa2718ae8a0c86e3e4e Mon Sep 17 00:00:00 2001
From: Alexander J Sheehan
Date: Thu, 1 Feb 2024 18:49:09 +0000
Subject: [PATCH] feat: adding the ability to generate algolia objects from a
 single piece of content metadata

---
 enterprise_catalog/apps/api/tasks.py         | 17 ++++-
 .../apps/api/tests/test_tasks.py             | 74 +++++++++++++++++++
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/enterprise_catalog/apps/api/tasks.py b/enterprise_catalog/apps/api/tasks.py
index 326ff77b1..dd2a0de93 100644
--- a/enterprise_catalog/apps/api/tasks.py
+++ b/enterprise_catalog/apps/api/tasks.py
@@ -709,6 +709,20 @@ def add_metadata_to_algolia_objects(
         _add_in_algolia_products_by_object_id(algolia_products_by_object_id, batched_metadata)
 
 
+def get_algolia_objects_from_course_content_metadata(content_metadata):
+    content_key = content_metadata.content_key
+    context_accumulator = {
+        'total_algolia_products_count': 0,
+        'discarded_algolia_object_ids': defaultdict(int),
+    }
+    algolia_product = _get_algolia_products_for_batch(0, [content_key], {content_key}, {}, {}, context_accumulator)
+    logger.info(
+        f"get_algolia_objects_from_course_content_metadata created algolia object: {algolia_product} for course: "
+        f"{content_key} with context: {context_accumulator}"
+    )
+    return algolia_product
+
+
 # pylint: disable=too-many-statements
 def _get_algolia_products_for_batch(
     batch_num,
@@ -966,7 +980,6 @@ def _get_algolia_products_for_batch(
         f'{len(algolia_products_by_object_id)} generated algolia products kept, '
         f'{duplicate_algolia_records_discarded} generated algolia products discarded.'
     )
-    # extract only the fields we care about.
     return create_algolia_objects(algolia_products_by_object_id.values(), ALGOLIA_FIELDS)
 
 
@@ -1026,7 +1039,7 @@ def _index_content_keys_in_algolia(content_keys, algolia_client, dry_run=False):
     )
 
     # Feed the un-evaluated flat iterable of algolia products into the 3rd party library function. As of this writing,
-    # this library function will chunk the interable again using a default batch size of 1000.
+    # this library function will chunk the iterable again using a default batch size of 1000.
     #
     # See function documentation for indication that an Iterator is accepted:
     # https://github.com/algolia/algoliasearch-client-python/blob/e0a2a578464a1b01caaa84dba927b99ae8476af3/algoliasearch/search_index.py#L89
diff --git a/enterprise_catalog/apps/api/tests/test_tasks.py b/enterprise_catalog/apps/api/tests/test_tasks.py
index 1e8645908..9b3e96c8d 100644
--- a/enterprise_catalog/apps/api/tests/test_tasks.py
+++ b/enterprise_catalog/apps/api/tests/test_tasks.py
@@ -752,6 +752,80 @@ def _sort_tags_in_algolia_object_list(self, algolia_obj):
                 obj['academy_tags'] = sorted(obj['academy_tags'])
         return algolia_obj
 
+    def test_get_algolia_objects_from_course_metadata(self):
+        """
+        Test that the ``get_algolia_objects_from_course_content_metadata`` function generates the list of
+        algolia objects to index for a single course content metadata record.
+        """
+        test_course = ContentMetadataFactory(content_type=COURSE, content_key='test-course-1')
+        # Create 3 test catalogs, each with its own catalog query.
+        catalog_queries = [CatalogQueryFactory(uuid=uuid.uuid4()) for _ in range(3)]
+        catalogs = [
+            EnterpriseCatalogFactory(catalog_query=query)
+            for query in catalog_queries
+        ]
+
+        test_course.catalog_queries.set(catalog_queries[0:3])
+
+        algolia_objects = tasks.get_algolia_objects_from_course_content_metadata(test_course)
+        # Should look something like-
+        # [{'advertised_course_run': {'availability': 'current',
+        #                             'end': None,
+        #                             'key': 'course-v1:edX+DemoX',
+        #                             'max_effort': None,
+        #                             'min_effort': None,
+        #                             'pacing_type': None,
+        #                             'start': None,
+        #                             'upgrade_deadline': 32503680000.0,
+        #                             'weeks_to_complete': None},
+        #   'aggregation_key': 'course:test-course-1',
+        #   'availability': ['Available Now'],
+        #   'card_image_url': 'https://picsum.photos/540/209.jpg',
+        #   'content_type': 'course',
+        #   'course_bayesian_average': 0,
+        #   'course_runs': [{'availability': 'current',
+        #                    'end': None,
+        #                    'key': 'course-v1:edX+DemoX',
+        #                    'max_effort': None,
+        #                    'min_effort': None,
+        #                    'pacing_type': None,
+        #                    'start': None,
+        #                    'upgrade_deadline': 32503680000.0,
+        #                    'weeks_to_complete': None}],
+        #   'enterprise_catalog_uuids': ['0bdd57b7-b1cb-4775-b0dc-3cf49ff7d7f2',
+        #                                '2b861d68-06d7-415b-9baa-b5f496fafa1a',
+        #                                'add4b32e-8b32-4cb3-8de6-956210377330'],
+        #   'key': 'test-course-1',
+        #   'learning_type': 'course',
+        #   'learning_type_v2': 'course',
+        #   'marketing_url': 'https://marketing.url/test-course-1',
+        #   'objectID': 'course-be9a029e-8990-4f94-bf24-770fece63344-catalog-uuids-0',
+        #   'partners': [{'logo_image_url': 'https://dummyimage.com/265x132.jpg',
+        #                 'name': 'Partner Name'}],
+        #   'program_titles': [],
+        #   'programs': [],
+        #   'skill_names': [],
+        #   'skills': [],
+        #   'subjects': [],
+        #   'title': 'Fake Content Title UItWeUluIK',
+        #   'upcoming_course_runs': 0,
+        #   'uuid': 'be9a029e-8990-4f94-bf24-770fece63344'}, ... ]
+        for algo_object in algolia_objects:
+            assert algo_object.get('key') == test_course.content_key
+            assert algo_object.get('uuid') == test_course.json_metadata.get('uuid')
+
+            if object_catalogs := algo_object.get('enterprise_catalog_uuids'):
+                assert set(object_catalogs) == {str(catalog.uuid) for catalog in catalogs}
+
+            if object_customers := algo_object.get('enterprise_customer_uuids'):
+                assert set(object_customers) == {str(catalog.enterprise_uuid) for catalog in catalogs}
+
+            if object_queries := algo_object.get('enterprise_catalog_query_uuids'):
+                assert set(object_queries) == {str(query.uuid) for query in catalog_queries}
+
+            if object_queries_titles := algo_object.get('enterprise_catalog_query_titles'):
+                assert set(object_queries_titles) == {str(query.title) for query in catalog_queries}
+
     @mock.patch('enterprise_catalog.apps.api.tasks.get_initialized_algolia_client', return_value=mock.MagicMock())
     def test_index_algolia_program_common_uuids_only(self, mock_search_client):
         """