From fd0893315a94b8aafa5efdfee4fa82b688d7f2c5 Mon Sep 17 00:00:00 2001 From: Aryamanz29 Date: Tue, 7 Jan 2025 16:10:12 +0530 Subject: [PATCH 1/2] FT-792: Added support for `MongoDBCrawler` workflow package --- pyatlan/model/enums.py | 1 + pyatlan/model/packages/__init__.py | 4 +- pyatlan/model/packages/mongodb_crawler.py | 204 ++++++++++++++++++ .../data/package_requests/mongodb_basic.json | 124 +++++++++++ tests/unit/test_packages.py | 27 +++ 5 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 pyatlan/model/packages/mongodb_crawler.py create mode 100644 tests/unit/data/package_requests/mongodb_basic.json diff --git a/pyatlan/model/enums.py b/pyatlan/model/enums.py index fc95ee906..0deeec6f1 100644 --- a/pyatlan/model/enums.py +++ b/pyatlan/model/enums.py @@ -2217,6 +2217,7 @@ class WorkflowPackage(str, Enum): SIGMA = "atlan-sigma" SNOWFLAKE = "atlan-snowflake" SNOWFLAKE_MINER = "atlan-snowflake-miner" + MONGODB = "atlan-mongodb" SODA = "atlan-soda" SYNAPSE = "atlan-synapse" TABLEAU = "atlan-tableau" diff --git a/pyatlan/model/packages/__init__.py b/pyatlan/model/packages/__init__.py index fbb9a0104..a170c7de4 100644 --- a/pyatlan/model/packages/__init__.py +++ b/pyatlan/model/packages/__init__.py @@ -7,6 +7,7 @@ from .dbt_crawler import DbtCrawler from .dynamo_d_b_crawler import DynamoDBCrawler from .glue_crawler import GlueCrawler +from .mongodb_crawler import MongoDBCrawler from .postgres_crawler import PostgresCrawler from .powerbi_crawler import PowerBICrawler from .relational_assets_builder import RelationalAssetsBuilder @@ -28,8 +29,9 @@ "SQLServerCrawler", "SigmaCrawler", "SnowflakeCrawler", - "SnowflakeMiner", + "MongoDBCrawler", "TableauCrawler", + "SnowflakeMiner", "AssetImport", "AssetExportBasic", "RelationalAssetsBuilder", diff --git a/pyatlan/model/packages/mongodb_crawler.py b/pyatlan/model/packages/mongodb_crawler.py new file mode 100644 index 000000000..5d3e739cc --- /dev/null +++ b/pyatlan/model/packages/mongodb_crawler.py @@ 
-0,0 +1,204 @@ +from __future__ import annotations + +from typing import List, Optional + +from pyatlan.model.enums import AtlanConnectorType, WorkflowPackage +from pyatlan.model.packages.base.crawler import AbstractCrawler +from pyatlan.model.workflow import WorkflowMetadata + + +class MongoDBCrawler(AbstractCrawler): + """ + Base configuration for a new MongoDB crawler. + + :param connection_name: name for the connection + :param admin_roles: admin roles for the connection + :param admin_groups: admin groups for the connection + :param admin_users: admin users for the connection + :param allow_query: allow data to be queried in the + connection (True) or not (False), default: True + :param allow_query_preview: allow sample data viewing for + assets in the connection (True) or not (False), default: True + :param row_limit: maximum number of rows + that can be returned by a query, default: 10000 + """ + + _NAME = "mongodb" + _PACKAGE_NAME = "@atlan/mongodb" + _PACKAGE_PREFIX = WorkflowPackage.MONGODB.value + _CONNECTOR_TYPE = AtlanConnectorType.MONGODB + _PACKAGE_ICON = "https://assets.atlan.com/assets/mongoDB.svg" + _PACKAGE_LOGO = "https://assets.atlan.com/assets/mongoDB.svg" + + def __init__( + self, + connection_name: str, + admin_roles: Optional[List[str]] = None, + admin_groups: Optional[List[str]] = None, + admin_users: Optional[List[str]] = None, + allow_query: bool = True, + allow_query_preview: bool = True, + row_limit: int = 10000, + ): + super().__init__( + connection_name=connection_name, + connection_type=self._CONNECTOR_TYPE, + admin_roles=admin_roles, + admin_groups=admin_groups, + admin_users=admin_users, + allow_query=allow_query, + allow_query_preview=allow_query_preview, + row_limit=row_limit, + source_logo=self._PACKAGE_LOGO, + ) + + def direct(self, hostname: str, port: int = 27017) -> MongoDBCrawler: + """ + Set up the crawler to extract directly from the MongoDB Atlas. 
+ + :param hostname: hostname of the Atlas SQL connection + :param port: port number of the Atlas SQL connection. default: `27017` + :returns: crawler, set up to extract directly from the Atlas SQL connection + """ + local_creds = { + "name": f"default-{self._NAME}-{self._epoch}-0", + "host": hostname, + "port": port, + "connector_config_name": f"atlan-connectors-{self._NAME}", + } + self._credentials_body.update(local_creds) + self._parameters.append(dict(name="extraction-method", value="direct")) + return self + + def basic_auth( + self, + username: str, + password: str, + native_host: str, + default_db: str, + auth_db: str = "admin", + is_ssl: bool = True, + ) -> MongoDBCrawler: + """ + Set up the crawler to use basic authentication. + + :param username: through which to access Atlas SQL connection. + :param password: through which to access Atlas SQL connection. + :param native_host: native host address for the MongoDB connection. + :param default_db: default database to connect to. + :param auth_db: authentication database to use (default is `"admin"`). + :param is_ssl: whether to use SSL for the connection (default is `True`). + :returns: crawler, set up to use basic authentication + """ + local_creds = { + "authType": "basic", + "username": username, + "password": password, + "extra": { + "native-host": native_host, + "default-database": default_db, + "authsource": auth_db, + "ssl": is_ssl, + }, + } + self._credentials_body.update(local_creds) + return self + + def include(self, assets: dict) -> MongoDBCrawler: + """ + Defines the filter for assets to include when crawling. + + :param assets: dict where keys are database names, and values are lists of collections. 
+ :returns: crawler, set to include only those assets specified + :raises InvalidRequestException: In the unlikely + event the provided filter cannot be translated + """ + include_assets = assets or {} + to_include = self.build_hierarchical_filter(include_assets) + self._parameters.append( + dict(dict(name="include-filter", value=to_include or "{}")) + ) + return self + + def exclude(self, assets: dict) -> MongoDBCrawler: + """ + Defines the filter for assets to exclude when crawling. + + :param assets: dict where keys are database names, and values are lists of collections. + :returns: crawler, set to exclude only those assets specified + :raises InvalidRequestException: In the unlikely + event the provided filter cannot be translated + """ + exclude_assets = assets or {} + to_exclude = self.build_hierarchical_filter(exclude_assets) + self._parameters.append(dict(name="exclude-filter", value=to_exclude or "{}")) + return self + + def exclude_regex(self, regex: str) -> MongoDBCrawler: + """ + Defines the exclude regex for crawler + ignore collections based on a naming convention. 
+ + :param regex: exclude regex for the crawler + :returns: crawler, set to exclude + only those assets specified in the regex + """ + self._parameters.append(dict(name="temp-table-regex", value=regex)) + return self + + def _set_required_metadata_params(self): + self._parameters.append( + {"name": "credentials-fetch-strategy", "value": "credential_guid"} + ) + self._parameters.append( + {"name": "credential-guid", "value": "{{credentialGuid}}"} + ) + self._parameters.append( + { + "name": "connection", + "value": self._get_connection().json( + by_alias=True, exclude_unset=True, exclude_none=True + ), + } + ) + self._parameters.append(dict(name="publish-mode", value="production")) + self._parameters.append(dict(name="atlas-auth-type", value="internal")) + + def _get_metadata(self) -> WorkflowMetadata: + self._set_required_metadata_params() + return WorkflowMetadata( + labels={ + "orchestration.atlan.com/certified": "true", + "orchestration.atlan.com/source": self._NAME, + "orchestration.atlan.com/sourceCategory": "nosql", + "orchestration.atlan.com/type": "connector", + "orchestration.atlan.com/verified": "true", + "package.argoproj.io/installer": "argopm", + "package.argoproj.io/name": f"a-t-ratlans-l-a-s-h{self._NAME}", + "package.argoproj.io/registry": "httpsc-o-l-o-ns-l-a-s-hs-l-a-s-hpackages.atlan.com", + f"orchestration.atlan.com/default-{self._NAME}-{self._epoch}": "true", + "orchestration.atlan.com/atlan-ui": "true", + }, + annotations={ + "orchestration.atlan.com/allowSchedule": "true", + "orchestration.atlan.com/categories": "nosql,crawler", + "orchestration.atlan.com/dependentPackage": "", + "orchestration.atlan.com/docsUrl": "https://ask.atlan.com/hc/en-us/articles/6037440864145", + "orchestration.atlan.com/emoji": "\U0001f680", + "orchestration.atlan.com/icon": self._PACKAGE_ICON, + "orchestration.atlan.com/logo": self._PACKAGE_LOGO, + "orchestration.atlan.com/marketplaceLink": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}", # noqa + 
"orchestration.atlan.com/name": "MongoDB Assets", + "package.argoproj.io/author": "Atlan", + "package.argoproj.io/description": f"Package to crawl MongoDB assets and publish to Atlan for discovery", # noqa + "package.argoproj.io/homepage": f"https://packages.atlan.com/-/web/detail/{self._PACKAGE_NAME}", + "package.argoproj.io/keywords": "[\"mongodb\",\"nosql\",\"document-database\",\"connector\",\"crawler\"]", # fmt: skip # noqa + "package.argoproj.io/name": self._PACKAGE_NAME, + "package.argoproj.io/registry": "https://packages.atlan.com", + "package.argoproj.io/repository": "git+https://github.com/atlanhq/marketplace-packages.git", + "package.argoproj.io/support": "support@atlan.com", + "orchestration.atlan.com/atlanName": f"{self._PACKAGE_PREFIX}-default-{self._NAME}-{self._epoch}", + }, + name=f"{self._PACKAGE_PREFIX}-{self._epoch}", + namespace="default", + ) diff --git a/tests/unit/data/package_requests/mongodb_basic.json b/tests/unit/data/package_requests/mongodb_basic.json new file mode 100644 index 000000000..2fdfc9593 --- /dev/null +++ b/tests/unit/data/package_requests/mongodb_basic.json @@ -0,0 +1,124 @@ +{ + "metadata": { + "annotations": { + "orchestration.atlan.com/allowSchedule": "true", + "orchestration.atlan.com/categories": "nosql,crawler", + "orchestration.atlan.com/dependentPackage": "", + "orchestration.atlan.com/docsUrl": "https://ask.atlan.com/hc/en-us/articles/6037440864145", + "orchestration.atlan.com/emoji": "🚀", + "orchestration.atlan.com/icon": "https://assets.atlan.com/assets/mongoDB.svg", + "orchestration.atlan.com/logo": "https://assets.atlan.com/assets/mongoDB.svg", + "orchestration.atlan.com/marketplaceLink": "https://packages.atlan.com/-/web/detail/@atlan/mongodb", + "orchestration.atlan.com/name": "MongoDB Assets", + "package.argoproj.io/author": "Atlan", + "package.argoproj.io/description": "Package to crawl MongoDB assets and publish to Atlan for discovery", + "package.argoproj.io/homepage": 
"https://packages.atlan.com/-/web/detail/@atlan/mongodb", + "package.argoproj.io/keywords": "[\"mongodb\",\"nosql\",\"document-database\",\"connector\",\"crawler\"]", + "package.argoproj.io/name": "@atlan/mongodb", + "package.argoproj.io/registry": "https://packages.atlan.com", + "package.argoproj.io/repository": "git+https://github.com/atlanhq/marketplace-packages.git", + "package.argoproj.io/support": "support@atlan.com", + "orchestration.atlan.com/atlanName": "atlan-mongodb-default-mongodb-123456" + }, + "labels": { + "orchestration.atlan.com/certified": "true", + "orchestration.atlan.com/source": "mongodb", + "orchestration.atlan.com/sourceCategory": "nosql", + "orchestration.atlan.com/type": "connector", + "orchestration.atlan.com/verified": "true", + "package.argoproj.io/installer": "argopm", + "package.argoproj.io/name": "a-t-ratlans-l-a-s-hmongodb", + "package.argoproj.io/registry": "httpsc-o-l-o-ns-l-a-s-hs-l-a-s-hpackages.atlan.com", + "orchestration.atlan.com/default-mongodb-123456": "true", + "orchestration.atlan.com/atlan-ui": "true" + }, + "name": "atlan-mongodb-123456", + "namespace": "default" + }, + "spec": { + "entrypoint": "main", + "templates": [ + { + "name": "main", + "dag": { + "tasks": [ + { + "name": "run", + "arguments": { + "parameters": [ + { + "name": "extraction-method", + "value": "direct" + }, + { + "name": "include-filter", + "value": "{\"^test-include$\": [\"^test-asset-1$\", \"^test-asset-2$\"]}" + }, + { + "name": "exclude-filter", + "value": "{\"^test-exlcude$\": [\"^test-asset-1$\", \"^test-asset-2$\"]}" + }, + { + "name": "temp-table-regex", + "value": "TEST*" + }, + { + "name": "credentials-fetch-strategy", + "value": "credential_guid" + }, + { + "name": "credential-guid", + "value": "{{credentialGuid}}" + }, + { + "name": "connection", + "value": "{\"typeName\": \"Connection\", \"attributes\": {\"qualifiedName\": \"default/mongodb/123456\", \"name\": \"test-sdk-mongodb\", \"adminUsers\": [], \"adminGroups\": [], 
\"connectorName\": \"mongodb\", \"isDiscoverable\": true, \"isEditable\": false, \"adminRoles\": [\"admin-guid-1234\"], \"category\": \"database\", \"allowQuery\": true, \"allowQueryPreview\": true, \"rowLimit\": 10000, \"defaultCredentialGuid\": \"{{credentialGuid}}\", \"sourceLogo\": \"https://assets.atlan.com/assets/mongoDB.svg\"}, \"guid\": \"-1234567890000000000000000\"}" + }, + { + "name": "publish-mode", + "value": "production" + }, + { + "name": "atlas-auth-type", + "value": "internal" + } + ] + }, + "templateRef": { + "name": "atlan-mongodb", + "template": "main", + "clusterScope": true + } + } + ] + } + } + ], + "workflowMetadata": { + "annotations": { + "package.argoproj.io/name": "@atlan/mongodb" + } + } + }, + "payload": [ + { + "parameter": "credentialGuid", + "type": "credential", + "body": { + "name": "default-mongodb-123456-0", + "host": "test-hostname", + "port": 1234, + "authType": "basic", + "username": "test-user", + "password": "test-pass", + "extra": { + "native-host": "test-native-host", + "default-database": "test-default-db", + "authsource": "test-auth-db", + "ssl": false + }, + "connectorConfigName": "atlan-connectors-mongodb" + } + } + ] +} diff --git a/tests/unit/test_packages.py b/tests/unit/test_packages.py index f8d81bb84..d65ce5651 100644 --- a/tests/unit/test_packages.py +++ b/tests/unit/test_packages.py @@ -16,6 +16,7 @@ DbtCrawler, DynamoDBCrawler, GlueCrawler, + MongoDBCrawler, PostgresCrawler, PowerBICrawler, RelationalAssetsBuilder, @@ -72,6 +73,7 @@ RELATIONAL_ASSETS_BUILDER_S3 = "relational_assets_builder_s3.json" RELATIONAL_ASSETS_BUILDER_ADLS = "relational_assets_builder_adls.json" RELATIONAL_ASSETS_BUILDER_GCS = "relational_assets_builder_gcs.json" +MONGODB_BASIC = "mongodb_basic.json" class NonSerializable: @@ -621,6 +623,31 @@ def test_postgres_package(mock_package_env): assert request_json == load_json(POSTGRES_S3_OFFLINE) +def test_mongodb_package(mock_package_env): + mongodb_basic = ( + MongoDBCrawler( + 
connection_name="test-sdk-mongodb", + admin_roles=["admin-guid-1234"], + ) + .direct(hostname="test-hostname", port=1234) + .basic_auth( + username="test-user", + password="test-pass", + native_host="test-native-host", + default_db="test-default-db", + auth_db="test-auth-db", + is_ssl=False, + ) + .include(assets={"test-include": ["test-asset-1", "test-asset-2"]}) + .exclude(assets={"test-exlcude": ["test-asset-1", "test-asset-2"]}) + .exclude_regex(regex="TEST*") + .to_workflow() + ) + + request_json = loads(mongodb_basic.json(by_alias=True, exclude_none=True)) + assert request_json == load_json(MONGODB_BASIC) + + def test_connection_delete_package(mock_package_env): # With PURGE (hard delete) connection_delete_hard = ConnectionDelete( From de4ac75994431b779cbf6015af74069dec494f4a Mon Sep 17 00:00:00 2001 From: Aryamanz29 Date: Tue, 7 Jan 2025 17:51:14 +0530 Subject: [PATCH 2/2] [req-changes] Let's accept `list` of assets for hierarchical filters instead of `dict` --- pyatlan/model/packages/mongodb_crawler.py | 16 +++++++++------- .../data/package_requests/mongodb_basic.json | 4 ++-- tests/unit/test_packages.py | 4 ++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pyatlan/model/packages/mongodb_crawler.py b/pyatlan/model/packages/mongodb_crawler.py index 5d3e739cc..df7b69c63 100644 --- a/pyatlan/model/packages/mongodb_crawler.py +++ b/pyatlan/model/packages/mongodb_crawler.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import List, Optional +from typing import Dict, List, Optional from pyatlan.model.enums import AtlanConnectorType, WorkflowPackage from pyatlan.model.packages.base.crawler import AbstractCrawler @@ -104,32 +104,34 @@ def basic_auth( self._credentials_body.update(local_creds) return self - def include(self, assets: dict) -> MongoDBCrawler: + def include(self, assets: List[str]) -> MongoDBCrawler: """ Defines the filter for assets to include when crawling. 
- :param assets: dict where keys are database names, and values are lists of collections. + :param assets: list of database names to include when crawling :returns: crawler, set to include only those assets specified :raises InvalidRequestException: In the unlikely event the provided filter cannot be translated """ - include_assets = assets or {} + assets = assets or [] + include_assets: Dict[str, List[str]] = {asset: [] for asset in assets} to_include = self.build_hierarchical_filter(include_assets) self._parameters.append( dict(dict(name="include-filter", value=to_include or "{}")) ) return self - def exclude(self, assets: dict) -> MongoDBCrawler: + def exclude(self, assets: List[str]) -> MongoDBCrawler: """ Defines the filter for assets to exclude when crawling. - :param assets: dict where keys are database names, and values are lists of collections. + :param assets: list of database names to exclude when crawling :returns: crawler, set to exclude only those assets specified :raises InvalidRequestException: In the unlikely event the provided filter cannot be translated """ - exclude_assets = assets or {} + assets = assets or [] + exclude_assets: Dict[str, List[str]] = {asset: [] for asset in assets} to_exclude = self.build_hierarchical_filter(exclude_assets) self._parameters.append(dict(name="exclude-filter", value=to_exclude or "{}")) return self diff --git a/tests/unit/data/package_requests/mongodb_basic.json b/tests/unit/data/package_requests/mongodb_basic.json index 2fdfc9593..24004d270 100644 --- a/tests/unit/data/package_requests/mongodb_basic.json +++ b/tests/unit/data/package_requests/mongodb_basic.json @@ -52,11 +52,11 @@ }, { "name": "include-filter", - "value": "{\"^test-include$\": [\"^test-asset-1$\", \"^test-asset-2$\"]}" + "value": "{\"^test-asset-1$\": [], \"^test-asset-2$\": []}" }, { "name": "exclude-filter", - "value": "{\"^test-exlcude$\": [\"^test-asset-1$\", \"^test-asset-2$\"]}" + "value": "{\"^test-asset-1$\": 
[]}" }, { "name": "temp-table-regex", diff --git a/tests/unit/test_packages.py b/tests/unit/test_packages.py index d65ce5651..9381db3d2 100644 --- a/tests/unit/test_packages.py +++ b/tests/unit/test_packages.py @@ -638,8 +638,8 @@ def test_mongodb_package(mock_package_env): auth_db="test-auth-db", is_ssl=False, ) - .include(assets={"test-include": ["test-asset-1", "test-asset-2"]}) - .exclude(assets={"test-exlcude": ["test-asset-1", "test-asset-2"]}) + .include(assets=["test-asset-1", "test-asset-2"]) + .exclude(assets=["test-asset-1", "test-asset-2"]) .exclude_regex(regex="TEST*") .to_workflow() )