diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml
index e5fa8b46509256..f30060c5ccdbdf 100644
--- a/metadata-ingestion/pyproject.toml
+++ b/metadata-ingestion/pyproject.toml
@@ -30,6 +30,7 @@ extend-select = [
     "I", # isort
     "TID", # flake8-tidy-imports
     "RUF100", # unused-noqa
+    "SIM", # flake8-simplify
 ]
 extend-ignore = [
     "E501", # Handled by formatter
@@ -40,8 +41,21 @@ extend-ignore = [
     "B008", # Allow function call in argument defaults
     "RUF012", # mutable-class-default; incompatible with pydantic
     "RUF015", # unnecessary-iterable-allocation-for-first-element
-    # TODO: Enable these later
-    "B006", # Mutable args
+
+    # Can be enabled later if someone wants to fix existing cases or an auto-fix becomes available
+    "SIM101", # Multiple isinstance calls for {name}, merge into a single call
+    "SIM102", # Use a single `if` statement instead of nested `if` statements
+    "SIM103", # Return the condition directly
+    "SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
+    "SIM108", # Use ternary operator {contents} instead of if-else-block
+    "SIM110", # Use `return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)` instead of `for` loop
+    "SIM113", # Use enumerate() for index variable {index} in for loop
+    "SIM115", # Use a context manager for opening files
+    "SIM116", # Use a dictionary instead of consecutive `if` statements
+    "SIM117", # Use a single with statement with multiple contexts instead of nested with statements
+    "SIM118", # Use key {operator} dict instead of key {operator} dict.keys()
+    "SIM210", # Use `bool(...)` instead of `True if ... else False`
+    "SIM401", # Use `sample_data.get(schema_field.fieldPath, [])` instead of an `if` block
 ]

 [tool.ruff.lint.mccabe]
diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py
index 7e68e9f80da4ff..df25949d782c14 100644
--- a/metadata-ingestion/src/datahub/configuration/git.py
+++ b/metadata-ingestion/src/datahub/configuration/git.py
@@ -43,9 +43,7 @@ class GitReference(ConfigModel):

     @validator("repo", pre=True)
     def simplify_repo_url(cls, repo: str) -> str:
-        if repo.startswith("github.com/"):
-            repo = f"https://{repo}"
-        elif repo.startswith("gitlab.com"):
+        if repo.startswith("github.com/") or repo.startswith("gitlab.com"):
             repo = f"https://{repo}"
         elif repo.count("/") == 1:
             repo = f"https://github.com/{repo}"
diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py
index 4465317ae351a6..32f7d1634f8f8d 100644
--- a/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py
+++ b/metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py
@@ -281,7 +281,7 @@ def get_columns_to_classify(
                 ),
                 values=(
                     sample_data[schema_field.fieldPath]
-                    if schema_field.fieldPath in sample_data.keys()
+                    if schema_field.fieldPath in sample_data
                     else []
                 ),
             )
diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py
index 1905a6d2e3f045..69968ecb726f2d 100644
--- a/metadata-ingestion/src/datahub/ingestion/graph/client.py
+++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -330,7 +330,7 @@ def get_aspect_v2(
         aspect_type_name: Optional[str] = None,
         version: int = 0,
     ) -> Optional[Aspect]:
-        assert aspect_type.ASPECT_NAME == aspect
+        assert aspect == aspect_type.ASPECT_NAME
         return self.get_aspect(
             entity_urn=entity_urn,
             aspect_type=aspect_type,
diff --git a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
index fc790535cfe03d..d72a2e68f53a88 100644
--- a/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
+++ b/metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py
@@ -163,12 +163,7 @@ def _convert_sets_to_lists(obj: Any) -> Any:
                 key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
                 for key, value in obj.items()
             }
-        elif isinstance(obj, list):
-            return [
-                DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
-                for element in obj
-            ]
-        elif isinstance(obj, set):
+        elif isinstance(obj, list) or isinstance(obj, set):
             return [
                 DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
                 for element in obj
diff --git a/metadata-ingestion/src/datahub/ingestion/source/abs/config.py b/metadata-ingestion/src/datahub/ingestion/source/abs/config.py
index c62239527a1200..583876a8dda6c0 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/abs/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/abs/config.py
@@ -144,10 +144,8 @@ def check_path_specs_and_infer_platform(
         return path_specs

     @pydantic.validator("platform", always=True)
-    def platform_not_empty(cls, platform: str, values: dict) -> str:
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_not_empty(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
index d35c5265878c03..507f36d84335b8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_audit.py
@@ -165,7 +165,7 @@ def from_bigquery_table(cls, table: BigqueryTableIdentifier) -> "BigQueryTableRe
     @classmethod
     def from_spec_obj(cls, spec: dict) -> "BigQueryTableRef":
         for key in ["projectId", "datasetId", "tableId"]:
-            if key not in spec.keys():
+            if key not in spec:
                 raise ValueError(f"invalid BigQuery table reference dict: {spec}")

         return cls(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
index d3d8ed62cd364c..eb6b620f3f7498 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py
@@ -344,7 +344,7 @@ def get_tables_for_dataset(
         with_partitions: bool = False,
     ) -> Iterator[BigqueryTable]:
         with PerfTimer() as current_timer:
-            filter_clause: str = ", ".join(f"'{table}'" for table in tables.keys())
+            filter_clause: str = ", ".join(f"'{table}'" for table in tables)

             if with_partitions:
                 query_template = BigqueryQuery.tables_for_dataset
diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
index afbf2c311b5005..3be7ac9af38016 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py
@@ -314,7 +314,7 @@ def get_resource_description_work_unit(
             "datajob": EditableDataJobPropertiesClass,
             "dataflow": EditableDataFlowPropertiesClass,
             "notebook": EditableNotebookPropertiesClass,
-        }.get(entityType, None)
+        }.get(entityType)

         if not entityClass:
             raise ValueError(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
index f810500085295b..13b4c5dcd6ff0c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py
@@ -1033,7 +1033,7 @@ def add_node_to_cll_list(dbt_name: str) -> None:
                     cll_nodes.add(dbt_name)
                     schema_nodes.add(dbt_name)

-        for dbt_name in all_nodes_map.keys():
+        for dbt_name in all_nodes_map:
             if self._is_allowed_node(dbt_name):
                 add_node_to_cll_list(dbt_name)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
index 64c1a0ad0bfbad..27ca8999d7dec8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/gc/dataprocess_cleanup.py
@@ -498,7 +498,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         # Delete empty dataflows if needed
         if self.config.delete_empty_data_flows:
             deleted_data_flows: int = 0
-            for key in dataFlows.keys():
+            for key in dataFlows:
                 if not dataJobs.get(key) or len(dataJobs[key]) == 0:
                     logger.info(
                         f"Deleting dataflow {key} because there are not datajobs"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
index bde26f97bf271f..b8d67703e815fb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -170,14 +170,10 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
             ).select_from(self._table)
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
-    elif self.engine.dialect.name.lower() == BIGQUERY:
-        element_values = self.engine.execute(
-            sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from(
-                self._table
-            )
-        )
-        return convert_to_json_serializable(element_values.fetchone()[0])
-    elif self.engine.dialect.name.lower() == SNOWFLAKE:
+    elif (
+        self.engine.dialect.name.lower() == BIGQUERY
+        or self.engine.dialect.name.lower() == SNOWFLAKE
+    ):
         element_values = self.engine.execute(
             sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from(
                 self._table
@@ -381,13 +377,14 @@ def _get_columns_to_profile(self) -> List[str]:
             col = col_dict["name"]
             self.column_types[col] = str(col_dict["type"])
             # We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
-            if not self.config._allow_deny_patterns.allowed(
-                f"{self.dataset_name}.{col}"
+            if (
+                not self.config._allow_deny_patterns.allowed(
+                    f"{self.dataset_name}.{col}"
+                )
+                or not self.config.profile_nested_fields
+                and "." in col
             ):
                 ignored_columns_by_pattern.append(col)
-            # We try to ignore nested columns as well
-            elif not self.config.profile_nested_fields and "." in col:
-                ignored_columns_by_pattern.append(col)
             elif col_dict.get("type") and self._should_ignore_column(col_dict["type"]):
                 ignored_columns_by_type.append(col)
             else:
@@ -1408,7 +1405,7 @@ def _get_ge_dataset(
             },
         )

-        if platform == BIGQUERY or platform == DATABRICKS:
+        if platform in (BIGQUERY, DATABRICKS):
             # This is done as GE makes the name as DATASET.TABLE
             # but we want it to be PROJECT.DATASET.TABLE instead for multi-project setups
             name_parts = pretty_name.split(".")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
index 56453855a5b33d..42a79b8694883c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py
@@ -568,9 +568,7 @@ def _filter_okta_user(self, okta_user: User) -> bool:
         if (
             self.config.include_deprovisioned_users is False
             and okta_user.status == UserStatus.DEPROVISIONED
-        ):
-            return False
-        elif (
+        ) or (
             self.config.include_suspended_users is False
             and okta_user.status == UserStatus.SUSPENDED
         ):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
index c562f23336ec2d..b4998ba7b7cbd8 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect/source_connectors.py
@@ -447,13 +447,10 @@ def get_parser(
     ) -> DebeziumParser:
         connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "")

-        if connector_class == "io.debezium.connector.mysql.MySqlConnector":
-            parser = self.DebeziumParser(
-                source_platform="mysql",
-                server_name=self.get_server_name(connector_manifest),
-                database_name=None,
-            )
-        elif connector_class == "MySqlConnector":
+        if (
+            connector_class == "io.debezium.connector.mysql.MySqlConnector"
+            or connector_class == "MySqlConnector"
+        ):
             parser = self.DebeziumParser(
                 source_platform="mysql",
                 server_name=self.get_server_name(connector_manifest),
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
index c3f2a110136c45..040b8fc3937441 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_lib_wrapper.py
@@ -205,8 +205,9 @@ def lookml_model_explore(self, model: str, explore_name: str) -> LookmlModelExpl
     def folder_ancestors(
         self,
         folder_id: str,
-        fields: Union[str, List[str]] = ["id", "name", "parent_id"],
+        fields: Optional[Union[str, List[str]]] = None,
     ) -> Sequence[Folder]:
+        fields = fields or ["id", "name", "parent_id"]
         self.client_stats.folder_calls += 1
         try:
             return self.client.folder_ancestors(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
index 60983f04bafa05..c8d93299de9f0c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py
@@ -464,9 +464,10 @@ def process_lookml_template_language(
     source_config: LookMLSourceConfig,
     view_lkml_file_dict: dict,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> None:
+    manifest_constants = manifest_constants or {}
     if "views" not in view_lkml_file_dict:
         return

@@ -507,9 +508,10 @@ def load_and_preprocess_file(
     path: Union[str, pathlib.Path],
     source_config: LookMLSourceConfig,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> dict:
+    manifest_constants = manifest_constants or {}
     parsed = load_lkml(path)

     process_lookml_template_language(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
index f2f49ca71f5d58..dfc18d6d7409fb 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py
@@ -1006,8 +1006,9 @@ def gen_project_workunits(self, project_name: str) -> Iterable[MetadataWorkUnit]
     def report_skipped_unreachable_views(
         self,
         viewfile_loader: LookerViewFileLoader,
-        processed_view_map: Dict[str, Set[str]] = {},
+        processed_view_map: Optional[Dict[str, Set[str]]] = None,
     ) -> None:
+        processed_view_map = processed_view_map or {}
         view_files: Dict[str, List[pathlib.Path]] = {}
         for project, folder_path in self.base_projects_folder.items():
             folder = pathlib.Path(folder_path)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
index afc3a4becdff2a..ac7f38420213af 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/neo4j/neo4j_source.py
@@ -292,7 +292,7 @@ def get_properties(self, record: dict) -> str:
         return record["properties"]

     def get_relationships(self, record: dict) -> dict:
-        return record.get("relationships", None)
+        return record.get("relationships", {})

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
index 7b6bbc632427d4..35c2772ea80e0d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
@@ -1234,11 +1234,14 @@ def construct_job_workunits(
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
         if job_properties:
             job_properties = {k: v for k, v in job_properties.items() if v is not None}
diff --git a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
index 1db09d3685ee7f..f8871e465632bb 100755
--- a/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/openapi_parser.py
@@ -167,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -188,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
             )
         elif "text/csv" in res_cont.keys():
             data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]

     return data
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
index 759fc6d7dadfba..de66b5281b917e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/parser.py
@@ -2,7 +2,7 @@
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional

 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py
index d48e251bd00906..81b3c6d4407209 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py
@@ -70,13 +70,14 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]:
     return expression_tree


-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
     :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
                        using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []

     def internal(node: Union[Tree, Token]) -> None:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index 4342be1448b299..2cc4103806050a 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -890,9 +890,7 @@ def to_datahub_users(
                     set(user_rights) & set(self.__config.ownership.owner_criteria)
                 )
                 > 0
-            ):
-                user_mcps.extend(self.to_datahub_user(user))
-            elif self.__config.ownership.owner_criteria is None:
+            ) or self.__config.ownership.owner_criteria is None:
                 user_mcps.extend(self.to_datahub_user(user))
             else:
                 continue
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
index d98f4abc296778..d60e930d1f055e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py
@@ -380,8 +380,9 @@ def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]:
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py
index 1f4c157b33b95e..2e2d3d87a6ddee 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py
@@ -196,7 +196,7 @@ def get_all_reports(self) -> List[Any]:
         }

         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/websocket_connection.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/websocket_connection.py
index 01ca9415f886a8..fabdbaffc44ce7 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/websocket_connection.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/websocket_connection.py
@@ -17,8 +17,9 @@ def __init__(self, tenant_hostname: str, api_key: str, app_id: str) -> None:
         self.handle = [-1]

     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ def _send_request(self, request: Dict) -> Dict:
         return {}

     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/redash.py b/metadata-ingestion/src/datahub/ingestion/source/redash.py
index 1909f0fad471b9..6feb324bff49f4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/redash.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/redash.py
@@ -421,8 +421,9 @@ def _get_database_name_based_on_datasource(
         return database_name

     def _get_datasource_urns(
-        self, data_source: Dict, sql_query_data: Dict = {}
+        self, data_source: Dict, sql_query_data: Optional[Dict] = None
     ) -> Optional[List[str]]:
+        sql_query_data = sql_query_data or {}
         platform = self._get_platform_based_on_datasource(data_source)
         database_name = self._get_database_name_based_on_datasource(data_source)
         data_source_syntax = data_source.get("syntax")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py
index a946192242e65e..eac93c5059459f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py
@@ -154,10 +154,8 @@ def check_path_specs_and_infer_platform(
         return path_specs

     @pydantic.validator("platform", always=True)
-    def platform_valid(cls, platform: str, values: dict) -> str:
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_valid(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
index b1554ad127b7ac..41d7ae3283711f 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py
@@ -834,7 +834,7 @@ def get_dir_to_process(
                     min=min,
                 )
                 folders.extend(folders_list)
-                if not path_spec.traversal_method == FolderTraversalMethod.ALL:
+                if path_spec.traversal_method != FolderTraversalMethod.ALL:
                     return folders
         if folders:
             return folders
diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
index 62a911185ac54b..dcb2bac7570a0d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py
@@ -615,7 +615,7 @@ def _get_field_description(self, field: dict, customField: dict) -> str:
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"

-        text = field.get("InlineHelpText", None)
+        text = field.get("InlineHelpText")
         if text:
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
index bbcb114ee40c35..8b865e3a0b1a18 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/schema_inference/object.py
@@ -149,7 +149,7 @@ def append_to_schema(doc: Dict[str, Any], parent_prefix: Tuple[str, ...]) -> Non

     extended_schema: Dict[Tuple[str, ...], SchemaDescription] = {}

-    for field_path in schema.keys():
+    for field_path in schema:
         field_types = schema[field_path]["types"]

         field_type: Union[str, type] = "mixed"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
index 8f28a7d2e74615..0088ffdfd4264e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py
@@ -125,7 +125,7 @@ def validate_account_id(cls, account_id: str) -> str:

     @pydantic.validator("authentication_type", always=True)
     def authenticator_type_is_valid(cls, v, values):
-        if v not in _VALID_AUTH_TYPES.keys():
+        if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
index daeb839e5f54da..d90da77f408e21 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py
@@ -439,7 +439,7 @@ class SnowflakePrivilege:
                     failure_reason=failure_message,
                 )

-            if c in _report.keys():
+            if c in _report:
                 continue

         # If some capabilities are missing, then mark them as not capable
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
index 65adf76b455709..9f194609dddccd 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py
@@ -396,7 +396,7 @@ def get_table_properties(
             metadata.table_type if metadata.table_type else ""
         )

-        location: Optional[str] = custom_properties.get("location", None)
+        location: Optional[str] = custom_properties.get("location")
         if location is not None:
             if location.startswith("s3://"):
                 location = make_s3_urn(location, self.config.env)
@@ -538,7 +538,7 @@ def get_schema_fields_for_column(
             column_name=column["name"],
             column_type=column["type"],
             inspector=inspector,
-            description=column.get("comment", None),
+            description=column.get("comment"),
             nullable=column.get("nullable", True),
             is_part_of_key=(
                 True
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index 922dc4b28a220e..2e85609f213f4c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -204,7 +204,7 @@ def get_column_type(
     """
     TypeClass: Optional[Type] = None

-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _field_type_mapping:
         if isinstance(column_type, sql_type):
             TypeClass = _field_type_mapping[sql_type]
             break
@@ -973,7 +973,7 @@ def get_schema_fields_for_column(
                     inspector=inspector,
                 )
             ),
-            description=column.get("comment", None),
+            description=column.get("comment"),
             nullable=column["nullable"],
             recursive=False,
             globalTags=gtc,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
index 6b9b8bdeb39e97..c8a540a1337642 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py
@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
-        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base)

     # Fallback for types without precision/scale
-    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+    return SNOWFLAKE_TYPES_MAP.get(type_string)


 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
index 442d026b0af87c..463b0e60595b33 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -180,10 +180,11 @@ def optimized_get_columns(
         connection: Connection,
         table_name: str,
         schema: Optional[str] = None,
-        tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+        tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
         use_qvci: bool = False,
         **kw: Dict[str, Any],
     ) -> List[Dict]:
+        tables_cache = tables_cache or {}
         if schema is None:
             schema = self.default_schema_name

@@ -314,9 +315,10 @@ def optimized_get_view_definition(
         connection: Connection,
         view_name: str,
         schema: Optional[str] = None,
-        tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+        tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
         **kw: Dict[str, Any],
     ) -> Optional[str]:
+        tables_cache = tables_cache or {}
         if schema is None:
             schema = self.default_schema_name
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
index b6fa51dd70e18d..6fa3fc10f06fee 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py
@@ -142,7 +142,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
                 if col_value is not None:
                     properties[col_name] = col_value

-            return {"text": properties.get("comment", None), "properties": properties}
+            return {"text": properties.get("comment"), "properties": properties}
         else:
             return self.get_table_comment_default(connection, table_name, schema)
     except Exception:
@@ -483,7 +483,7 @@ def _parse_struct_fields(parts):


 def _parse_basic_datatype(s):
-    for sql_type in _all_atomic_types.keys():
+    for sql_type in _all_atomic_types:
         if isinstance(s, sql_type):
             return {
                 "type": _all_atomic_types[sql_type],
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
index 07d63a0787d97e..ef96a081e7c431 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
@@ -1911,11 +1911,7 @@ def get_upstream_columns_of_fields_in_datasource(
                 if upstream_col.get(c.TABLE)
                 else None
             )
-            if (
-                name
-                and upstream_table_id
-                and upstream_table_id in table_id_to_urn.keys()
-            ):
+            if name and upstream_table_id and upstream_table_id in table_id_to_urn:
                 parent_dataset_urn = table_id_to_urn[upstream_table_id]
                 if (
                     self.is_snowflake_urn(parent_dataset_urn)
diff --git a/metadata-ingestion/src/datahub/lite/duckdb_lite.py b/metadata-ingestion/src/datahub/lite/duckdb_lite.py
index c3e2ccc75b2db7..c05d4b2dd6c678 100644
--- a/metadata-ingestion/src/datahub/lite/duckdb_lite.py
+++ b/metadata-ingestion/src/datahub/lite/duckdb_lite.py
@@ -760,15 +760,9 @@ def post_update_hook(
                 entity_id=[str(data_platform_urn), data_platform_instance],
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
-        elif isinstance(aspect, ChartInfoClass):
-            urn = Urn.from_string(entity_urn)
-            self.add_edge(
-                entity_urn,
-                "name",
-                aspect.title + f" ({urn.get_entity_id()[-1]})",
-                remove_existing=True,
-            )
-        elif isinstance(aspect, DashboardInfoClass):
+        elif isinstance(aspect, ChartInfoClass) or isinstance(
+            aspect, DashboardInfoClass
+        ):
             urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,
diff --git a/metadata-ingestion/src/datahub/sdk/dataset.py b/metadata-ingestion/src/datahub/sdk/dataset.py
index 6d241627e58d19..2fff5adf25009d 100644
--- a/metadata-ingestion/src/datahub/sdk/dataset.py
+++ b/metadata-ingestion/src/datahub/sdk/dataset.py
@@ -72,9 +72,9 @@ def _parse_upstream_input(
     upstream_input: UpstreamInputType,
 ) -> Union[models.UpstreamClass, models.FineGrainedLineageClass]:
-    if isinstance(upstream_input, models.UpstreamClass):
-        return upstream_input
-    elif isinstance(upstream_input, models.FineGrainedLineageClass):
+    if isinstance(upstream_input, models.UpstreamClass) or isinstance(
+        upstream_input, models.FineGrainedLineageClass
+    ):
         return upstream_input
     elif isinstance(upstream_input, (str, DatasetUrn)):
         return models.UpstreamClass(
diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
index 5010c0197e33a7..e6c37ab3bbee03 100644
--- a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
+++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py
@@ -583,7 +583,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -

     api_vs_response.update(override_data or {})

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py
index bbcc6332539c02..d3ad481908323d 100644
--- a/metadata-ingestion/tests/integration/looker/test_looker.py
+++ b/metadata-ingestion/tests/integration/looker/test_looker.py
@@ -532,9 +532,10 @@ def setup_mock_explore_unaliased_with_joins(mocked_client):


 def setup_mock_explore(
     mocked_client: Any,
-    additional_lkml_fields: List[LookmlModelExploreField] = [],
+    additional_lkml_fields: Optional[List[LookmlModelExploreField]] = None,
     **additional_explore_fields: Any,
 ) -> None:
+    additional_lkml_fields = additional_lkml_fields or []
     mock_model = mock.MagicMock(project_name="lkml_samples")
     mocked_client.lookml_model.return_value = mock_model
diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py
index 3ce62a20379594..a6cab7cdb69bb2 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py
@@ -476,7 +476,7 @@ def register_mock_admin_api(

     api_vs_response.update(override_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
index ee68ae62a25a7b..ac5e4d2a8f6afc 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py
@@ -103,7 +103,7 @@ def register_mock_api(

     api_vs_response.update(override_data or {})

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/powerbi/test_profiling.py b/metadata-ingestion/tests/integration/powerbi/test_profiling.py
index aadf0f0f89c549..fb5ed77830af34 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_profiling.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_profiling.py
@@ -264,7 +264,7 @@ def register_mock_admin_api(

     api_vs_response.update(override_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py
index 84f7a87ce5d2d0..3ba990f7fd5886 100644
--- a/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py
+++ b/metadata-ingestion/tests/integration/powerbi/test_stateful_ingestion.py
@@ -106,7 +106,7 @@ def register_mock_api_state1(request_mock):
         },
     }

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
@@ -177,7 +177,7 @@ def register_mock_api_state2(request_mock):
         },
     }

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py
index 83dcd76ea6f871..80cb9647d63013 100644
--- a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py
+++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py
@@ -117,7 +117,7 @@ def register_mock_api(

     api_vs_response.update(override_mock_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/qlik_sense/test_qlik_sense.py b/metadata-ingestion/tests/integration/qlik_sense/test_qlik_sense.py
index 73f9eac41d9dcd..6c1a382f103cf7 100644
--- a/metadata-ingestion/tests/integration/qlik_sense/test_qlik_sense.py
+++ b/metadata-ingestion/tests/integration/qlik_sense/test_qlik_sense.py
@@ -559,7 +559,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -

     api_vs_response.update(override_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         if api_vs_response[url].get("response_list"):
             request_mock.register_uri(
                 api_vs_response[url]["method"],
diff --git a/metadata-ingestion/tests/integration/sigma/test_sigma.py b/metadata-ingestion/tests/integration/sigma/test_sigma.py
index 1a3a2b33b10368..a28bacacc6b2dd 100644
--- a/metadata-ingestion/tests/integration/sigma/test_sigma.py
+++ b/metadata-ingestion/tests/integration/sigma/test_sigma.py
@@ -411,7 +411,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -

     api_vs_response.update(override_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/superset/test_superset.py b/metadata-ingestion/tests/integration/superset/test_superset.py
index 9b6d5ce5e86a1f..435a754d2b3af4 100644
--- a/metadata-ingestion/tests/integration/superset/test_superset.py
+++ b/metadata-ingestion/tests/integration/superset/test_superset.py
@@ -391,7 +391,7 @@ def register_mock_api(request_mock: Any, override_data: Optional[dict] = None) -

     api_vs_response.update(override_data)

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
index 9c7b86a275f6d0..8014aa41f13f41 100644
--- a/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
+++ b/metadata-ingestion/tests/integration/unity/test_unity_catalog_ingest.py
@@ -33,7 +33,7 @@ def register_mock_api(request_mock):
         },
     }

-    for url in api_vs_response.keys():
+    for url in api_vs_response:
         request_mock.register_uri(
             api_vs_response[url]["method"],
             url,
diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py
index fb5133b24474c2..8f5b9154e61828 100644
--- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py
+++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_profiler.py
@@ -67,7 +67,7 @@ def test_generate_day_partitioned_partition_profiler_query():
 `date` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00')
 """.strip()

-    assert "20200101" == query[0]
+    assert query[0] == "20200101"
     assert query[1]
     assert expected_query == query[1].strip()

@@ -110,7 +110,7 @@ def test_generate_day_partitioned_partition_profiler_query_with_set_partition_ti
 `date` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00')
 """.strip()

-    assert "20200101" == query[0]
+    assert query[0] == "20200101"
     assert query[1]
     assert expected_query == query[1].strip()

@@ -153,7 +153,7 @@ def test_generate_hour_partitioned_partition_profiler_query():
 `partition_column` BETWEEN TIMESTAMP('2020-01-01 03:00:00') AND TIMESTAMP('2020-01-01 04:00:00')
 """.strip()

-    assert "2020010103" == query[0]
+    assert query[0] == "2020010103"
     assert query[1]
     assert expected_query == query[1].strip()

@@ -186,7 +186,7 @@ def test_generate_ingestion_partitioned_partition_profiler_query():
 `_PARTITIONTIME` BETWEEN TIMESTAMP('2020-01-01 00:00:00') AND TIMESTAMP('2020-01-02 00:00:00')
 """.strip()

-    assert "20200101" == query[0]
+    assert query[0] == "20200101"
     assert query[1]
     assert expected_query == query[1].strip()

@@ -208,5 +208,5 @@ def test_generate_sharded_table_profiler_query():
         table=test_table,
     )

-    assert "20200101" == query[0]
+    assert query[0] == "20200101"
     assert query[1] is None
diff --git a/metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py b/metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py
index b605e9b3f8a3e6..a61475f68546dd 100644
--- a/metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py
+++ b/metadata-ingestion/tests/unit/bigquery/test_bigquery_source.py
@@ -848,7 +848,7 @@ def test_table_processing_logic(
     tables: Dict[str, TableListItem] = data_dictionary_mock.call_args_list[0][0][
         2
     ]  # alternatively
-    for table in tables.keys():
+    for table in tables:
         assert table in ["test-table", "test-sharded-table_20220102"]


@@ -924,7 +924,7 @@ def test_table_processing_logic_date_named_tables(
     tables: Dict[str, TableListItem] = data_dictionary_mock.call_args_list[0][0][
         2
     ]  # alternatively
-    for table in tables.keys():
+    for table in tables:
         assert tables[table].table_id in ["test-table", "20220103"]


diff --git a/metadata-ingestion/tests/unit/schema/test_json_schema_util.py b/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
index 34ccc3d4fb9225..bc4dd7de57f7e0 100644
--- a/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
+++ b/metadata-ingestion/tests/unit/schema/test_json_schema_util.py
@@ -81,7 +81,7 @@ def json_schema_to_schema_fields(schema):
 )
 def test_json_schema_to_events_with_nullable_fields(schema):
     fields = json_schema_to_schema_fields(schema)
-    assert 1 == len(fields)
+    assert len(fields) == 1
     assert fields[0].nullable


diff --git a/metadata-ingestion/tests/unit/serde/test_codegen.py b/metadata-ingestion/tests/unit/serde/test_codegen.py
index 13fcf3d919cc03..e5f55b7041eb85 100644
--- a/metadata-ingestion/tests/unit/serde/test_codegen.py
+++ b/metadata-ingestion/tests/unit/serde/test_codegen.py
@@ -54,7 +54,7 @@ def test_key_aspect_info():
         "entityCategory": "internal",
         "entityAspects": ["telemetryClientId"],
     }
-    assert TelemetryKeyClass.ASPECT_INFO == expected
+    assert expected == TelemetryKeyClass.ASPECT_INFO
     assert TelemetryKeyClass.get_aspect_info() == expected


diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
index c3c3a4a15d915b..7811e6928246f1 100644
--- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
+++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_utils.py
@@ -17,7 +17,7 @@


 def test_update_from_select():
-    assert _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT == {"returning", "this"}
+    assert {"returning", "this"} == _UPDATE_ARGS_NOT_SUPPORTED_BY_SELECT


 def test_is_dialect_instance():
diff --git a/metadata-ingestion/tests/unit/test_athena_source.py b/metadata-ingestion/tests/unit/test_athena_source.py
index f8b6220d182735..1a4a68d6e74485 100644
--- a/metadata-ingestion/tests/unit/test_athena_source.py
+++ b/metadata-ingestion/tests/unit/test_athena_source.py
@@ -211,8 +211,8 @@ def test_column_type_decimal():
     result = CustomAthenaRestDialect()._get_column_type(type_="decimal(10,2)")

     assert isinstance(result, types.DECIMAL)
-    assert 10 == result.precision
-    assert 2 == result.scale
+    assert result.precision == 10
+    assert result.scale == 2


 def test_column_type_complex_combination():
diff --git a/metadata-ingestion/tests/unit/test_kafka_source.py b/metadata-ingestion/tests/unit/test_kafka_source.py
index 1a8afe1b956fae..79d0eb1a594336 100644
--- a/metadata-ingestion/tests/unit/test_kafka_source.py
+++ b/metadata-ingestion/tests/unit/test_kafka_source.py
@@ -301,7 +301,7 @@ def test_kafka_source_workunits_schema_registry_subject_name_strategies(
     # Mock the kafka consumer
     mock_kafka_instance = mock_kafka_consumer.return_value
     mock_cluster_metadata = MagicMock()
-    mock_cluster_metadata.topics = {k: None for k in topic_subject_schema_map.keys()}
+    mock_cluster_metadata.topics = {k: None for k in topic_subject_schema_map}
     mock_cluster_metadata.topics["schema_less_topic"] = None
     mock_kafka_instance.list_topics.return_value = mock_cluster_metadata

@@ -598,7 +598,7 @@ def test_kafka_source_topic_meta_mappings(
     # Mock the kafka consumer
     mock_kafka_instance = mock_kafka_consumer.return_value
     mock_cluster_metadata = MagicMock()
-    mock_cluster_metadata.topics = {k: None for k in topic_subject_schema_map.keys()}
+    mock_cluster_metadata.topics = {k: None for k in topic_subject_schema_map}
     mock_kafka_instance.list_topics.return_value = mock_cluster_metadata

     # Mock the schema registry client
diff --git a/metadata-ingestion/tests/unit/test_protobuf_util.py b/metadata-ingestion/tests/unit/test_protobuf_util.py
index 86418d2d97a59e..213b5f2ffbc975 100644
--- a/metadata-ingestion/tests/unit/test_protobuf_util.py
+++ b/metadata-ingestion/tests/unit/test_protobuf_util.py
@@ -20,7 +20,7 @@ def test_protobuf_schema_to_mce_fields_with_single_empty_message() -> None:
         ProtobufSchema("main_1.proto", schema)
     )

-    assert 0 == len(fields)
+    assert len(fields) == 0


 def test_protobuf_schema_to_mce_fields_with_single_message_single_field_key_schema() -> (
@@ -36,12 +36,12 @@ def test_protobuf_schema_to_mce_fields_with_single_message_single_field_key_sche
     fields: List[SchemaFieldClass] = protobuf_schema_to_mce_fields(
         ProtobufSchema("main_2.proto", schema), is_key_schema=True
     )
-    assert 1 == len(fields)
+    assert len(fields) == 1
     assert (
-        "[version=2.0].[key=True].[type=Test2].[type=string].field_1"
-        == fields[0].fieldPath
+        fields[0].fieldPath
+        == "[version=2.0].[key=True].[type=Test2].[type=string].field_1"
     )
-    assert "string" == fields[0].nativeDataType
+    assert fields[0].nativeDataType == "string"


 def test_protobuf_schema_to_mce_fields_with_two_messages_enum() -> None:
@@ -67,15 +67,15 @@ def test_protobuf_schema_to_mce_fields_with_two_messages_enum() -> None:
         ProtobufSchema("main_3.proto", schema)
     )

-    assert 5 == len(fields)
-    assert "[version=2.0].[type=Test3].Test3" == fields[0].fieldPath
-    assert "[version=2.0].[type=Test3].Test3.[type=enum].field_2" == fields[1].fieldPath
+    assert len(fields) == 5
+    assert fields[0].fieldPath == "[version=2.0].[type=Test3].Test3"
+    assert fields[1].fieldPath == "[version=2.0].[type=Test3].Test3.[type=enum].field_2"
     assert (
-        "[version=2.0].[type=Test3].Test3.[type=string].field_1" == fields[2].fieldPath
+        fields[2].fieldPath == "[version=2.0].[type=Test3].Test3.[type=string].field_1"
     )
-    assert "[version=2.0].[type=Test4].Test4" == fields[3].fieldPath
+    assert fields[3].fieldPath == "[version=2.0].[type=Test4].Test4"
     assert (
-        "[version=2.0].[type=Test4].Test4.[type=long].anInteger" == fields[4].fieldPath
+        fields[4].fieldPath == "[version=2.0].[type=Test4].Test4.[type=long].anInteger"
     )

@@ -101,21 +101,21 @@ def test_protobuf_schema_to_mce_fields_nested():
         ProtobufSchema("main_4.proto", schema)
     )

-    assert 4 == len(fields)
-    assert "[version=2.0].[type=Test5].[type=Test5_Nested1].f1" == fields[0].fieldPath
+    assert len(fields) == 4
+    assert fields[0].fieldPath == "[version=2.0].[type=Test5].[type=Test5_Nested1].f1"
     assert (
-        "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2"
-        == fields[1].fieldPath
+        fields[1].fieldPath
+        == "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2"
     )
     assert (
-        "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2.[type=Test5_Nested1_Nested2_Nested3].f3"
-        == fields[2].fieldPath
+        fields[2].fieldPath
+        == "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2.[type=Test5_Nested1_Nested2_Nested3].f3"
     )
     assert (
-        "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2.[type=Test5_Nested1_Nested2_Nested3].f3.[type=Test5_Nested1_Nested2_Nested3_Nested4].f4"
-        == fields[3].fieldPath
+        fields[3].fieldPath
+        == "[version=2.0].[type=Test5].[type=Test5_Nested1].f1.[type=Test5_Nested1_Nested2].f2.[type=Test5_Nested1_Nested2_Nested3].f3.[type=Test5_Nested1_Nested2_Nested3_Nested4].f4"
     )
-    assert "Test5.Nested1.Nested2.Nested3.Nested4" == fields[3].nativeDataType
+    assert fields[3].nativeDataType == "Test5.Nested1.Nested2.Nested3.Nested4"


 def test_protobuf_schema_to_mce_fields_repeated() -> None:
@@ -130,15 +130,15 @@ def test_protobuf_schema_to_mce_fields_repeated() -> None:
         ProtobufSchema("main_5.proto", schema)
     )

-    assert 1 == len(fields)
+    assert len(fields) == 1
     assert (
-        "[version=2.0].[type=Test6].[type=array].[type=long].aList"
-        == fields[0].fieldPath
+        fields[0].fieldPath
+        == "[version=2.0].[type=Test6].[type=array].[type=long].aList"
     )
-    assert "int64" == fields[0].nativeDataType
+    assert fields[0].nativeDataType == "int64"
     assert isinstance(fields[0].type.type, ArrayTypeClass)
     assert fields[0].type.type.nestedType is not None
-    assert "int64" == fields[0].type.type.nestedType[0]
+    assert fields[0].type.type.nestedType[0] == "int64"


 def test_protobuf_schema_to_mce_fields_nestd_repeated() -> None:
@@ -157,19 +157,19 @@
         ProtobufSchema("main_6.proto", schema)
     )

-    assert 2 == len(fields)
+    assert len(fields) == 2
     assert (
-        "[version=2.0].[type=Test7].[type=array].[type=Test7_Nested].aList"
-        == fields[0].fieldPath
+        fields[0].fieldPath
+        == "[version=2.0].[type=Test7].[type=array].[type=Test7_Nested].aList"
     )
     assert (
-        "[version=2.0].[type=Test7].[type=array].[type=Test7_Nested].aList.[type=string].name"
-        == fields[1].fieldPath
+        fields[1].fieldPath
+        == "[version=2.0].[type=Test7].[type=array].[type=Test7_Nested].aList.[type=string].name"
     )
-    assert "Test7.Nested" == fields[0].nativeDataType
+    assert fields[0].nativeDataType == "Test7.Nested"
     assert isinstance(fields[0].type.type, ArrayTypeClass)
     assert fields[0].type.type.nestedType is not None
-    assert "Test7.Nested" == fields[0].type.type.nestedType[0]
+    assert fields[0].type.type.nestedType[0] == "Test7.Nested"


 # This is not how maps should be encoded but we need to find a good way of detecting
@@ -192,34 +192,34 @@ def test_protobuf_schema_to_mce_fields_map() -> None:
         ProtobufSchema("main_7.proto", schema)
     )

-    assert 7 == len(fields)
+    assert len(fields) == 7
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1"
-        == fields[0].fieldPath
+        fields[0].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1.[type=long].value"
-        == fields[1].fieldPath
+        fields[1].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1.[type=long].value"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1.[type=string].key"
-        == fields[2].fieldPath
+        fields[2].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map1Entry].map_1.[type=string].key"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2"
-        == fields[3].fieldPath
+        fields[3].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=Test8_Nested].value"
-        == fields[4].fieldPath
+        fields[4].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=Test8_Nested].value"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=Test8_Nested].value.[type=string].aString"
-        == fields[5].fieldPath
+        fields[5].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=Test8_Nested].value.[type=string].aString"
     )
     assert (
-        "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=string].key"
-        == fields[6].fieldPath
+        fields[6].fieldPath
+        == "[version=2.0].[type=Test8].[type=array].[type=Test8_Map2Entry].map_2.[type=string].key"
     )

@@ -256,39 +256,39 @@ def test_protobuf_schema_to_mce_fields_with_complex_schema() -> None:
         ProtobufSchema("main_8.proto", schema)
     )

-    assert 10 == len(fields)
-    assert "[version=2.0].[type=Test10].Test10" == fields[0].fieldPath
+    assert len(fields) == 10
+    assert fields[0].fieldPath == "[version=2.0].[type=Test10].Test10"
     assert (
-        "[version=2.0].[type=Test10].Test10.[type=long].an_int_64_field"
-        == fields[1].fieldPath
+        fields[1].fieldPath
+        == "[version=2.0].[type=Test10].Test10.[type=long].an_int_64_field"
     )
-    assert "[version=2.0].[type=Test9].Test9" == fields[2].fieldPath
+    assert fields[2].fieldPath == "[version=2.0].[type=Test9].Test9"
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=Test9_EmptyNested].emptyMsg"
-        == fields[3].fieldPath
+        fields[3].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=Test9_EmptyNested].emptyMsg"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=bool].boolean_field_1"
-        == fields[4].fieldPath
+        fields[4].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=bool].boolean_field_1"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=long].int64_field_1"
-        == fields[5].fieldPath
+        fields[5].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=long].int64_field_1"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=string].string_field_1"
-        == fields[6].fieldPath
+        fields[6].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=string].string_field_1"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=union].payload" == fields[7].fieldPath
+        fields[7].fieldPath == "[version=2.0].[type=Test9].Test9.[type=union].payload"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=union].payload.[type=long].pl_2"
-        == fields[8].fieldPath
+        fields[8].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=union].payload.[type=long].pl_2"
     )
     assert (
-        "[version=2.0].[type=Test9].Test9.[type=union].payload.[type=string].pl_1"
-        == fields[9].fieldPath
+        fields[9].fieldPath
+        == "[version=2.0].[type=Test9].Test9.[type=union].payload.[type=string].pl_1"
     )
diff --git a/metadata-ingestion/tests/unit/test_schema_util.py b/metadata-ingestion/tests/unit/test_schema_util.py
index 0a111d700cf8ce..224238a3983236 100644
--- a/metadata-ingestion/tests/unit/test_schema_util.py
+++ b/metadata-ingestion/tests/unit/test_schema_util.py
@@ -129,7 +129,7 @@ def assert_field_paths_match(
 )
 def test_avro_schema_to_mce_fields_events_with_nullable_fields(schema):
     fields = avro_schema_to_mce_fields(schema)
-    assert 1 == len(fields)
+    assert len(fields) == 1
     assert fields[0].nullable


@@ -762,7 +762,7 @@ def test_logical_types_fully_specified_in_type():
         json.dumps(schema), default_nullable=True
     )
     assert len(fields) == 1
-    assert "[version=2.0].[type=test].[type=bytes].name" == fields[0].fieldPath
+    assert fields[0].fieldPath == "[version=2.0].[type=test].[type=bytes].name"
     assert isinstance(fields[0].type.type, NumberTypeClass)


diff --git a/metadata-ingestion/tests/unit/test_sql_common.py b/metadata-ingestion/tests/unit/test_sql_common.py
index cfb8f55bd977f7..6951b23a6dc403 100644
--- a/metadata-ingestion/tests/unit/test_sql_common.py
+++ b/metadata-ingestion/tests/unit/test_sql_common.py
@@ -45,12 +45,12 @@ def test_generate_foreign_key():
     )

     assert fk_dict.get("name") == foreign_key.name
-    assert [
+    assert foreign_key.foreignFields == [
         "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_referred_schema.test_table,PROD),test_referred_column)"
-    ] == foreign_key.foreignFields
-    assert [
+    ]
+    assert foreign_key.sourceFields == [
         "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD),test_column)"
-    ] == foreign_key.sourceFields
+    ]


 def test_use_source_schema_for_foreign_key_if_not_specified():
@@ -69,12 +69,12 @@ def test_use_source_schema_for_foreign_key_if_not_specified():
     )

     assert fk_dict.get("name") == foreign_key.name
-    assert [
+    assert foreign_key.foreignFields == [
         "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.test_table,PROD),test_referred_column)"
-    ] == foreign_key.foreignFields
-    assert [
+    ]
+    assert foreign_key.sourceFields == [
         "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:TEST,test_schema.base_urn,PROD),test_column)"
-    ] == foreign_key.sourceFields
+    ]


 PLATFORM_FROM_SQLALCHEMY_URI_TEST_CASES: Dict[str, str] = {
diff --git a/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py b/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py
index f9087b19b13c32..128801862b45bb 100644
--- a/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py
+++ b/metadata-ingestion/tests/unit/urns/test_data_process_instance_urn.py
@@ -18,4 +18,4 @@ def test_parse_urn(self) -> None:
         assert str(dataprocessinstance_urn) == dataprocessinstance_urn_str
         assert dataprocessinstance_urn == DataProcessInstanceUrn("abc")
         assert dataprocessinstance_urn == DataProcessInstanceUrn.create_from_id("abc")
-        assert "abc" == dataprocessinstance_urn.get_dataprocessinstance_id()
+        assert dataprocessinstance_urn.get_dataprocessinstance_id() == "abc"