diff --git a/metadata-ingestion/pyproject.toml b/metadata-ingestion/pyproject.toml index c309926dd96891..fab6f84da7b2ca 100644 --- a/metadata-ingestion/pyproject.toml +++ b/metadata-ingestion/pyproject.toml @@ -41,8 +41,7 @@ extend-ignore = [ "RUF012", # mutable-class-default; incompatible with pydantic "RUF015", # unnecessary-iterable-allocation-for-first-element # TODO: Enable these later - "B006", # Mutable args - "B904", # Checks for raise statements in exception handlers that lack a from clause + "B006", # Mutable args - 21 errors remain ] [tool.ruff.lint.mccabe] diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 2eb0e270726346..e32de84307874b 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -231,7 +231,7 @@ def _docker_compose_v2() -> List[str]: # docker-compose v1 is not installed either. raise DockerComposeVersionError( "You don't have Docker Compose installed. Please install Docker Compose. See https://docs.docker.com/compose/install/.", - ) + ) from None def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None: diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 5ef9f967b5e7db..5feee9188ece87 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -298,7 +298,7 @@ def search( except KeyError: raise click.UsageError( f"Failed to find a matching query flavor for {flavor}. Valid values are {[x.lower() for x in SearchFlavor._member_names_]}" - ) + ) from None catalog = _get_datahub_lite(read_only=True) # sanitize query result_ids = set() diff --git a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py index 857a6fbb4e18e5..4e970915c88311 100644 --- a/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py +++ b/metadata-ingestion/src/datahub/cli/specific/dataproduct_cli.py @@ -49,7 +49,7 @@ def _abort_if_non_existent_urn(graph: DataHubGraph, urn: str, operation: str) -> entity_type = parsed_urn.get_type() except Exception: click.secho(f"Provided urn {urn} does not seem valid", fg="red") - raise click.Abort() + raise click.Abort() from None else: if not graph.exists(urn): click.secho( diff --git a/metadata-ingestion/src/datahub/configuration/kafka.py b/metadata-ingestion/src/datahub/configuration/kafka.py index b8d9ff994a51ab..43215f7e9cd09c 100644 --- a/metadata-ingestion/src/datahub/configuration/kafka.py +++ b/metadata-ingestion/src/datahub/configuration/kafka.py @@ -44,7 +44,7 @@ def resolve_callback(cls, value: dict) -> dict: try: value = CallableConsumerConfig(value).callable_config() except Exception as e: - raise ConfigurationError(e) + raise ConfigurationError() from e return value diff --git a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py index beec42724529e6..5848230d75be3e 100644 --- a/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py +++ b/metadata-ingestion/src/datahub/ingestion/fs/s3_fs.py @@ -48,12 +48,12 @@ def __init__( def __next__(self) -> FileInfo: try: return next(self._file_statuses) - except StopIteration: + except StopIteration as e: if self._token: self.fetch() return next(self._file_statuses) else: - raise StopIteration() + raise e def fetch(self): params = dict(Bucket=self._bucket, Prefix=self._prefix, MaxKeys=self._max_keys) diff --git a/metadata-ingestion/src/datahub/ingestion/graph/entity_versioning.py b/metadata-ingestion/src/datahub/ingestion/graph/entity_versioning.py index 3d2288fdc627ab..2dfd234d5e42d8 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/entity_versioning.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/entity_versioning.py @@ -93,7 +93,7 @@ def link_asset_to_version_set( try: return response["linkAssetVersion"]["urn"] except KeyError: - raise ValueError(f"Unexpected response: {response}") + raise ValueError(f"Unexpected response: {response}") from None def link_asset_to_versioned_asset( self, @@ -165,7 +165,7 @@ def unlink_asset_from_version_set(self, asset_urn: str) -> Optional[str]: try: return response["unlinkAssetVersion"]["urn"] except KeyError: - raise ValueError(f"Unexpected response: {response}") + raise ValueError(f"Unexpected response: {response}") from None def unlink_latest_asset_from_version_set( self, version_set_urn: str @@ -198,4 +198,4 @@ def unlink_latest_asset_from_version_set( try: return response["unlinkAssetVersion"]["urn"] except KeyError: - raise ValueError(f"Unexpected response: {response}") + raise ValueError(f"Unexpected response: {response}") from None diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 8ebb7b9ef7fbdf..3fd003cd358a79 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -640,8 +640,8 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: ) except Exception as e: raise ConfigurationError( - f"Cannot read remote file {self.config.filename}, error:{e}" - ) + f"Cannot read remote file {self.config.filename}" + ) from e else: with open(pathlib.Path(self.config.filename), encoding="utf-8-sig") as f: rows = list(csv.DictReader(f, delimiter=self.config.delimiter)) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py index cf2d9670400ca5..40ac6b247c0b7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py @@ -271,12 +271,12 @@ def execute_query(self, query: str, timeout: int = 3600) -> List[Dict[str, Any]] self.cancel_query(job_id) raise DremioAPIException( f"Query execution timed out after {timeout} seconds" - ) + ) from None except RuntimeError as e: - raise DremioAPIException(f"{str(e)}") + raise DremioAPIException() from e except requests.RequestException as e: - raise DremioAPIException(f"Error executing query: {str(e)}") + raise DremioAPIException("Error executing query") from e def fetch_results(self, job_id: str) -> List[Dict]: """Fetch job results with status checking""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py index 3ba6ef142bc41f..78e42b25ff2c75 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka/kafka.py @@ -272,7 +272,7 @@ def create_schema_registry( return schema_registry_class.create(config, report) except Exception as e: logger.debug(e, exc_info=e) - raise ImportError(config.schema_registry_class) + raise ImportError(config.schema_registry_class) from e def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext): super().__init__(config, ctx) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 5f39821ee6c2e3..f2f49ca71f5d58 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -501,7 +501,7 @@ def get_project_name(self, model_name: str) -> str: raise ValueError( f"Could not locate a project name for model {model_name}. Consider configuring a static project name " f"in your config file" - ) + ) from None def get_manifest_if_present(self, folder: pathlib.Path) -> Optional[LookerManifest]: manifest_file = folder / "manifest.lkml" diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index b546495f667d2a..2e7a77eae4e2a6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -1494,7 +1494,7 @@ def get_request(): sleep_time = error_response.headers.get("retry-after") if sleep_time is not None: time.sleep(float(sleep_time)) - raise HTTPError429 + raise HTTPError429 from None raise http_error diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 738c9bad0cb885..e9ef6fa8fa1af2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -230,8 +230,8 @@ def _get_pulsar_metadata(self, url): self.report.report_warning("HTTPError", message) except requests.exceptions.RequestException as e: raise Exception( - f"An ambiguous exception occurred while handling the request: {e}" - ) + "An ambiguous exception occurred while handling the request" + ) from e @classmethod def create(cls, config_dict, ctx): diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py index 0468792f44aabb..1a5a0bef893318 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma.py @@ -124,7 +124,7 @@ def __init__(self, config: SigmaSourceConfig, ctx: PipelineContext): try: self.sigma_api = SigmaAPI(self.config, self.reporter) except Exception as e: - raise ConfigurationError(f"Unable to connect sigma API. Exception: {e}") + raise ConfigurationError("Unable to connect sigma API") from e @staticmethod def test_connection(config_dict: dict) -> TestConnectionReport: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py index 2854a99198d62b..8f28a7d2e74615 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_connection.py @@ -312,7 +312,7 @@ def get_oauth_connection(self) -> NativeSnowflakeConnection: raise ValueError( f"access_token not found in response {response}. " "Please check your OAuth configuration." - ) + ) from None connect_args = self.get_options()["connect_args"] return snowflake.connector.connect( user=self.username, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 9025448573eb38..07d63a0787d97e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -1562,8 +1562,9 @@ def get_connection_objects( query: str, connection_type: str, page_size: int, - query_filter: dict = {}, + query_filter: Optional[dict] = None, ) -> Iterable[dict]: + query_filter = query_filter or {} query_filter = optimize_query_filter(query_filter) # Calls the get_connection_object_page function to get the objects, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py index b9dce0e189ab8d..d495eec185d2e0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py @@ -514,7 +514,8 @@ class MetadataQueryException(Exception): } -def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass: +def get_tags_from_params(params: Optional[List[str]] = None) -> GlobalTagsClass: + params = params or [] tags = [ TagAssociationClass(tag=builder.make_tag_urn(tag.upper())) for tag in params diff --git a/metadata-ingestion/src/datahub/lite/duckdb_lite.py b/metadata-ingestion/src/datahub/lite/duckdb_lite.py index fe025842822b13..c3e2ccc75b2db7 100644 --- a/metadata-ingestion/src/datahub/lite/duckdb_lite.py +++ b/metadata-ingestion/src/datahub/lite/duckdb_lite.py @@ -284,9 +284,10 @@ def search( self, query: str, flavor: SearchFlavor, - aspects: List[str] = [], + aspects: Optional[List[str]] = None, snippet: bool = True, ) -> Iterable[Searchable]: + aspects = aspects or [] if flavor == SearchFlavor.FREE_TEXT: base_query = f"SELECT distinct(urn), 'urn', NULL from metadata_aspect_v2 where urn ILIKE '%{query}%' UNION SELECT urn, aspect_name, metadata from metadata_aspect_v2 where metadata->>'$.name' ILIKE '%{query}%'" for r in self.duckdb_client.execute(base_query).fetchall(): diff --git a/metadata-ingestion/src/datahub/lite/lite_local.py b/metadata-ingestion/src/datahub/lite/lite_local.py index d767bbcec46215..384fb51976c486 100644 --- a/metadata-ingestion/src/datahub/lite/lite_local.py +++ b/metadata-ingestion/src/datahub/lite/lite_local.py @@ -90,7 +90,7 @@ def search( self, query: str, flavor: SearchFlavor, - aspects: List[str] = [], + aspects: Optional[List[str]] = None, snippet: bool = True, ) -> Iterable[Searchable]: pass diff --git a/metadata-ingestion/src/datahub/lite/lite_util.py b/metadata-ingestion/src/datahub/lite/lite_util.py index b1631e233fa1a9..833a2c027aad28 100644 --- a/metadata-ingestion/src/datahub/lite/lite_util.py +++ b/metadata-ingestion/src/datahub/lite/lite_util.py @@ -70,9 +70,10 @@ def search( self, query: str, flavor: SearchFlavor, - aspects: List[str] = [], + aspects: Optional[List[str]] = None, snippet: bool = True, ) -> Iterable[Searchable]: + aspects = aspects or [] yield from self.lite.search(query, flavor, aspects, snippet) def ls(self, path: str) -> List[Browseable]: @@ -96,10 +97,10 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite lite_type = lite_local_config.type try: lite_class = lite_registry.get(lite_type) - except KeyError: + except KeyError as e: raise Exception( f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping.keys()]}" - ) + ) from e lite_specific_config = lite_class.get_config_class().parse_obj( lite_local_config.config diff --git a/metadata-ingestion/src/datahub/utilities/memory_footprint.py b/metadata-ingestion/src/datahub/utilities/memory_footprint.py index f5f5fdf54ff9b7..e5c76a89491b66 100644 --- a/metadata-ingestion/src/datahub/utilities/memory_footprint.py +++ b/metadata-ingestion/src/datahub/utilities/memory_footprint.py @@ -1,10 +1,10 @@ from collections import deque from itertools import chain from sys import getsizeof -from typing import Any, Iterator +from typing import Any, Iterator, Optional -def total_size(o: Any, handlers: Any = {}) -> int: +def total_size(o: Any, handlers: Optional[Any] = None) -> int: """Returns the approximate memory footprint an object and all of its contents. Automatically finds the contents of the following builtin containers and their subclasses: tuple, list, deque, dict, set and frozenset. @@ -14,6 +14,7 @@ def total_size(o: Any, handlers: Any = {}) -> int: Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py """ + handlers = handlers or {} def dict_handler(d: dict) -> Iterator[Any]: return chain.from_iterable(d.items()) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 2dd320041a1132..40a6e06ebe881a 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -54,7 +54,8 @@ def random_email(): ) -def recipe(mcp_output_path: str, source_config_override: dict = {}) -> dict: +def recipe(mcp_output_path: str, source_config_override: Optional[dict] = None) -> dict: + source_config_override = source_config_override or {} return { "source": { "type": "bigquery",