dev: enable ruff rule (#12749)
anshbansal authored Feb 28, 2025
1 parent c4e5a9e commit 9c3bd34
Showing 63 changed files with 204 additions and 205 deletions.
18 changes: 16 additions & 2 deletions metadata-ingestion/pyproject.toml
@@ -30,6 +30,7 @@ extend-select = [
     "I", # isort
     "TID", # flake8-tidy-imports
     "RUF100", # unused-noqa
+    "SIM", # flake8-simplify
 ]
 extend-ignore = [
     "E501", # Handled by formatter
@@ -40,8 +41,21 @@ extend-ignore = [
     "B008", # Allow function call in argument defaults
     "RUF012", # mutable-class-default; incompatible with pydantic
     "RUF015", # unnecessary-iterable-allocation-for-first-element
-    # TODO: Enable these later
-    "B006", # Mutable args
+
+    # Can be enabled later if someone wants to fix existing cases or an auto-fix becomes available
+    "SIM101", # Multiple isinstance calls for {name}, merge into a single call
+    "SIM102", # Use a single `if` statement instead of nested `if` statements
+    "SIM103", # Return the condition directly
+    "SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
+    "SIM108", # Use ternary operator {contents} instead of if-else-block
+    "SIM110", # Use `return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)` instead of `for` loop
+    "SIM113", # Use enumerate() for index variable {index} in for loop
+    "SIM115", # Use a context manager for opening files
+    "SIM116", # Use a dictionary instead of consecutive `if` statements
+    "SIM117", # Use a single with statement with multiple contexts instead of nested with statements
+    "SIM118", # Use key {operator} dict instead of key {operator} dict.keys()
+    "SIM210", # Use `bool(...)` instead of `True if ... else False`
+    "SIM401", # Use `sample_data.get(schema_field.fieldPath, [])` instead of an `if` block
 ]

 [tool.ruff.lint.mccabe]
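The newly ignored SIM codes are deferred simplifications rather than suppressions of real bugs. As a rough illustration of two of them (generic snippets, not code from this repository), SIM118 flags membership tests against dict.keys() and SIM105 flags try/except/pass blocks that only swallow an exception:

import contextlib

d = {"a": 1}

# SIM118: test membership against the dict itself, not dict.keys()
assert ("a" in d.keys()) == ("a" in d)  # the right-hand form is what the rule suggests

# SIM105: a try/except/pass that only swallows an exception ...
try:
    d.pop("missing")
except KeyError:
    pass

# ... can be written with contextlib.suppress
with contextlib.suppress(KeyError):
    d.pop("missing")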
4 changes: 1 addition & 3 deletions metadata-ingestion/src/datahub/configuration/git.py
@@ -43,9 +43,7 @@ class GitReference(ConfigModel):

     @validator("repo", pre=True)
     def simplify_repo_url(cls, repo: str) -> str:
-        if repo.startswith("github.com/"):
-            repo = f"https://{repo}"
-        elif repo.startswith("gitlab.com"):
+        if repo.startswith("github.com/") or repo.startswith("gitlab.com"):
             repo = f"https://{repo}"
         elif repo.count("/") == 1:
             repo = f"https://github.com/{repo}"
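The two original branches had identical bodies, so folding them into one or-condition is behavior-preserving; this is the pattern ruff's SIM114 (if-with-same-arms) describes. An equivalent spelling, since str.startswith also accepts a tuple of prefixes, would be the following (illustrative only, not what the commit uses):

repo = "gitlab.com/acme/data"

# startswith accepts a tuple of prefixes, matching the merged condition
if repo.startswith(("github.com/", "gitlab.com")):
    repo = f"https://{repo}"

print(repo)  # https://gitlab.com/acme/data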
@@ -281,7 +281,7 @@ def get_columns_to_classify(
                 ),
                 values=(
                     sample_data[schema_field.fieldPath]
-                    if schema_field.fieldPath in sample_data.keys()
+                    if schema_field.fieldPath in sample_data
                     else []
                 ),
             )
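Only the membership test changes here; the surrounding conditional expression is the snippet quoted in the SIM401 ignore comment in pyproject.toml above. If SIM401 were applied as well, the whole expression would presumably collapse to a single dict.get call. A quick equivalence check with a plain dict (illustrative, not repository code):

# Hypothetical SIM401 form of the same expression (not applied by this commit):
#     values = sample_data.get(schema_field.fieldPath, [])
sample_data = {"user.id": [1, 2, 3]}
field_path = "user.email"
assert sample_data.get(field_path, []) == (sample_data[field_path] if field_path in sample_data else [])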
2 changes: 1 addition & 1 deletion metadata-ingestion/src/datahub/ingestion/graph/client.py
@@ -330,7 +330,7 @@ def get_aspect_v2(
         aspect_type_name: Optional[str] = None,
         version: int = 0,
     ) -> Optional[Aspect]:
-        assert aspect_type.ASPECT_NAME == aspect
+        assert aspect == aspect_type.ASPECT_NAME
         return self.get_aspect(
             entity_urn=entity_urn,
             aspect_type=aspect_type,
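Swapping the operands reads like a fix for ruff's SIM300 (yoda-conditions), which prefers the variable on the left when it is compared against a constant-like name such as an uppercase class attribute; the assertion itself is unchanged in meaning. A generic illustration (not repository code):

STATUS_OK = "ok"
status = "ok"

# SIM300 prefers the variable on the left of the comparison ...
assert status == STATUS_OK
# ... rather than the "Yoda" ordering STATUS_OK == status; both are equivalent.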
@@ -163,12 +163,7 @@ def _convert_sets_to_lists(obj: Any) -> Any:
                 key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
                 for key, value in obj.items()
             }
-        elif isinstance(obj, list):
-            return [
-                DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
-                for element in obj
-            ]
-        elif isinstance(obj, set):
+        elif isinstance(obj, list) or isinstance(obj, set):
             return [
                 DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
                 for element in obj
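Because SIM101 stays in the ignore list, the merged branch keeps two separate isinstance calls joined with or; the single-call form that SIM101 describes passes a tuple of types instead. A minimal sketch of that form (the to_lists helper is hypothetical, not repository code):

def to_lists(obj):
    # SIM101's single-call form: one isinstance check with a tuple of types
    if isinstance(obj, (list, set)):
        return [to_lists(element) for element in obj]
    return obj

print(to_lists({"a", "b"}))  # ['a', 'b'] (set order is arbitrary)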
6 changes: 2 additions & 4 deletions metadata-ingestion/src/datahub/ingestion/source/abs/config.py
@@ -144,10 +144,8 @@ def check_path_specs_and_infer_platform(
         return path_specs

     @pydantic.validator("platform", always=True)
-    def platform_not_empty(cls, platform: str, values: dict) -> str:
-        inferred_platform = values.get(
-            "platform", None
-        ) # we may have inferred it above
+    def platform_not_empty(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform") # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
@@ -165,7 +165,7 @@ def from_bigquery_table(cls, table: BigqueryTableIdentifier) -> "BigQueryTableRe
     @classmethod
     def from_spec_obj(cls, spec: dict) -> "BigQueryTableRef":
         for key in ["projectId", "datasetId", "tableId"]:
-            if key not in spec.keys():
+            if key not in spec:
                 raise ValueError(f"invalid BigQuery table reference dict: {spec}")

         return cls(
@@ -344,7 +344,7 @@ def get_tables_for_dataset(
         with_partitions: bool = False,
     ) -> Iterator[BigqueryTable]:
         with PerfTimer() as current_timer:
-            filter_clause: str = ", ".join(f"'{table}'" for table in tables.keys())
+            filter_clause: str = ", ".join(f"'{table}'" for table in tables)

             if with_partitions:
                 query_template = BigqueryQuery.tables_for_dataset
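Dropping .keys() here (and in the dbt, dataflow and report-server loops below) is behavior-preserving: iterating a dict yields its keys, so the generator feeding join sees the same values. A quick check with a plain dict (illustrative, not repository code):

tables = {"orders": object(), "users": object()}

assert list(tables) == list(tables.keys())
assert ", ".join(f"'{t}'" for t in tables) == "'orders', 'users'"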
@@ -314,7 +314,7 @@ def get_resource_description_work_unit(
             "datajob": EditableDataJobPropertiesClass,
             "dataflow": EditableDataFlowPropertiesClass,
             "notebook": EditableNotebookPropertiesClass,
-        }.get(entityType, None)
+        }.get(entityType)

         if not entityClass:
             raise ValueError(
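dict.get already returns None when the key is missing, so the explicit None default is redundant and the lookup behaves identically; this is the kind of redundancy the flake8-simplify family flags. A minimal demonstration with a stand-in mapping (not repository code):

entity_classes = {"dataset": "EditableDatasetPropertiesClass"}

# Identical results with and without the explicit None default
assert entity_classes.get("chart", None) is None
assert entity_classes.get("chart") is None
assert entity_classes.get("dataset") == entity_classes.get("dataset", None)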
@@ -1033,7 +1033,7 @@ def add_node_to_cll_list(dbt_name: str) -> None:
             cll_nodes.add(dbt_name)
             schema_nodes.add(dbt_name)

-        for dbt_name in all_nodes_map.keys():
+        for dbt_name in all_nodes_map:
             if self._is_allowed_node(dbt_name):
                 add_node_to_cll_list(dbt_name)
@@ -498,7 +498,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         # Delete empty dataflows if needed
         if self.config.delete_empty_data_flows:
             deleted_data_flows: int = 0
-            for key in dataFlows.keys():
+            for key in dataFlows:
                 if not dataJobs.get(key) or len(dataJobs[key]) == 0:
                     logger.info(
                         f"Deleting dataflow {key} because there are not datajobs"
25 changes: 11 additions & 14 deletions metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py
@@ -170,14 +170,10 @@ def get_column_unique_count_dh_patch(self: SqlAlchemyDataset, column: str) -> in
             ).select_from(self._table)
         )
         return convert_to_json_serializable(element_values.fetchone()[0])
-    elif self.engine.dialect.name.lower() == BIGQUERY:
-        element_values = self.engine.execute(
-            sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from(
-                self._table
-            )
-        )
-        return convert_to_json_serializable(element_values.fetchone()[0])
-    elif self.engine.dialect.name.lower() == SNOWFLAKE:
+    elif (
+        self.engine.dialect.name.lower() == BIGQUERY
+        or self.engine.dialect.name.lower() == SNOWFLAKE
+    ):
         element_values = self.engine.execute(
             sa.select(sa.func.APPROX_COUNT_DISTINCT(sa.column(column))).select_from(
                 self._table
@@ -381,13 +377,14 @@ def _get_columns_to_profile(self) -> List[str]:
             col = col_dict["name"]
             self.column_types[col] = str(col_dict["type"])
             # We expect the allow/deny patterns to specify '<table_pattern>.<column_pattern>'
-            if not self.config._allow_deny_patterns.allowed(
-                f"{self.dataset_name}.{col}"
+            if (
+                not self.config._allow_deny_patterns.allowed(
+                    f"{self.dataset_name}.{col}"
+                )
+                or not self.config.profile_nested_fields
+                and "." in col
             ):
                 ignored_columns_by_pattern.append(col)
-            # We try to ignore nested columns as well
-            elif not self.config.profile_nested_fields and "." in col:
-                ignored_columns_by_pattern.append(col)
             elif col_dict.get("type") and self._should_ignore_column(col_dict["type"]):
                 ignored_columns_by_type.append(col)
             else:
@@ -1408,7 +1405,7 @@ def _get_ge_dataset(
             },
         )

-        if platform == BIGQUERY or platform == DATABRICKS:
+        if platform in (BIGQUERY, DATABRICKS):
             # This is done as GE makes the name as DATASET.TABLE
             # but we want it to be PROJECT.DATASET.TABLE instead for multi-project setups
             name_parts = pretty_name.split(".")
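The merged pattern-filter condition in _get_columns_to_profile only preserves the original two-branch logic because and binds tighter than or: it groups as not allowed(...) or (not profile_nested_fields and "." in col). A minimal check of that precedence (generic values, not repository code):

a, b, c = True, False, False

# "and" binds tighter than "or": a or b and c  ==  a or (b and c)
assert (a or b and c) is True
assert ((a or b) and c) is False  # the other grouping would change the result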
@@ -568,9 +568,7 @@ def _filter_okta_user(self, okta_user: User) -> bool:
         if (
             self.config.include_deprovisioned_users is False
             and okta_user.status == UserStatus.DEPROVISIONED
-        ):
-            return False
-        elif (
+        ) or (
             self.config.include_suspended_users is False
             and okta_user.status == UserStatus.SUSPENDED
         ):
@@ -447,13 +447,10 @@ def get_parser(
     ) -> DebeziumParser:
         connector_class = connector_manifest.config.get(CONNECTOR_CLASS, "")

-        if connector_class == "io.debezium.connector.mysql.MySqlConnector":
-            parser = self.DebeziumParser(
-                source_platform="mysql",
-                server_name=self.get_server_name(connector_manifest),
-                database_name=None,
-            )
-        elif connector_class == "MySqlConnector":
+        if (
+            connector_class == "io.debezium.connector.mysql.MySqlConnector"
+            or connector_class == "MySqlConnector"
+        ):
             parser = self.DebeziumParser(
                 source_platform="mysql",
                 server_name=self.get_server_name(connector_manifest),
@@ -205,8 +205,9 @@ def lookml_model_explore(self, model: str, explore_name: str) -> LookmlModelExpl
     def folder_ancestors(
         self,
         folder_id: str,
-        fields: Union[str, List[str]] = ["id", "name", "parent_id"],
+        fields: Optional[Union[str, List[str]]] = None,
     ) -> Sequence[Folder]:
+        fields = fields or ["id", "name", "parent_id"]
         self.client_stats.folder_calls += 1
         try:
             return self.client.folder_ancestors(
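This hunk, and many of the following ones, replaces a mutable default argument with None plus an "x = x or default" line in the body, which is the standard fix for flake8-bugbear's B006 (mutable argument defaults). The reason is that a default value is evaluated once, at function definition time, and is then shared by every call. A minimal demonstration of the pitfall and the fix (illustrative, not repository code):

def append_bad(item, acc=[]):  # B006: the same list object is reused across calls
    acc.append(item)
    return acc

def append_good(item, acc=None):  # the pattern adopted in this commit
    acc = acc or []
    acc.append(item)
    return acc

append_bad(1)
print(append_bad(2))   # [1, 2]  -- state leaked from the first call
append_good(1)
print(append_good(2))  # [2]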
@@ -464,9 +464,10 @@ def process_lookml_template_language(
     source_config: LookMLSourceConfig,
     view_lkml_file_dict: dict,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> None:
+    manifest_constants = manifest_constants or {}
     if "views" not in view_lkml_file_dict:
         return

@@ -507,9 +508,10 @@ def load_and_preprocess_file(
     path: Union[str, pathlib.Path],
     source_config: LookMLSourceConfig,
     reporter: LookMLSourceReport,
-    manifest_constants: Dict[str, "LookerConstant"] = {},
+    manifest_constants: Optional[Dict[str, "LookerConstant"]] = None,
     resolve_constants: bool = False,
 ) -> dict:
+    manifest_constants = manifest_constants or {}
     parsed = load_lkml(path)

     process_lookml_template_language(
@@ -1006,8 +1006,9 @@ def gen_project_workunits(self, project_name: str) -> Iterable[MetadataWorkUnit]
     def report_skipped_unreachable_views(
         self,
         viewfile_loader: LookerViewFileLoader,
-        processed_view_map: Dict[str, Set[str]] = {},
+        processed_view_map: Optional[Dict[str, Set[str]]] = None,
     ) -> None:
+        processed_view_map = processed_view_map or {}
         view_files: Dict[str, List[pathlib.Path]] = {}
         for project, folder_path in self.base_projects_folder.items():
             folder = pathlib.Path(folder_path)
@@ -292,7 +292,7 @@ def get_properties(self, record: dict) -> str:
         return record["properties"]

     def get_relationships(self, record: dict) -> dict:
-        return record.get("relationships", None)
+        return record.get("relationships", {})

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
9 changes: 6 additions & 3 deletions metadata-ingestion/src/datahub/ingestion/source/nifi.py
@@ -1234,11 +1234,14 @@ def construct_job_workunits(
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
         if job_properties:
             job_properties = {k: v for k, v in job_properties.items() if v is not None}
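One nuance of the "x = x or default" rewrite used here and in the Power BI and Qlik hunks below: or substitutes the default for any falsy argument, not just a missing one. For arguments that are only read, an explicitly passed empty list or dict behaves the same either way, but "if x is None:" is the stricter variant. A small illustration (not repository code):

def with_or(tags=None):
    tags = tags or []      # replaces any falsy value, including an explicit []
    return tags

def with_is_none(tags=None):
    if tags is None:       # replaces only a missing/None argument
        tags = []
    return tags

shared = []
assert with_or(shared) is not shared   # the caller's empty list is silently swapped out
assert with_is_none(shared) is shared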
@@ -167,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -188,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
                 )
         elif "text/csv" in res_cont.keys():
             data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]

     return data
@@ -2,7 +2,7 @@
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional

 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
@@ -70,13 +70,14 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]:
     return expression_tree


-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
     :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
     using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []

     def internal(node: Union[Tree, Token]) -> None:
@@ -890,9 +890,7 @@ def to_datahub_users(
                     set(user_rights) & set(self.__config.ownership.owner_criteria)
                 )
                 > 0
-            ):
-                user_mcps.extend(self.to_datahub_user(user))
-            elif self.__config.ownership.owner_criteria is None:
+            ) or self.__config.ownership.owner_criteria is None:
                 user_mcps.extend(self.to_datahub_user(user))
             else:
                 continue
@@ -380,8 +380,9 @@ def get_tiles(self, workspace: Workspace, dashboard: Dashboard) -> List[Tile]:
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
@@ -196,7 +196,7 @@ def get_all_reports(self) -> List[Any]:
         }

         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
@@ -17,8 +17,9 @@ def __init__(self, tenant_hostname: str, api_key: str, app_id: str) -> None:
         self.handle = [-1]

     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ def _send_request(self, request: Dict) -> Dict:
         return {}

     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)