diff --git a/.pylintrc b/.pylintrc index 323b0dbc..23731843 100644 --- a/.pylintrc +++ b/.pylintrc @@ -19,6 +19,19 @@ max-parents=8 # "TODO():", "FIXME:", or anything else. notes=FIXME,XXX +[TYPECHECK] +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +# +# TODO(sergiitk): remove when astroid upgraded to astroid 2.15.5, see +# https://pylint.pycqa.org/projects/astroid/en/latest/changelog.html#what-s-new-in-astroid-2-15-5 +# > Recognize stub pyi Python files. +ignored-modules= + protos.grpc.testing.messages_pb2, + protos.grpc.testing.empty_pb2, + [MESSAGES CONTROL] disable= diff --git a/bin/pylint.sh b/bin/pylint.sh new file mode 100755 index 00000000..3faba4de --- /dev/null +++ b/bin/pylint.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Copyright 2024 gRPC authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eo pipefail + +SCRIPT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +readonly SCRIPT_DIR +XDS_K8S_DRIVER_DIR="$( cd -- "$(dirname "${SCRIPT_DIR}")" >/dev/null 2>&1 ; pwd -P )" +readonly XDS_K8S_DRIVER_DIR + +# TODO(sergiitk): Once upgraded to be compatible with requirements-dev.txt, use the default venv. +display_usage() { + cat </dev/stderr +A helper to run black pylint. + +USAGE: + ./bin/pylint.sh + +ONE-TIME INSTALLATION: + 1. python3.9 -m venv --upgrade-deps venv-pylint + 2. source ./venv-pylint/bin/activate + 3. pip install pylint==2.2.2 astroid==2.3.3 toml==0.10.2 "isort>=4.3.0,<5.0.0" + 4. deactivate + +ENVIRONMENT: + XDS_K8S_DRIVER_PYLINT_VENV_DIR: the path to python virtual environment directory + Default: $XDS_K8S_DRIVER_DIR/venv-pylint +EOF + exit 1 +} + +if [[ "$1" == "-h" || "$1" == "--help" ]]; then + display_usage +fi + +cd "${XDS_K8S_DRIVER_DIR}" + +# Relative paths not yet supported by shellcheck. +# shellcheck source=/dev/null +XDS_K8S_DRIVER_VENV_DIR="${XDS_K8S_DRIVER_PYLINT_VENV_DIR:-$XDS_K8S_DRIVER_DIR/venv-pylint}" \ + source "${XDS_K8S_DRIVER_DIR}/bin/ensure_venv.sh" + +EXIT=0 +set -x +pylint -rn 'bin' 'framework' || EXIT=1 +pylint --rcfile=./tests/.pylintrc -rn 'tests' || EXIT=1 +exit "${EXIT}" diff --git a/config/common.cfg b/config/common.cfg index af8f3462..6d022748 100644 --- a/config/common.cfg +++ b/config/common.cfg @@ -1,5 +1,5 @@ --resource_prefix=psm-interop ---td_bootstrap_image=gcr.io/grpc-testing/td-grpc-bootstrap:7d8d90477792e2e1bfe3a3da20b3dc9ef01d326c +--td_bootstrap_image=gcr.io/grpc-testing/td-grpc-bootstrap:2bf1b5ed00f852ffea8d24759c6fa673acc9ef10 # The canonical implementation of the xDS test server. # Can be used in tests where language-specific xDS test server does not exist, diff --git a/framework/test_cases/base_testcase.py b/framework/test_cases/base_testcase.py index f2dbcab4..0a0973eb 100644 --- a/framework/test_cases/base_testcase.py +++ b/framework/test_cases/base_testcase.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. """Base test case used for xds test suites.""" - -from typing import Optional +import inspect +import traceback +from typing import Optional, Union import unittest from absl import logging @@ -21,6 +22,7 @@ class BaseTestCase(absltest.TestCase): + # @override def run(self, result: Optional[unittest.TestResult] = None) -> None: super().run(result) # TODO(sergiitk): should this method be returning result? See @@ -44,10 +46,8 @@ def run(self, result: Optional[unittest.TestResult] = None) -> None: self.test_name, f" | Errors count: {total_errors}" if total_errors > 1 else "", ) - if test_errors: - self._print_error_list(test_errors, is_unexpected_error=True) - if test_failures: - self._print_error_list(test_failures) + self._log_test_errors(test_errors, is_unexpected=True) + self._log_test_errors(test_failures) elif test_unexpected_successes: logging.error( "----- PSM Test Case UNEXPECTEDLY SUCCEEDED: %s -----\n", @@ -85,22 +85,75 @@ def test_name(self) -> str: """ return self.id().removeprefix("__main__.").split(" ", 1)[0] - def _print_error_list( - self, errors: list[str], is_unexpected_error: bool = False + def _log_test_errors(self, errors: list[str], is_unexpected: bool = False): + for err in errors: + self._log_framed_test_failure(self.test_name, err, is_unexpected) + + @classmethod + def _log_class_hook_failure(cls, error: Exception): + """ + Log error helper for failed unittest hooks, e.g. setUpClass. + + Normally we don't want to make external calls in setUpClass. + But when we do, we want to wrap them into try/except, and call + _log_class_hook_failure, so the error is logged in our standard format. + Don't forget to re-raise! + + Example: + @classmethod + def setUpClass(cls): + try: + # Making bad external calls that end up raising + raise OSError("Network bad!") + except Exception as error: # noqa pylint: disable=broad-except + cls._log_class_hook_failure(error) + raise + """ + caller: str + try: + caller_info: inspect.FrameInfo = inspect.stack()[1] + caller: str = caller_info.function + except (IndexError, AttributeError): + caller = "undefined_hook" + + fake_test_id = f"{cls.__name__}.{caller}" + # The same test name transformation as in self.test_name(). + # TODO(sergiitk): move the transformation to a classmethod. + test_name = fake_test_id.removeprefix("__main__.").split(" ", 1)[0] + logging.error("----- PSM Test Case FAILED: %s -----", test_name) + cls._log_framed_test_failure(test_name, error, is_unexpected=True) + + @classmethod + def _log_framed_test_failure( + cls, + test_name: str, + error: Union[str, Exception], + is_unexpected: bool = False, ) -> None: + trace: str + if isinstance(error, Exception): + trace = cls._format_error_with_trace(error) + else: + trace = error + # FAILURE is an errors explicitly signalled using one of the # TestCase.assert*() methods, while ERROR means an unexpected exception. - fail_type: str = "ERROR" if is_unexpected_error else "FAILURE" - for err in errors: - logging.error( - "(%(fail_type)s) PSM Interop Test Failed: %(test_id)s" - "\n^^^^^" - "\n[%(test_id)s] PSM Failed Test Traceback BEGIN" - "\n%(error)s" - "[%(test_id)s] PSM Failed Test Traceback END\n", - { - "test_id": self.test_name, - "fail_type": fail_type, - "error": err, - }, - ) + fail_type: str = "ERROR" if is_unexpected else "FAILURE" + logging.error( + "(%(fail_type)s) PSM Interop Test Failed: %(test_id)s" + "\n^^^^^" + "\n[%(test_id)s] PSM Failed Test Traceback BEGIN" + "\n%(error)s" + "[%(test_id)s] PSM Failed Test Traceback END\n", + { + "test_id": test_name, + "fail_type": fail_type, + "error": trace, + }, + ) + + @classmethod + def _format_error_with_trace(cls, error: Exception) -> str: + return "".join( + traceback.TracebackException.from_exception(error).format() + ) diff --git a/framework/xds_url_map_testcase.py b/framework/xds_url_map_testcase.py index 25ef9ee9..c1abd85e 100644 --- a/framework/xds_url_map_testcase.py +++ b/framework/xds_url_map_testcase.py @@ -380,27 +380,33 @@ def setUpClass(cls): # whether setUpClass failed. cls.addClassCleanup(cls.cleanupAfterTests) - if not cls.started_test_cases: - # Create the GCP resource once before the first test start - GcpResourceManager().setup(cls.test_case_classes) - cls.started_test_cases.add(cls.__name__) - - # Create the test case's own client runner with it's own namespace, - # enables concurrent running with other test cases. - cls.test_client_runner = ( - GcpResourceManager().create_test_client_runner() - ) - # Start the client, and allow the test to override the initial RPC config. - rpc, metadata = cls.client_init_config( - rpc="UnaryCall,EmptyCall", metadata="" - ) - cls.test_client = cls.test_client_runner.run( - server_target=f"xds:///{cls.hostname()}", - rpc=rpc, - metadata=metadata, - qps=QPS.value, - print_response=True, - ) + # Normally we don't want to make external calls in setUpClass. + try: + if not cls.started_test_cases: + # Create the GCP resource once before the first test start + GcpResourceManager().setup(cls.test_case_classes) + cls.started_test_cases.add(cls.__name__) + + # Create the test case's own client runner with it's own namespace, + # enables concurrent running with other test cases. + cls.test_client_runner = ( + GcpResourceManager().create_test_client_runner() + ) + # Start the client, and allow the test to override the initial + # RPC config. + rpc, metadata = cls.client_init_config( + rpc="UnaryCall,EmptyCall", metadata="" + ) + cls.test_client = cls.test_client_runner.run( + server_target=f"xds:///{cls.hostname()}", + rpc=rpc, + metadata=metadata, + qps=QPS.value, + print_response=True, + ) + except Exception as error: # noqa pylint: disable=broad-except + cls._log_class_hook_failure(error) + raise @classmethod def cleanupAfterTests(cls): diff --git a/requirements.lock b/requirements.lock index 2e45a34a..06b22cf5 100644 --- a/requirements.lock +++ b/requirements.lock @@ -4,10 +4,10 @@ absl-py==0.15.0 google-api-python-client==1.12.11 google-cloud-secret-manager==2.15.1 google-cloud-monitoring==2.18.0 -grpcio==1.57.0 -grpcio-health-checking==1.57.0 -grpcio-tools==1.57.0 -grpcio-channelz==1.57.0 +grpcio==1.60.1 +grpcio-health-checking==1.60.1 +grpcio-tools==1.60.1 +grpcio-channelz==1.60.1 kubernetes==27.2.0 six==1.16.0 tenacity==6.3.1 @@ -25,7 +25,7 @@ google-auth==2.22.0 google-auth-httplib2==0.1.0 googleapis-common-protos==1.60.0 grpc-google-iam-v1==0.12.6 -grpcio-status==1.57.0 +grpcio-status==1.60.1 httplib2==0.22.0 idna==3.4 MarkupSafe==2.1.3 diff --git a/requirements.txt b/requirements.txt index 11600948..3349f081 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,10 +4,10 @@ absl-py~=0.11 google-api-python-client~=1.12 google-cloud-secret-manager~=2.1 google-cloud-monitoring~=2.18 -grpcio~=1.57 -grpcio-health-checking~=1.57 -grpcio-tools~=1.57 -grpcio-channelz~=1.57 +grpcio~=1.60 +grpcio-health-checking~=1.60 +grpcio-tools~=1.60 +grpcio-channelz~=1.60 kubernetes~=27.2 six~=1.13 tenacity~=6.2 diff --git a/tests/bootstrap_generator_test.py b/tests/bootstrap_generator_test.py index dba6e1f7..b728afc0 100644 --- a/tests/bootstrap_generator_test.py +++ b/tests/bootstrap_generator_test.py @@ -46,6 +46,14 @@ # after the release is published. def bootstrap_version_testcases() -> List: return ( + dict( + version="v0.16.0", + image="gcr.io/grpc-testing/td-grpc-bootstrap:2bf1b5ed00f852ffea8d24759c6fa673acc9ef10", + ), + dict( + version="v0.15.0", + image="gcr.io/grpc-testing/td-grpc-bootstrap:7d8d90477792e2e1bfe3a3da20b3dc9ef01d326c", + ), dict( version="v0.14.0", image="gcr.io/grpc-testing/td-grpc-bootstrap:d6baaf7b0e0c63054ac4d9bedc09021ff261d599", diff --git a/tests/fake_test.py b/tests/fake_test.py index 7be8fc6a..28be4f49 100644 --- a/tests/fake_test.py +++ b/tests/fake_test.py @@ -80,5 +80,24 @@ def test_even(self): self.fail(f"Integer {num} is odd") +class FakeSetupClassTest(xds_k8s_testcase.XdsKubernetesBaseTestCase): + """A fake class to debug BaseTestCase logs produced by setupClassError. + + See FakeTest for notes on provisioning. + """ + + @classmethod + def setUpClass(cls): + try: + # Making bad external calls that end up raising + raise OSError("Network bad!") + except Exception as error: # noqa pylint: disable=broad-except + cls._log_class_hook_failure(error) + raise + + def test_should_never_run(self): + self.fail("IF YOU SEE ME, SOMETHING IS WRONG!") + + if __name__ == "__main__": absltest.main() diff --git a/tests/gamma/csm_observability_test.py b/tests/gamma/csm_observability_test.py index 5603e533..00d87447 100644 --- a/tests/gamma/csm_observability_test.py +++ b/tests/gamma/csm_observability_test.py @@ -98,7 +98,7 @@ GammaServerRunner = gamma_server_runner.GammaServerRunner KubernetesClientRunner = k8s_xds_client_runner.KubernetesClientRunner -BuildQueryFn = Callable[[str], str] +BuildQueryFn = Callable[[str, str], str] ANY = unittest.mock.ANY @@ -215,13 +215,36 @@ def test_csm_observability(self): start_time={"seconds": start_secs}, end_time={"seconds": end_secs}, ) - histogram_results = self.query_metrics( - HISTOGRAM_METRICS, self.build_histogram_query, interval + server_histogram_results = self.query_metrics( + HISTOGRAM_SERVER_METRICS, + self.build_histogram_query, + self.server_namespace, + interval, ) - counter_results = self.query_metrics( - COUNTER_METRICS, self.build_counter_query, interval + client_histogram_results = self.query_metrics( + HISTOGRAM_CLIENT_METRICS, + self.build_histogram_query, + self.client_namespace, + interval, ) - all_results = {**histogram_results, **counter_results} + server_counter_results = self.query_metrics( + COUNTER_SERVER_METRICS, + self.build_counter_query, + self.server_namespace, + interval, + ) + client_counter_results = self.query_metrics( + COUNTER_CLIENT_METRICS, + self.build_counter_query, + self.client_namespace, + interval, + ) + all_results = { + **server_histogram_results, + **client_histogram_results, + **server_counter_results, + **client_counter_results, + } self.assertNotEmpty(all_results, msg="No query metrics results") with self.subTest("5_check_metrics_time_series"): @@ -376,7 +399,7 @@ def test_csm_observability(self): ) @classmethod - def build_histogram_query(cls, metric_type: str) -> str: + def build_histogram_query(cls, metric_type: str, namespace: str) -> str: # # The list_time_series API requires us to query one metric # at a time. @@ -389,25 +412,30 @@ def build_histogram_query(cls, metric_type: str) -> str: # The 'grpc_method' filter condition is needed because the # server metrics are also serving on the Channelz requests. # + # The 'resource.labels.namespace' filter condition allows us to + # filter metrics just for the current test run. return ( f'metric.type = "{metric_type}" AND ' 'metric.labels.grpc_status = "OK" AND ' - f'metric.labels.grpc_method = "{GRPC_METHOD_NAME}"' + f'metric.labels.grpc_method = "{GRPC_METHOD_NAME}" AND ' + f'resource.labels.namespace = "{namespace}"' ) @classmethod - def build_counter_query(cls, metric_type: str) -> str: + def build_counter_query(cls, metric_type: str, namespace: str) -> str: # For these num rpcs started counter metrics, they do not have the # 'grpc_status' label return ( f'metric.type = "{metric_type}" AND ' - f'metric.labels.grpc_method = "{GRPC_METHOD_NAME}"' + f'metric.labels.grpc_method = "{GRPC_METHOD_NAME}" AND ' + f'resource.labels.namespace = "{namespace}"' ) def query_metrics( self, metric_names: Iterable[str], build_query_fn: BuildQueryFn, + namespace: str, interval: monitoring_v3.TimeInterval, ) -> dict[str, MetricTimeSeries]: """ @@ -438,7 +466,7 @@ def query_metrics( logger.info("Requesting list_time_series for metric %s", metric) response = self.metric_client.list_time_series( name=f"projects/{self.project}", - filter=build_query_fn(metric), + filter=build_query_fn(metric, namespace), interval=interval, view=monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL, retry=retry_settings,