Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CSM O11y test] Ping GMP endpoint during test for debugging purpose #33

Merged
merged 29 commits into from
May 21, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
81b567e
[CSM O11y test] Ping GMP endpoint regularly for debug purpose
stanley-cheung Feb 7, 2024
d2c1e97
Use requests instead of pod exec
stanley-cheung Feb 7, 2024
916a6c9
Use port forwarding to ping GMP endpoint
stanley-cheung Feb 8, 2024
b9caa33
Add requests to requirements.txt
stanley-cheung Feb 8, 2024
3ab0f31
try catch RequestException
stanley-cheung Feb 8, 2024
73765f0
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Feb 8, 2024
dc6c854
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Feb 20, 2024
f11a723
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Feb 20, 2024
54a4e73
Review changes
stanley-cheung Feb 21, 2024
d025083
remove unnecessary variable which broke unit test
stanley-cheung Feb 21, 2024
1c0f3dd
fix pylint for function signature
stanley-cheung Feb 21, 2024
90c533c
fixed variable initialization
stanley-cheung Feb 21, 2024
01f4c4d
add comment for temp debugging class
stanley-cheung Feb 21, 2024
a3e52dd
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Feb 22, 2024
bcf43a0
Add a marker in the prometheus log files
stanley-cheung Feb 22, 2024
02ea1cc
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Feb 29, 2024
d4fabf2
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 4, 2024
9d117f4
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 5, 2024
5a49e3a
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 5, 2024
bae1c81
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 5, 2024
e5d0270
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 5, 2024
c71a050
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung Mar 7, 2024
32bfb74
Merge branch 'main' into ping-gmp-endpoint
sergiitk May 10, 2024
3d0437f
Review feedback: check should_collect_logs flags first
stanley-cheung May 16, 2024
3020499
Merge branch 'main' into ping-gmp-endpoint
stanley-cheung May 16, 2024
19a9a2b
Merge branch 'ping-gmp-endpoint' of github.com:stanley-cheung/psm-int…
stanley-cheung May 16, 2024
2a4c603
Fully use ClientDeploymentArgs
stanley-cheung May 17, 2024
95edebd
ran black.sh
stanley-cheung May 17, 2024
0518a7d
Fix ClientDeploymentArgs initialization
stanley-cheung May 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions framework/test_app/client_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,15 @@ def __init__(
hostname: str,
rpc_host: Optional[str] = None,
maintenance_port: Optional[int] = None,
monitoring_port: Optional[int] = None,
):
super().__init__(rpc_host=(rpc_host or ip))
self.ip = ip
self.rpc_port = rpc_port
self.server_target = server_target
self.maintenance_port = maintenance_port or rpc_port
self.hostname = hostname
self.monitoring_port = monitoring_port

@property
@functools.lru_cache(None)
Expand Down
35 changes: 32 additions & 3 deletions framework/test_app/runners/k8s/gamma_server_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class GammaServerRunner(KubernetesServerRunner):

route_name: str
frontend_service_name: str
enable_csm_observability: bool
csm_workload_name: str
csm_canonical_service_name: str

Expand Down Expand Up @@ -77,6 +78,7 @@ def __init__(
namespace_template: Optional[str] = None,
debug_use_port_forwarding: bool = False,
enable_workload_identity: bool = True,
enable_csm_observability: bool = False,
csm_workload_name: str = "",
csm_canonical_service_name: str = "",
deployment_args: Optional[ServerDeploymentArgs] = None,
Expand Down Expand Up @@ -108,6 +110,7 @@ def __init__(

self.frontend_service_name = frontend_service_name
self.route_name = route_name or f"route-{deployment_name}"
self.enable_csm_observability = enable_csm_observability
self.csm_workload_name = csm_workload_name
self.csm_canonical_service_name = csm_canonical_service_name

Expand All @@ -122,7 +125,6 @@ def run( # pylint: disable=arguments-differ
log_to_stdout: bool = False,
bootstrap_version: Optional[str] = None,
route_template: str = "gamma/route_http.yaml",
enable_csm_observability: bool = False,
generate_mesh_id: bool = False,
) -> list[XdsTestServer]:
if not maintenance_port:
Expand Down Expand Up @@ -209,7 +211,7 @@ def run( # pylint: disable=arguments-differ
maintenance_port=maintenance_port,
secure_mode=secure_mode,
bootstrap_version=bootstrap_version,
enable_csm_observability=enable_csm_observability,
enable_csm_observability=self.enable_csm_observability,
generate_mesh_id=generate_mesh_id,
csm_workload_name=self.csm_workload_name,
csm_canonical_service_name=self.csm_canonical_service_name,
Expand All @@ -218,13 +220,14 @@ def run( # pylint: disable=arguments-differ

# Create a PodMonitoring resource if CSM Observability is enabled
# This is GMP (Google Managed Prometheus)
if enable_csm_observability:
if self.enable_csm_observability:
self.pod_monitoring_name = f"{self.deployment_id}-gmp"
self.pod_monitoring = self._create_pod_monitoring(
"csm/pod-monitoring.yaml",
namespace_name=self.k8s_namespace.name,
deployment_id=self.deployment_id,
pod_monitoring_name=self.pod_monitoring_name,
pod_monitoring_port=self.DEFAULT_MONITORING_PORT,
)

servers = self._make_servers_for_deployment(
Expand Down Expand Up @@ -290,6 +293,32 @@ def create_backend_policy(
draining_timeout_sec=draining_timeout_sec,
)

def _xds_test_server_for_pod(
    self,
    pod: k8s.V1Pod,
    *,
    test_port: int = KubernetesServerRunner.DEFAULT_TEST_PORT,
    maintenance_port: Optional[int] = None,
    secure_mode: bool = False,
    monitoring_port: Optional[int] = None,
) -> XdsTestServer:
    """Build the XdsTestServer for a pod, resolving its monitoring port.

    When CSM Observability is enabled on this runner, the caller-supplied
    ``monitoring_port`` is replaced:
      * with port forwarding on, a port forward to DEFAULT_MONITORING_PORT
        on the pod is started and its local port is used;
      * otherwise DEFAULT_MONITORING_PORT is used as-is.
    When CSM Observability is disabled, ``monitoring_port`` is passed
    through untouched. All remaining work is delegated to the parent
    implementation.
    """
    csm_enabled = self.enable_csm_observability
    if csm_enabled and self.debug_use_port_forwarding:
        # The monitoring endpoint is only reachable through a local
        # forwarded port in this mode.
        monitoring_forwarder = self._start_port_forwarding_pod(
            pod, self.DEFAULT_MONITORING_PORT
        )
        monitoring_port = monitoring_forwarder.local_port
    elif csm_enabled:
        monitoring_port = self.DEFAULT_MONITORING_PORT

    return super()._xds_test_server_for_pod(
        pod=pod,
        test_port=test_port,
        maintenance_port=maintenance_port,
        secure_mode=secure_mode,
        monitoring_port=monitoring_port,
    )

@override
def cleanup(self, *, force=False, force_namespace=False):
try:
Expand Down
3 changes: 3 additions & 0 deletions framework/test_app/runners/k8s/k8s_base_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class KubernetesBaseRunner(base_runner.BaseRunner, metaclass=ABCMeta):
# Pylint wants abstract classes to override abstract methods.
# pylint: disable=abstract-method

DEFAULT_MONITORING_PORT = 9464
TEMPLATE_DIR_NAME = "kubernetes-manifests"
TEMPLATE_DIR_RELATIVE_PATH = f"../../../../{TEMPLATE_DIR_NAME}"
ROLE_WORKLOAD_IDENTITY_USER = "roles/iam.workloadIdentityUser"
Expand Down Expand Up @@ -384,6 +385,7 @@ def _create_pod_monitoring(
namespace_name: str,
deployment_id: str,
pod_monitoring_name: str,
pod_monitoring_port: int,
**kwargs,
) -> k8s.PodMonitoring:
pod_monitoring = self._create_from_template(
Expand All @@ -392,6 +394,7 @@ def _create_pod_monitoring(
namespace_name=namespace_name,
deployment_id=deployment_id,
pod_monitoring_name=pod_monitoring_name,
pod_monitoring_port=pod_monitoring_port,
**kwargs,
)
if not (
Expand Down
24 changes: 12 additions & 12 deletions framework/test_app/runners/k8s/k8s_xds_client_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

@dataclasses.dataclass(frozen=True)
class ClientDeploymentArgs:
enable_csm_observability: bool = False
csm_workload_name: str = ""
csm_canonical_service_name: str = ""

Expand Down Expand Up @@ -75,8 +76,6 @@ def __init__( # pylint: disable=too-many-locals
namespace_template: Optional[str] = None,
debug_use_port_forwarding: bool = False,
enable_workload_identity: bool = True,
csm_workload_name: str = "",
csm_canonical_service_name: str = "",
deployment_args: Optional[ClientDeploymentArgs] = None,
):
super().__init__(
Expand All @@ -97,13 +96,6 @@ def __init__( # pylint: disable=too-many-locals
self.debug_use_port_forwarding = debug_use_port_forwarding

# Client deployment arguments.
if not deployment_args:
deployment_args = ClientDeploymentArgs(
# TODO(stanleycheung): remove once https://github.com/grpc/psm-interop/pull/33
# is merged and self.csm_* are removed as class args.
csm_workload_name=csm_workload_name,
csm_canonical_service_name=csm_canonical_service_name,
)
self.deployment_args = deployment_args

# Used by the TD bootstrap generator.
Expand Down Expand Up @@ -132,7 +124,6 @@ def run( # pylint: disable=arguments-differ
generate_mesh_id=False,
print_response=False,
log_to_stdout: bool = False,
enable_csm_observability: bool = False,
request_payload_size: int = 0,
response_payload_size: int = 0,
) -> client_app.XdsTestClient:
Expand Down Expand Up @@ -194,19 +185,19 @@ def run( # pylint: disable=arguments-differ
config_mesh=config_mesh,
generate_mesh_id=generate_mesh_id,
print_response=print_response,
enable_csm_observability=enable_csm_observability,
**self.deployment_args.as_dict(),
)

# Create a PodMonitoring resource if CSM Observability is enabled
# This is GMP (Google Managed Prometheus)
if enable_csm_observability:
if self.deployment_args.enable_csm_observability:
self.pod_monitoring_name = f"{self.deployment_id}-gmp"
self.pod_monitoring = self._create_pod_monitoring(
"csm/pod-monitoring.yaml",
namespace_name=self.k8s_namespace.name,
deployment_id=self.deployment_id,
pod_monitoring_name=self.pod_monitoring_name,
pod_monitoring_port=self.DEFAULT_MONITORING_PORT,
)

# We don't support multiple client replicas at the moment.
Expand Down Expand Up @@ -237,18 +228,27 @@ def _make_clients_for_deployment(
def _xds_test_client_for_pod(
    self, pod: k8s.V1Pod, *, server_target: str
) -> client_app.XdsTestClient:
    """Build the XdsTestClient for a pod, resolving RPC and monitoring ports.

    With port forwarding enabled, the stats port is forwarded and the
    forwarder's local port/address are used for RPC; otherwise the stats
    port is used directly with no explicit RPC host.

    The monitoring port is only set when CSM Observability is enabled in
    the deployment args: it is the local port of a second port forward to
    DEFAULT_MONITORING_PORT when port forwarding is on, or
    DEFAULT_MONITORING_PORT itself when it is off. Otherwise it stays None.
    """
    use_port_forwarding = self.debug_use_port_forwarding

    # Resolve how the test driver reaches the client's stats/RPC endpoint.
    if use_port_forwarding:
        stats_forwarder = self._start_port_forwarding_pod(
            pod, self.stats_port
        )
        rpc_port = stats_forwarder.local_port
        rpc_host = stats_forwarder.local_address
    else:
        rpc_port = self.stats_port
        rpc_host = None

    # Resolve the monitoring endpoint port, if observability is enabled.
    monitoring_port = None
    if self.deployment_args.enable_csm_observability:
        if use_port_forwarding:
            monitoring_forwarder = self._start_port_forwarding_pod(
                pod, self.DEFAULT_MONITORING_PORT
            )
            monitoring_port = monitoring_forwarder.local_port
        else:
            monitoring_port = self.DEFAULT_MONITORING_PORT

    return client_app.XdsTestClient(
        ip=pod.status.pod_ip,
        rpc_port=rpc_port,
        server_target=server_target,
        hostname=pod.metadata.name,
        rpc_host=rpc_host,
        monitoring_port=monitoring_port,
    )

# pylint: disable=arguments-differ
Expand Down
2 changes: 2 additions & 0 deletions framework/test_app/runners/k8s/k8s_xds_server_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ def _xds_test_server_for_pod(
test_port: int = DEFAULT_TEST_PORT,
maintenance_port: Optional[int] = None,
secure_mode: bool = False,
monitoring_port: Optional[int] = None,
) -> XdsTestServer:
if maintenance_port is None:
maintenance_port = self._get_default_maintenance_port(secure_mode)
Expand All @@ -322,6 +323,7 @@ def _xds_test_server_for_pod(
maintenance_port=rpc_port,
secure_mode=secure_mode,
rpc_host=rpc_host,
monitoring_port=monitoring_port,
)
self.pods_to_servers[pod.metadata.name] = server
return server
Expand Down
2 changes: 2 additions & 0 deletions framework/test_app/server_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(
xds_host: Optional[str] = None,
xds_port: Optional[int] = None,
rpc_host: Optional[str] = None,
monitoring_port: Optional[str] = None,
):
super().__init__(rpc_host=(rpc_host or ip))
self.ip = ip
Expand All @@ -60,6 +61,7 @@ def __init__(
self.maintenance_port = maintenance_port or rpc_port
self.secure_mode = secure_mode
self.xds_host, self.xds_port = xds_host, xds_port
self.monitoring_port = monitoring_port

@property
@functools.lru_cache(None)
Expand Down
2 changes: 1 addition & 1 deletion kubernetes-manifests/csm/pod-monitoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ spec:
matchLabels:
deployment_id: ${deployment_id}
endpoints:
- port: 9464
- port: ${pod_monitoring_port}
interval: 10s
2 changes: 1 addition & 1 deletion requirements.lock
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ packaging==23.1
Pygments==2.14.0
python-dateutil==2.8.2
protobuf==4.24.1
requests==2.31.0
xds-protos==1.58.0rc1
## The following requirements were added by pip freeze:
cachetools==5.3.1
Expand All @@ -35,7 +36,6 @@ proto-plus==1.22.3
pyasn1==0.5.0
pyasn1-modules==0.3.0
pyparsing==3.1.1
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
uritemplate==3.0.1
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,5 @@ packaging~=23.1
Pygments~=2.9
python-dateutil~=2.8
protobuf~=4.24
requests~=2.31.0
xds-protos==1.58.0rc1
Loading
Loading