Skip to content

Commit

Permalink
UID2-4719 change azure cc starting process (#1260)
Browse files Browse the repository at this point in the history
* UID2-4719 change azure cc starting process
  • Loading branch information
clarkxuyang authored Jan 23, 2025
1 parent 4c8e872 commit 3301aa4
Show file tree
Hide file tree
Showing 7 changed files with 263 additions and 61 deletions.
1 change: 1 addition & 0 deletions .github/workflows/publish-azure-cc-enclave-docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ jobs:
echo "jar_version=$(mvn help:evaluate -Dexpression=project.version | grep -e '^[1-9][^\[]')" >> $GITHUB_OUTPUT
echo "git_commit=$(git show --format="%h" --no-patch)" >> $GITHUB_OUTPUT
cp -r target ${{ env.DOCKER_CONTEXT_PATH }}/
cp scripts/confidential_compute.py ${{ env.DOCKER_CONTEXT_PATH }}/
- name: Log in to the Docker container registry
uses: docker/login-action@v3
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.uid2</groupId>
<artifactId>uid2-operator</artifactId>
<version>5.45.7-alpha-169-SNAPSHOT</version>
<version>5.45.8-alpha-172-SNAPSHOT</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
Expand Down
40 changes: 19 additions & 21 deletions scripts/aws/ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import yaml

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, MissingInstanceProfile, ConfigNotFound, InvalidConfigValue, ConfidentialComputeStartupException
from confidential_compute import ConfidentialCompute, ConfidentialComputeConfig, MissingInstanceProfile, ApiTokenNotFound, InvalidConfigValue, ConfidentialComputeStartupException

class AWSConfidentialComputeConfig(ConfidentialComputeConfig):
enclave_memory_mb: int
Expand Down Expand Up @@ -48,10 +48,10 @@ def get_meta_url(cls) -> str:
return f"http://{cls.AWS_METADATA}/latest/dynamic/instance-identity/document"


class EC2(ConfidentialCompute):
class EC2EntryPoint(ConfidentialCompute):

def __init__(self):
super().__init__()
self.configs: AWSConfidentialComputeConfig = {}

def __get_aws_token(self) -> str:
"""Fetches a temporary AWS EC2 metadata token."""
Expand All @@ -74,38 +74,36 @@ def __get_current_region(self) -> str:
except requests.RequestException as e:
raise RuntimeError(f"Failed to fetch region: {e}")

def __validate_aws_specific_config(self, secret):
if "enclave_memory_mb" in secret or "enclave_cpu_count" in secret:
def __validate_aws_specific_config(self):
if "enclave_memory_mb" in self.configs or "enclave_cpu_count" in self.configs:
max_capacity = self.__get_max_capacity()
min_capacity = {"enclave_memory_mb": 11000, "enclave_cpu_count" : 2 }
for key in ["enclave_memory_mb", "enclave_cpu_count"]:
if int(secret.get(key, 0)) > max_capacity.get(key):
raise ValueError(f"{key} value ({secret.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).")
if min_capacity.get(key) > int(secret.get(key, 10**9)):
raise ValueError(f"{key} value ({secret.get(key, 0)}) needs to be higher than the minimum required ({min_capacity.get(key)}).")
if int(self.configs.get(key, 0)) > max_capacity.get(key):
raise ValueError(f"{key} value ({self.configs.get(key, 0)}) exceeds the maximum allowed ({max_capacity.get(key)}).")
if min_capacity.get(key) > int(self.configs.get(key, 10**9)):
raise ValueError(f"{key} value ({self.configs.get(key, 0)}) needs to be higher than the minimum required ({min_capacity.get(key)}).")

def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig:
def _set_confidential_config(self, secret_identifier: str) -> None:
"""Fetches a secret value from AWS Secrets Manager and adds defaults"""

def add_defaults(configs: Dict[str, any]) -> AWSConfidentialComputeConfig:
def add_defaults(configs: Dict[str, any]) -> None:
"""Adds default values to configuration if missing."""
default_capacity = self.__get_max_capacity()
configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"])
configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"])
configs.setdefault("debug_mode", False)
return configs

region = self.__get_current_region()
print(f"Running in {region}")
client = boto3.client("secretsmanager", region_name=region)
try:
secret = add_defaults(json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]))
self.__validate_aws_specific_config(secret)
return secret
add_defaults(json.loads(client.get_secret_value(SecretId=secret_identifier)["SecretString"]))
self.__validate_aws_specific_config()
except NoCredentialsError as _:
raise MissingInstanceProfile(self.__class__.__name__)
except ClientError as _:
raise ConfigNotFound(self.__class__.__name__, f"Secret Manager {secret_identifier} in {region}")
raise ApiTokenNotFound(self.__class__.__name__, f"Secret Manager {secret_identifier} in {region}")

@staticmethod
def __get_max_capacity():
Expand Down Expand Up @@ -137,7 +135,7 @@ def __run_config_server(self) -> None:
json.dump(self.configs, config_file)
os.chdir("/opt/uid2operator/config-server")
command = ["./bin/flask", "run", "--host", AuxiliaryConfig.LOCALHOST, "--port", AuxiliaryConfig.FLASK_PORT]
self.run_command(command, seperate_process=True)
self.run_command(command, separate_process=True)

def __run_socks_proxy(self) -> None:
"""
Expand Down Expand Up @@ -205,12 +203,12 @@ def __run_nitro_enclave(self):
if self.configs.get('debug_mode', False):
print("Running in debug_mode")
command += ["--debug-mode", "--attach-console"]
self.run_command(command, seperate_process=True)
self.run_command(command, separate_process=True)

def run_compute(self) -> None:
"""Main execution flow for confidential compute."""
secret_manager_key = self.__get_secret_name_from_userdata()
self.configs = self._get_secret(secret_manager_key)
self._set_confidential_config(secret_manager_key)
print(f"Fetched configs from {secret_manager_key}")
if not self.configs.get("skip_validations"):
self.validate_configuration()
Expand Down Expand Up @@ -246,13 +244,13 @@ def __kill_auxiliaries(self) -> None:
parser.add_argument("-o", "--operation", choices=["stop", "start"], default="start", help="Operation to perform.")
args = parser.parse_args()
try:
ec2 = EC2()
ec2 = EC2EntryPoint()
if args.operation == "stop":
ec2.cleanup()
else:
ec2.run_compute()
except ConfidentialComputeStartupException as e:
print("Failed starting up Confidential Compute. Please checks the logs for errors and retry \n", e)
except Exception as e:
print("Unexpected failure while starting up Confidential Compute. Please contact UID support team with this log \n ", e)
print("Unexpected failure while starting up Confidential Compute. Please contact UID support team with this log \n ", e)

37 changes: 29 additions & 8 deletions scripts/azure-cc/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
# sha from https://hub.docker.com/layers/amd64/eclipse-temurin/21.0.4_7-jre-alpine/images/sha256-8179ddc8a6c5ac9af935020628763b9a5a671e0914976715d2b61b21881cefca
# Use Alpine-based JRE image
FROM eclipse-temurin@sha256:8179ddc8a6c5ac9af935020628763b9a5a671e0914976715d2b61b21881cefca

# Install Packages
RUN apk update && apk add jq
# Install necessary packages and set up virtual environment
RUN apk update && apk add --no-cache jq python3 py3-pip && \
python3 -m venv /venv && \
. /venv/bin/activate && \
pip install --no-cache-dir requests azure-identity azure-keyvault-secrets && \
rm -rf /var/cache/apk/*

# Set virtual environment path
ENV PATH="/venv/bin:$PATH"

# Working directory
WORKDIR /app

# Expose necessary ports
EXPOSE 8080
EXPOSE 9080

# ARG and ENV variables
ARG JAR_NAME=uid2-operator
ARG JAR_VERSION=1.0.0-SNAPSHOT
ARG IMAGE_VERSION=1.0.0.unknownhash
Expand All @@ -17,18 +28,28 @@ ENV IMAGE_VERSION=${IMAGE_VERSION}
ENV REGION=default
ENV LOKI_HOSTNAME=loki

# Copy application files
COPY ./target/${JAR_NAME}-${JAR_VERSION}-jar-with-dependencies.jar /app/${JAR_NAME}-${JAR_VERSION}.jar
COPY ./target/${JAR_NAME}-${JAR_VERSION}-sources.jar /app
COPY ./target/${JAR_NAME}-${JAR_VERSION}-static.tar.gz /app/static.tar.gz
COPY ./conf/*.json /app/conf/
COPY ./conf/*.xml /app/conf/

RUN tar xzvf /app/static.tar.gz --no-same-owner --no-same-permissions && rm -f /app/static.tar.gz
# Extract and clean up tar.gz
RUN tar xzvf /app/static.tar.gz --no-same-owner --no-same-permissions && \
rm -f /app/static.tar.gz

COPY ./azureEntryPoint.py /app
COPY ./confidential_compute.py /app
RUN chmod a+x /app/*.py

COPY ./entrypoint.sh /app/
RUN chmod a+x /app/entrypoint.sh
# Create and configure non-root user
RUN adduser -D uid2-operator && \
mkdir -p /opt/uid2 && chmod 777 -R /opt/uid2 && \
chmod 705 -R /app && mkdir -p /app/file-uploads && chmod 777 -R /app/file-uploads

RUN adduser -D uid2-operator && mkdir -p /opt/uid2 && chmod 777 -R /opt/uid2 && mkdir -p /app && chmod 705 -R /app && mkdir -p /app/file-uploads && chmod 777 -R /app/file-uploads
# Switch to non-root user
USER uid2-operator

CMD ["/app/entrypoint.sh"]
# Run the Python entry point
CMD python3 /app/azureEntryPoint.py
176 changes: 176 additions & 0 deletions scripts/azure-cc/azureEntryPoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#!/usr/bin/env python3

import json
import os
import time
from typing import Dict
import sys
import shutil
import requests
import logging

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from confidential_compute import ConfidentialCompute, MissingConfig, MissingInstanceProfile, AuxiliariesException, SecretAccessDenied, ApiTokenNotFound, ConfidentialComputeStartupException
from azure.keyvault.secrets import SecretClient
from azure.identity import DefaultAzureCredential, CredentialUnavailableError
from azure.core.exceptions import ResourceNotFoundError, ClientAuthenticationError

class AzureEntryPoint(ConfidentialCompute):
    """Startup flow for the UID2 operator in Azure Confidential Compute.

    The flow (see ``run_compute``) checks the required environment
    variables, materialises the operator config file, loads the operator key
    from Azure Key Vault, waits for the sidecar to answer and finally
    launches the Java operator.
    """

    # Deployment settings are read from the environment once, when the class
    # body is executed (i.e. at import time).
    kv_name = os.getenv("VAULT_NAME")
    secret_name = os.getenv("OPERATOR_KEY_SECRET_NAME")
    env_name = os.getenv("DEPLOYMENT_ENVIRONMENT")
    jar_name = os.getenv("JAR_NAME", "default-jar-name")
    jar_version = os.getenv("JAR_VERSION", "default-jar-version")

    # Location of the config file handed to the operator via -Dvertx-config-path.
    FINAL_CONFIG = "/tmp/final-config.json"

    def __init__(self):
        super().__init__()

    def __check_env_variables(self):
        """Verify that every mandatory environment variable is set.

        Raises:
            MissingConfig: listing *all* unset variables, so a misconfigured
                deployment can be fixed in one pass instead of one variable
                per restart.
        """
        required = {
            "VAULT_NAME": AzureEntryPoint.kv_name,
            "OPERATOR_KEY_SECRET_NAME": AzureEntryPoint.secret_name,
            "DEPLOYMENT_ENVIRONMENT": AzureEntryPoint.env_name,
        }
        missing = [name for name, value in required.items() if value is None]
        if missing:
            raise MissingConfig(self.__class__.__name__, missing)
        logging.info("Environment variables validation success")

    def __create_final_config(self):
        """Copy the per-environment config to ``FINAL_CONFIG`` and apply URL overrides.

        The source file is ``/app/conf/<DEPLOYMENT_ENVIRONMENT>-uid2-config.json``.
        When both ``CORE_BASE_URL`` and ``OPTOUT_BASE_URL`` are set and the
        environment is not ``prod``, the integ core/optout URLs inside the
        copied file are replaced by those values. The resulting file content
        is logged at INFO level.

        Exits the process with status 1 if the source config does not exist
        or cannot be copied.
        """
        target_config = f"/app/conf/{AzureEntryPoint.env_name}-uid2-config.json"
        if not os.path.isfile(target_config):
            logging.error(f"Unrecognized config {target_config}")
            sys.exit(1)

        logging.info(f"-- copying {target_config} to {AzureEntryPoint.FINAL_CONFIG}")
        try:
            shutil.copy(target_config, AzureEntryPoint.FINAL_CONFIG)
        except OSError as e:
            logging.error(f"Failed to create {AzureEntryPoint.FINAL_CONFIG} with error: {e}")
            sys.exit(1)

        core_base_url = os.getenv("CORE_BASE_URL")
        optout_base_url = os.getenv("OPTOUT_BASE_URL")
        # URL overrides are deliberately ignored for prod.
        if core_base_url and optout_base_url and AzureEntryPoint.env_name != 'prod':
            logging.info(f"-- replacing URLs by {core_base_url} and {optout_base_url}")
            with open(AzureEntryPoint.FINAL_CONFIG, "r") as file:
                config = file.read()

            config = config.replace("https://core-integ.uidapi.com", core_base_url)
            config = config.replace("https://optout-integ.uidapi.com", optout_base_url)

            with open(AzureEntryPoint.FINAL_CONFIG, "w") as file:
                file.write(config)

        with open(AzureEntryPoint.FINAL_CONFIG, "r") as file:
            logging.info(file.read())

    def __set_base_urls(self):
        """Populate the core/optout base URLs in ``self.configs`` from ``FINAL_CONFIG``.

        Reads the ``core_attest_url`` and ``optout_api_uri`` keys of the final
        config file; a missing key raises ``KeyError``.
        """
        with open(AzureEntryPoint.FINAL_CONFIG, "r") as file:
            jdata = json.load(file)
            self.configs["core_base_url"] = jdata["core_attest_url"]
            self.configs["optout_base_url"] = jdata["optout_api_uri"]

    def __set_api_token(self):
        """Fetch the operator key from Azure Key Vault into ``self.configs["api_token"]``.

        Raises:
            SecretAccessDenied: when no credential is available or
                authentication against the vault fails.
            ApiTokenNotFound: when the secret does not exist in the vault.
        """
        try:
            credential = DefaultAzureCredential()
            kv_url = f"https://{AzureEntryPoint.kv_name}.vault.azure.net"
            secret_client = SecretClient(vault_url=kv_url, credential=credential)
            secret = secret_client.get_secret(AzureEntryPoint.secret_name)
            self.configs["api_token"] = secret.value

        except (CredentialUnavailableError, ClientAuthenticationError) as auth_error:
            logging.error(f"Read operator key, authentication error: {auth_error}")
            # Chain the original error so the Azure SDK traceback is preserved.
            raise SecretAccessDenied(self.__class__.__name__, str(auth_error)) from auth_error
        except ResourceNotFoundError as not_found_error:
            logging.error(f"Read operator key, secret not found: {AzureEntryPoint.secret_name}. Error: {not_found_error}")
            raise ApiTokenNotFound(self.__class__.__name__, str(not_found_error)) from not_found_error

    def _set_confidential_config(self, secret_identifier: str = None):
        """Fill ``self.configs`` from the environment, Key Vault and the final config.

        Args:
            secret_identifier: unused here; the secret name comes from the
                ``OPERATOR_KEY_SECRET_NAME`` environment variable. The
                parameter is kept for signature compatibility with the base
                class.
        """
        self.configs["skip_validations"] = os.getenv("SKIP_VALIDATIONS", "false").lower() == "true"
        self.configs["debug_mode"] = os.getenv("DEBUG_MODE", "false").lower() == "true"
        self.configs["environment"] = AzureEntryPoint.env_name

        # set self.configs["api_token"]
        self.__set_api_token()
        # set base urls from final config file
        self.__set_base_urls()

    def __run_operator(self):
        """Export the vault settings and launch the operator jar.

        The jar path is relative (``<JAR_NAME>-<JAR_VERSION>.jar``), so the
        process must be started from the directory that contains it.
        """
        # Start the operator
        os.environ["azure_vault_name"] = AzureEntryPoint.kv_name
        os.environ["azure_secret_name"] = AzureEntryPoint.secret_name

        java_command = [
            "java",
            "-XX:MaxRAMPercentage=95", "-XX:-UseCompressedOops", "-XX:+PrintFlagsFinal",
            "-Djava.security.egd=file:/dev/./urandom",
            "-Dvertx.logger-delegate-factory-class-name=io.vertx.core.logging.SLF4JLogDelegateFactory",
            "-Dlogback.configurationFile=/app/conf/logback.xml",
            f"-Dvertx-config-path={AzureEntryPoint.FINAL_CONFIG}",
            "-jar",
            f"{AzureEntryPoint.jar_name}-{AzureEntryPoint.jar_version}.jar"
        ]
        logging.info("-- starting java operator application")
        self.run_command(java_command, separate_process=False)

    def _validate_auxiliaries(self):
        """Block until the sidecar answers its ping endpoint.

        Polls ``http://169.254.169.254/ping`` up to 15 times; the pause
        between attempts starts at one second and grows by one second after
        every failed attempt. A 200 or 204 response counts as success.

        Raises:
            AuxiliariesException: when the sidecar has not answered
                successfully after the last attempt.
        """
        logging.info("Waiting for sidecar ...")

        max_retries = 15
        ping_url = "http://169.254.169.254/ping"
        delay = 1

        for attempt in range(1, max_retries + 1):
            try:
                response = requests.get(ping_url, timeout=5)
                if response.status_code in [200, 204]:
                    logging.info("Sidecar started successfully.")
                    return
                else:
                    logging.warning(
                        f"Attempt {attempt}: Unexpected status code {response.status_code}. Response: {response.text}"
                    )
            except Exception as e:
                # Any failure (connection refused, timeout, ...) just means
                # "not ready yet"; keep retrying until attempts run out.
                logging.info(f"Attempt {attempt}: Error during request - {e}")

            if attempt == max_retries:
                logging.error(
                    f"Sidecar failed to start after {max_retries} attempts. Exiting."
                )
                raise AuxiliariesException(self.__class__.__name__)

            logging.info(f"Retrying in {delay} seconds... (Attempt {attempt}/{max_retries})")
            time.sleep(delay)
            delay += 1

    def run_compute(self) -> None:
        """Main execution flow for confidential compute.

        Steps, in order: check environment variables, build the final config
        file, load the confidential config, run ``validate_configuration``
        unless ``skip_validations`` is set, set up auxiliaries, wait for the
        sidecar, then start the operator.
        """
        self.__check_env_variables()
        self.__create_final_config()
        self._set_confidential_config()
        if not self.configs.get("skip_validations"):
            self.validate_configuration()
        self._setup_auxiliaries()
        # The sidecar must be reachable before the operator starts; without
        # this call the readiness wait defined above would never run.
        self._validate_auxiliaries()
        self.__run_operator()

    def _setup_auxiliaries(self) -> None:
        """Set up auxiliary services; nothing needs to be started by this class."""
        pass

if __name__ == "__main__":
    # Script entry point: configure root logging, then run the Azure startup
    # flow. Failures are logged (with traceback) rather than re-raised.
    logging.basicConfig(level=logging.INFO)
    logging.info("Start AzureEntryPoint")
    try:
        entry_point = AzureEntryPoint()
        entry_point.run_compute()
    except ConfidentialComputeStartupException as startup_error:
        # Known startup failure raised by the confidential-compute flow.
        logging.exception(f"Failed starting up Azure Confidential Compute. Please checks the logs for errors and retry {startup_error}")
    except Exception as unexpected_error:
        # Anything else is unexpected; the log asks the user to contact support.
        logging.exception(f"Unexpected failure while starting up Azure Confidential Compute. Please contact UID support team with this log {unexpected_error}")
Loading

0 comments on commit 3301aa4

Please sign in to comment.