Skip to content

Commit

Permalink
Merge pull request #514 from neonlabsorg/feat/ci-cd-retries
Browse files (browse the repository at this point in the history)
DOPS-1554 | Add ci/cd retries
  • Loading branch information
sferatime authored Feb 12, 2025
2 parents c661b2f + 15a4015 commit 8471418
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 23 deletions.
53 changes: 44 additions & 9 deletions deploy/cli/infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import typing as tp
import pathlib
import logging
import time

from paramiko.client import SSHClient
from scp import SCPClient
Expand Down Expand Up @@ -57,22 +58,56 @@ def deploy_infrastructure(
os.environ["TF_VAR_proxy_model_commit"] = proxy_branch
os.environ["TF_VAR_dockerhub_org_name"] = os.environ.get("GITHUB_REPOSITORY_OWNER")
os.environ["TF_VAR_devnet_solana_url"] = devnet_solana_url
os.environ["TF_LOG"] = "DEBUG"

if use_real_price:
os.environ["TF_VAR_use_real_price"] = "1"

instance_types = ["cpx51", "cx52", "cpx41", "cx42"]
locations = ["hel1", "nbg1", "fsn1"]
instances = [{"server_type": i, "location": j} for i in instance_types for j in locations]
print("Possible instance options: ", instances)

retry_amount = 10
retry_amount = (
len(instances) if len(instances) > retry_amount else retry_amount
) # Raise the retry budget to at least the number of server type / location combinations

terraform.init(backend_config=TF_BACKEND_CONFIG)
return_code, stdout, stderr = terraform.apply(skip_plan=True)
print(f"code: {return_code}")
print(f"stdout: {stdout}")
print(f"stderr: {stderr}")
with open("terraform.log", "w") as file:
file.write(stdout)
file.write(stderr)
if return_code != 0:

instance_iterator = 0
retry_iterator = 0
while retry_iterator < retry_amount:
return_code, stdout, stderr = terraform.apply(
skip_plan=True,
capture_output=True,
var={
"server_type": instances[instance_iterator]["server_type"],
"location": instances[instance_iterator]["location"],
},
)
print(f"code: {return_code}")
print(f"stdout: {stdout}")
print(f"stderr: {stderr}")
if return_code == 0:
break
elif return_code != 0:
retry_iterator += 1
if "(resource_unavailable)" in stderr:
instance_iterator += 1
print(
"Resource_unavailable; ",
instances[instance_iterator],
" Trying to recreate instances with another region / another instance type...",
)
else:
print("Retry because ", stderr, "; Retries left: ", retry_amount - retry_iterator)
time.sleep(3)
if retry_iterator >= retry_amount:
print("Retries left: ", retry_amount - retry_iterator)
print("Terraform apply failed:", stderr)
print("Terraform infrastructure is not built correctly")
sys.exit(1)

output = terraform.output(json=True)
print(f"output: {output}")
proxy_ip = output["proxy_ip"]["value"]
Expand Down
26 changes: 13 additions & 13 deletions deploy/hetzner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ resource "hcloud_server" "proxy" {
content = data.template_file.proxy_init.rendered
destination = "/tmp/proxy_init.sh"

connection {
type = "ssh"
user = "root"
host = hcloud_server.proxy.ipv4_address
private_key = file("/tmp/ci-stands")
}
connection {
type = "ssh"
user = "root"
host = hcloud_server.proxy.ipv4_address
private_key = file("/tmp/ci-stands")
}

}

Expand All @@ -37,18 +37,18 @@ resource "hcloud_server" "proxy" {
"chmod a+x /tmp/proxy_init.sh",
"sudo /tmp/proxy_init.sh"
]
connection {
type = "ssh"
user = "root"
host = hcloud_server.proxy.ipv4_address
private_key = file("/tmp/ci-stands")
}
connection {
type = "ssh"
user = "root"
host = hcloud_server.proxy.ipv4_address
private_key = file("/tmp/ci-stands")
}

}

labels = {
environment = "ci"
purpose = "ci-oz-full-tests"
purpose = "ci-oz-full-tests"
}
depends_on = [
hcloud_server.solana
Expand Down
2 changes: 1 addition & 1 deletion deploy/hetzner/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ variable "dockerhub_org_name" {
}

variable "use_real_price" {
type = number
type = number
default = 0
}

0 comments on commit 8471418

Please sign in to comment.