Skip to content

Commit

Permalink
Introduce health sidecar to better integrate with PDB
Browse files Browse the repository at this point in the history
Signed-off-by: Sebastian Woehrl <sebastian.woehrl@maibornwolff.de>
  • Loading branch information
swoehrl-mw committed Jan 28, 2025
1 parent 2e78967 commit 28b4c36
Show file tree
Hide file tree
Showing 30 changed files with 1,188 additions and 102 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/docker-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Docker Build
working-directory: ./opensearch-operator
run: |
make docker-build-multiarch
- name: Docker Build Sidecar image
working-directory: ./operator-sidecar
run: |
cd opensearch-operator
make docker-build-multiarch
46 changes: 40 additions & 6 deletions .github/workflows/functional-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ jobs:
export CLUSTER_NAME=opensearch-operator-tests
## Check disk to avoid failed shard assignments due to watermarking
df -h
cd opensearch-operator
## Build sidecar docker image and import into k3d
cd operator-sidecar
IMG=operator-sidecar:test make docker-build
k3d image import -c $CLUSTER_NAME operator-sidecar:test
cd ../opensearch-operator
## Prepare kubeconfig
k3d kubeconfig get $CLUSTER_NAME > functionaltests/kubeconfig
export KUBECONFIG=$(pwd)/functionaltests/kubeconfig
Expand All @@ -36,9 +43,17 @@ jobs:
k3d image import -c $CLUSTER_NAME controller:latest
## Install helm chart
helm install opensearch-operator ../charts/opensearch-operator --set manager.image.repository=controller --set manager.image.tag=latest --set manager.image.pullPolicy=IfNotPresent --namespace default --wait
helm install opensearch-operator ../charts/opensearch-operator \
--set manager.image.repository=controller \
--set manager.image.tag=latest \
--set manager.image.pullPolicy=IfNotPresent \
--set operatorSidecar.image=operator-sidecar:test \
--namespace default --wait
cd functionaltests
kubectl apply -f rbac.yaml
## Run tests
go test ./operatortests -timeout 30m
Expand Down Expand Up @@ -70,7 +85,13 @@ jobs:
export CLUSTER_NAME=opensearch-operator-tests
## Check disk to avoid failed shard assignments due to watermarking
df -h
cd opensearch-operator
## Build sidecar docker image and import into k3d
cd operator-sidecar
IMG=operator-sidecar:test make docker-build
k3d image import -c $CLUSTER_NAME operator-sidecar:test
cd ../opensearch-operator
## Prepare kubeconfig
k3d kubeconfig get $CLUSTER_NAME > functionaltests/kubeconfig
export KUBECONFIG=$(pwd)/functionaltests/kubeconfig
Expand All @@ -81,9 +102,22 @@ jobs:
## Import controller docker image
k3d image import -c $CLUSTER_NAME controller:latest
## Install helm chart
helm install opensearch-operator ../charts/opensearch-operator --set manager.image.repository=controller --set manager.image.tag=latest --set manager.image.pullPolicy=IfNotPresent --namespace default --wait
helm install opensearch-cluster ../charts/opensearch-cluster --set opensearchCluster.general.version=${{ matrix.version }} --set opensearchCluster.dashboards.version=${{ matrix.version }} --wait
## Install helm charts
helm install opensearch-operator ../charts/opensearch-operator \
--set manager.image.repository=controller \
--set manager.image.tag=latest \
--set manager.image.pullPolicy=IfNotPresent \
--set operatorSidecar.image=operator-sidecar:test \
--namespace default --wait
kubectl apply -f functionaltests/rbac.yaml
helm install opensearch-cluster ../charts/opensearch-cluster \
--set opensearchCluster.general.version=${{ matrix.version }} \
--set opensearchCluster.dashboards.version=${{ matrix.version }} \
-f functionaltests/helm-cluster-values.yaml \
--wait
cd functionaltests
## Run tests
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/go-linting.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,17 @@ jobs:
with:
go-version-file: 'go.work'
cache: false
- name: lint go
- name: lint opensearch-operator
uses: golangci/golangci-lint-action@v4
with:
# Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
version: v1.57
working-directory: opensearch-operator
args: --timeout=6m --skip-dirs="(^|/)responses($|/)" -v
- name: lint operator-sidecar
uses: golangci/golangci-lint-action@v4
with:
# Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version.
version: v1.57
working-directory: operator-sidecar
args: --timeout=6m -v
Original file line number Diff line number Diff line change
Expand Up @@ -4800,6 +4800,34 @@ spec:
additionalProperties:
type: string
type: object
operatorSidecar:
properties:
enable:
type: boolean
image:
type: string
imagePullPolicy:
description: PullPolicy describes a policy for if/when to
pull a container image
type: string
imagePullSecrets:
items:
description: |-
LocalObjectReference contains enough information to let you locate the
referenced object inside the same namespace.
properties:
name:
description: |-
Name of the referent.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
TODO: Add other useful fields. apiVersion, kind, uid?
type: string
type: object
x-kubernetes-map-type: atomic
type: array
readinessPerPool:
type: boolean
type: object
pdb:
properties:
enable:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ spec:
value: "{{ .Values.manager.parallelRecoveryEnabled }}"
- name: PPROF_ENDPOINTS_ENABLED
value: "{{ .Values.manager.pprofEndpointsEnabled }}"
{{- if .Values.operatorSidecar.image }}
- name: OPERATOR_SIDECAR_IMAGE
value: {{ .Values.operatorSidecar.image }}
{{- end }}
- name: OPERATOR_SIDECAR_VERSION
value: "{{ default .Chart.AppVersion }}"
{{- if .Values.manager.extraEnv }}
{{- toYaml .Values.manager.extraEnv | nindent 8 }}
{{- end }}
Expand Down
5 changes: 5 additions & 0 deletions charts/opensearch-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@ manager:
# watch objects in the desired namespace. Defaults is to watch all namespaces.
watchNamespace:

# Configuration for the operator sidecar that can be added to opensearch pods
operatorSidecar: {}
# full name and tag for an alternate image to use for the sidecar (e.g. someregistry.com/operator-sidecar:1.2.3)
# image:

# Install the Custom Resource Definitions with Helm
installCRDs: true

Expand Down
6 changes: 6 additions & 0 deletions docs/designs/crd.md
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,12 @@ Every NodePool is defining different Opensearch Nodes StatefulSet
<td>Updates the probes timeouts and thresholds config</td>
<td>false</td>
<td>-</td>
</tr><tr>
<td><b>operatorSidecar</b></td>
<td>OperatorSidecar</td>
<td>Special sidecar for each pod to deal with cluster-state-dependent readiness probes</td>
<td>false</td>
<td>-</td>
</tr>
</table>

Expand Down
77 changes: 76 additions & 1 deletion docs/userguide/main.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,6 @@ To install a plugin for the bootstrap pod add it to the list under `bootstrap.pl
pluginsList: ["repository-s3"]
```


Please note:

* [Bundled plugins](https://opensearch.org/docs/latest/install-and-configure/install-opensearch/plugins/#bundled-plugins) do not have to be added to the list, they are installed automatically
Expand Down Expand Up @@ -1067,6 +1066,82 @@ The following considerations should be taken into account in order to increase t

Note: To change the `diskSize` from `G` to `Gi` or vice-versa, first make sure data is backed up and make sure the right conversion number is identified, so that the underlying volume has the same value, and then re-apply the cluster yaml. This will make sure the statefulset is re-created with the right value in VolumeClaimTemplates; this operation is expected to have no downtime.

### Keeping Opensearch available during Kubernetes node replacements

When you have restarts of Kubernetes nodes (e.g. because you are upgrading the Kubernetes version) there is a risk of your Opensearch cluster becoming unavailable for a time. The problem is that Kubernetes cannot know when Opensearch has internally finished replicating data and has a green cluster state. So it can happen that a second node with an Opensearch pod is restarted/replaced before Opensearch had a chance to resync. This could lead to indices becoming unavailable with both primary and replica shards being on the pods that were just restarted.

The canonical way around this is to set a Pod Disruption Budget (PDB). But this relies on the readiness probes of all affected pods, which for Opensearch do not reflect cluster state but only whether the local pod is reachable. Changing the readiness probe to reflect the Opensearch cluster state (green/yellow/red) would make all pods unreachable (because readiness controls which pods are reachable via Service/Ingress).

To support such cases the operator provides a special sidecar added to all Opensearch pods. The sidecars sync with each other so that exactly one of them exposes Opensearch cluster state via the readiness probe. So if the cluster became yellow, the readiness for one pod would switch to not ready. If you also configure a PDB that allows for only one pod to be unavailable, then any Kubernetes node replacements with Opensearch pods on them will be blocked until the Opensearch cluster is healthy again.

Note that there can still be situations with more than one pod down if a pod crashes or is restarted for some other reason while a node restart is in progress.

> [!WARNING]
> The sidecar feature is still experimental and could have unintended interference with normal cluster operations. Please test it before using it on a production system.

To configure the sidecar you need to do the following:

1. Create a Kubernetes ServiceAccount and give it the permission to perform leader election (find an example manifest below)
2. Configure your opensearch cluster to use that ServiceAccount
3. Enable the sidecar for nodePools with the data role

The ServiceAccount manifest should look like this (you need to apply this before creating your Opensearch cluster):

```yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: my-opensearch
automountServiceAccountToken: true
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: leaderelection
namespace: default
rules:
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: opensearch-leaderelection
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: leaderelection
subjects:
  - kind: ServiceAccount
    name: my-opensearch
    namespace: default
```

The changes to the cluster manifest look like this:

```yaml
spec:
general:
serviceAccount: my-opensearch # The name of the ServiceAccount with leader election permissions
nodePools:
- component: foobar
pdb: # PDB is needed so that node restarts take the pods into account
enable: true
maxUnavailable: 1
operatorSidecar:
enable: true # This enables the sidecar with the cluster-state-dependent readiness probe
readinessPerPool: true # Setting this to false means only one pod from all nodepools has the state-dependent readiness probe instead of one per nodepool
```

Keep in mind that this setup could block cluster node restarts/replacements for a long time if Opensearch has a problem, so be sure to choose sensible timeouts / max wait times in your operations tooling (ClusterAPI or cloud provider configuration).

> [!WARNING]
> If you perform an upgrade of Opensearch itself, you should consider disabling the sidecar beforehand. Upgrades can lead to the cluster staying in a yellow state for the entire duration of the upgrade, which the sidecar cannot distinguish from an actual problem, so the upgrade would block. To stay on the safe side it is recommended to disable the sidecar before an upgrade (this requires a rolling restart of the opensearch pods) and enable it again afterwards.

## User and role management

An important part of any OpenSearch cluster is the user and role management to give users access to the cluster (via the opensearch-security plugin). By default the operator will use the included demo securityconfig with default users (see [internal_users.yml](https://github.com/opensearch-project/security/blob/main/config/internal_users.yml) for a list of users). For any production installation you should swap that out with your own configuration.
Expand Down
1 change: 1 addition & 0 deletions go.work
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ go 1.22.1
use (
./opensearch-operator
./opensearch-operator/functionaltests
./operator-sidecar
)
51 changes: 51 additions & 0 deletions jenkins/release.jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,59 @@ pipeline {
sh("git checkout ${ref_final}")

def OPERATOR_PRODUCT = "opensearch-operator"
def SIDECAR_PRODUCT = "operator-sidecar"
def OPERATOR_VERSION = ref_final.substring(1);


// First build and promote image for the sidecar
echo("${SIDECAR_PRODUCT}: ${OPERATOR_VERSION}")

// Build and push to dockerhub staging repo https://hub.docker.com/r/opensearchstaging/opensearch-operator.
dockerBuild: {
build job: 'docker-build',
parameters: [
string(name: 'DOCKER_BUILD_GIT_REPOSITORY', value: "${REPO_URL}"),
string(name: 'DOCKER_BUILD_GIT_REPOSITORY_REFERENCE', value: "${ref_final}"),
string(name: 'DOCKER_BUILD_SCRIPT_WITH_COMMANDS', value: [
'ls -l',
'cd operator-sidecar',
[
'bash',
'../scripts/build-image-multi-arch.sh',
"-v ${OPERATOR_VERSION}",
"-a 'x64,arm,arm64'",
"-f Dockerfile",
"-p ${SIDECAR_PRODUCT}"
].join(' ')
].join(' && ')),
]
}
// This is required to copy the operator to staging ECR https://gallery.ecr.aws/opensearchstaging/opensearch-operator repo as the docker-promotion job does not promote to staging ECR.
dockerCopy: {
build job: 'docker-copy',
parameters: [
string(name: 'SOURCE_IMAGE_REGISTRY', value: 'opensearchstaging'),
string(name: 'SOURCE_IMAGE', value: "${SIDECAR_PRODUCT}:${OPERATOR_VERSION}"),
string(name: 'DESTINATION_IMAGE_REGISTRY', value: 'public.ecr.aws/opensearchstaging'),
string(name: 'DESTINATION_IMAGE', value: "${SIDECAR_PRODUCT}:${OPERATOR_VERSION}")
]
}

// Promote to DockerHub Prod https://hub.docker.com/r/opensearchproject/opensearch-operator repo.
// Promote to ECR Prod https://gallery.ecr.aws/opensearchproject/opensearch-operator repo.
dockerPromotion: {
build job: 'docker-promotion',
parameters: [
string(name: 'SOURCE_IMAGES', value: "${SIDECAR_PRODUCT}:${OPERATOR_VERSION}"),
string(name: 'RELEASE_VERSION', value: "${OPERATOR_VERSION}"),
booleanParam(name: 'DOCKER_HUB_PROMOTE', value: true),
booleanParam(name: 'ECR_PROMOTE', value: true),
booleanParam(name: 'TAG_LATEST', value: true),
booleanParam(name: 'TAG_MAJOR_VERSION', value: false)
]
}

// Then build the actual operator image
echo("${OPERATOR_PRODUCT}: ${OPERATOR_VERSION}")

// Build and push to dockerhub staging repo https://hub.docker.com/r/opensearchstaging/opensearch-operator.
Expand Down
2 changes: 1 addition & 1 deletion opensearch-operator/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Build the manager binary
FROM --platform=$BUILDPLATFORM golang:1.22.1 as builder
FROM --platform=$BUILDPLATFORM golang:1.22.1 AS builder

WORKDIR /workspace
# Copy the Go Modules manifests
Expand Down
7 changes: 7 additions & 0 deletions opensearch-operator/api/v1/opensearch_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ type NodePool struct {
PriorityClassName string `json:"priorityClassName,omitempty"`
Pdb *PdbConfig `json:"pdb,omitempty"`
Probes *ProbesConfig `json:"probes,omitempty"`
OperatorSidecar *OperatorSidecarConfig `json:"operatorSidecar,omitempty"`
}

// PersistencConfig defines options for data persistence
Expand Down Expand Up @@ -175,6 +176,12 @@ type BootstrapConfig struct {
Keystore []KeystoreValue `json:"keystore,omitempty"`
}

type OperatorSidecarConfig struct {
Enable bool `json:"enable,omitempty"`
*ImageSpec `json:",inline,omitempty"`
ReadinessPerPool bool `json:"readinessPerPool,omitempty"`
}

type DashboardsServiceSpec struct {
// +kubebuilder:validation:Enum=ClusterIP;NodePort;LoadBalancer
// +kubebuilder:default=ClusterIP
Expand Down
Loading

0 comments on commit 28b4c36

Please sign in to comment.