From c0414eb00c8b2288604de8a23057c072bef1f94f Mon Sep 17 00:00:00 2001 From: Venkat Date: Mon, 1 Jul 2024 20:55:19 -0700 Subject: [PATCH] updates versions and remove loki/logcli/googledrive/rclone (#43) * feat: update versions * major: remove loki/logcli * major: remove rclone/gdrive as it's not being used --- Dockerfile | 19 +-------- gdrive-backup.sh | 27 ------------ loki-logcli-backup.sh | 96 ------------------------------------------- rclone.conf | 3 -- 4 files changed, 2 insertions(+), 143 deletions(-) delete mode 100755 gdrive-backup.sh delete mode 100755 loki-logcli-backup.sh delete mode 100644 rclone.conf diff --git a/Dockerfile b/Dockerfile index e6c96eb..67fa4a3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,9 @@ # Use an official Ubuntu runtime as a parent image FROM ubuntu:22.04@sha256:19478ce7fc2ffbce89df29fea5725a8d12e57de52eb9ea570890dc5852aac1ac -ENV VERSION_AWS_CLI="2.16.10" -ENV VERSION_GH_CLI="2.51.0" -ENV VERSION_RCLONE="1.67.0" +ENV VERSION_AWS_CLI="2.17.6" +ENV VERSION_GH_CLI="2.52.0" ENV VERSION_VAULT="1.14.10" -ENV VERSION_LOKI="2.9.8" # Update the system and install required packages RUN apt-get update -y && \ @@ -26,28 +24,15 @@ RUN curl --proto =https -fsSL https://releases.hashicorp.com/vault/${VERSION_VAU chmod +x /usr/bin/vault && \ rm vault.zip -# Install loki's logcli -RUN curl --proto =https -L -o logcli-linux-amd64.zip https://github.com/grafana/loki/releases/download/v${VERSION_LOKI}/logcli-linux-amd64.zip \ - && unzip logcli-linux-amd64.zip \ - && mv logcli-linux-amd64 /usr/bin/logcli \ - && rm logcli-linux-amd64.zip - # Install GitHub CLI RUN curl --proto =https -LO https://github.com/cli/cli/releases/download/v${VERSION_GH_CLI}/gh_${VERSION_GH_CLI}_linux_amd64.deb && \ dpkg -i gh_${VERSION_GH_CLI}_linux_amd64.deb && \ rm gh_${VERSION_GH_CLI}_linux_amd64.deb -RUN curl --proto =https -LO https://github.com/rclone/rclone/releases/download/v${VERSION_RCLONE}/rclone-v${VERSION_RCLONE}-linux-amd64.deb && \ - dpkg -i rclone-v${VERSION_RCLONE}-linux-amd64.deb && \ - rm rclone-v${VERSION_RCLONE}-linux-amd64.deb -ADD rclone.conf /root/.config/rclone/rclone.conf - # Set working directory in the container RUN mkdir /app ADD github-backup.sh /usr/bin/backup-github -ADD gdrive-backup.sh /usr/bin/backup-gdrive -ADD loki-logcli-backup.sh /usr/bin/backup-loki-logs-as-json ADD vault-backup.sh /usr/bin/backup-vault ADD s3-backup.sh /usr/bin/s3-backup diff --git a/gdrive-backup.sh b/gdrive-backup.sh deleted file mode 100755 index 2af505c..0000000 --- a/gdrive-backup.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -e - -# Check if required variables are set -if [[ -z "${RCLONE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS}" ]]; then - echo "Error: RCLONE_DRIVE_SERVICE_ACCOUNT_CREDENTIALS is not set." - exit 1 -fi - -if [[ -z "${RCLONE_DRIVE_TEAM_DRIVE}" ]]; then - echo "Error: RCLONE_DRIVE_TEAM_DRIVE is not set." - exit 1 -fi - - -BACKUP_DATE=$(date '+%Y-%m-%d') -BACKUP_LOCATION="google_drive_team_drives/$BACKUP_DATE/$RCLONE_DRIVE_TEAM_DRIVE" -mkdir -p $BACKUP_LOCATION - -rclone copy -P --transfers=100 gdrive: "${BACKUP_LOCATION}" -tar -cf "${BACKUP_LOCATION}.tar" ${BACKUP_LOCATION} && rm -rf "${BACKUP_LOCATION}" - - -echo "Uploading everything to S3...." -cd /app -aws s3 cp --recursive google_drive_team_drives/ s3://${S3_BUCKET_NAME}/google_drive_team_drives/ \ No newline at end of file diff --git a/loki-logcli-backup.sh b/loki-logcli-backup.sh deleted file mode 100755 index 15efbb8..0000000 --- a/loki-logcli-backup.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash -set -e - -cleanup() { - echo "Cleaning up..." - rm -rf *.part* || true -} - -cleanup - -s3_key_prefix="${CAPTAIN_DOMAIN}/loki_exported_logs" - -aws sts get-caller-identity - -# Check to see if LOGCLI and LOKI API/SERVER are running the same version. We don't want to have a weird bug caused by a version mismatch. -LOGCLI_VERSION=$(logcli --version 2>&1 | grep -oP 'logcli, version \K[0-9.]+' | awk '{print $1}') -LOKI_SERVER_VERSION=$(curl $LOKI_ADDR/loki/api/v1/status/buildinfo -s | jq .version -r) - -loki_version_info() { - echo "LOGCLI_VERSION: $LOGCLI_VERSION" - echo "LOKI_SERVER_VERSION: $LOKI_SERVER_VERSION" -} - -if [ "$LOGCLI_VERSION" != "$LOKI_SERVER_VERSION" ]; then - echo "ERROR: The LOGCLI and Loki API versions do not match. Exiting the script." - loki_version_info - exit 1 -fi - -ERRORS=0 - -# Loop through the last 72 hours, excluding the most recent 2 hours -for i in {2..72}; do - echo "Processing hour $i" - - # Get the date and time - now=$(date -u -d "${i} hours ago" '+%Y-%m-%dT%H:00:00Z') - start_time=$(date -u -d "$now - 1 hour" '+%Y-%m-%dT%H:00:00Z') - echo "start_time: $start_time" - end_time=$(date -u -d "$now" '+%Y-%m-%dT%H:00:00Z') - echo "end_time: $end_time" - - # Prepare part file name - prefix_file_name="loki_v${LOGCLI_VERSION//./-}__" - time_window_of_logs="$(date -u -d "$start_time" '+%Y%m%dT%H%M%S')_$(date -u -d "$end_time" '+%Y%m%dT%H%M%S').part" - part_file="${prefix_file_name}${time_window_of_logs}" - echo "part_file: $part_file" - - # Prepare S3 path - S3_FOLDER_PATH="${s3_key_prefix}/$(date -u -d "$start_time" '+%Y/%m/%d/%H')/" - s3_path="${S3_FOLDER_PATH}${part_file}.gz" - echo "s3_path: $s3_path" - - # Check if the file already exists in S3 and has been replicated. - FILE_EXISTS=0 - existing_files=$(aws s3api list-objects --bucket "$S3_BUCKET_NAME" --prefix "$S3_FOLDER_PATH" --query 'Contents[].Key' --output text) - for file in $existing_files; do - if [[ $file == *"$time_window_of_logs"* ]]; then - echo "Skipping the time window: ${time_window_of_logs} as it is already covered by the ${file} within S3." - FILE_EXISTS=1 - fi - done - - - if [ $FILE_EXISTS -eq 0 ]; then - echo "The ${file} does not exist in S3. Starting to fetch logs from loki now..." - - # Query Loki and create part file. The part file will be created in the current directory. - logcli query '{job=~".+"}' --output jsonl --timezone=UTC --tls-skip-verify --from "$start_time" --to "$end_time" --parallel-max-workers=2 --parallel-duration=120m --part-path-prefix=$(pwd)/$prefix_file_name - - # Check for multiple part files. This should never since each parallel-duration is 2 hours which exceeds the requested time range of 1 hour. - part_files_count=$(ls -1 *.part 2>/dev/null | wc -l) - - if [ $part_files_count -gt 1 ]; then - echo "Error: Found multiple part files. There should only be 1 part file. Skipping to the next hour." - ERRORS += 1 - cleanup - continue - fi - - part_file=$(ls *.part | head -n 1) - - # Gzip and upload the part file to S3 - gzip "$part_file" - aws s3 cp "${part_file}.gz" "s3://${S3_BUCKET_NAME}/${s3_path}" - echo "The ${file} has been uploaded to S3." - - fi - cleanup - -done - -if [ "$ERRORS" -gt 0 ]; then - echo "ERROR: Found $ERRORS errors. Exiting the script." - exit 1 -fi \ No newline at end of file diff --git a/rclone.conf b/rclone.conf deleted file mode 100644 index a4d3f8a..0000000 --- a/rclone.conf +++ /dev/null @@ -1,3 +0,0 @@ -[gdrive] -type = drive -scope = drive.readonly \ No newline at end of file