diff --git a/tools/python/timestream-compute-units-testing/README.md b/tools/python/timestream-compute-units-testing/README.md index b74ca3bd..df9c1a6f 100644 --- a/tools/python/timestream-compute-units-testing/README.md +++ b/tools/python/timestream-compute-units-testing/README.md @@ -34,4 +34,28 @@ We provided scripts to create Timestream for LiveAnalytics resource (database an Binning, grouping, and ordering for given host. A relatively more resource intensive query than lastpoint-query ```sql select bin(time, 1m) AS binned_time, max(cpu_utilization) as max_cpu_utilization from "devops"."sample_devops" where time > ago(10m) and hostname='host2' group by bin(time, 1m) order by binned_time asc - ``` \ No newline at end of file + ``` + + + +The following graphs show Latency Percentiles (seconds), Queries Per Minute, and Throttling Counts vs. the number of workers. + +select last point with 4 TCU configuration + +We start with 7 workers (users) and continue to increase the number of users accessing the data, and we notice that with just 4 TCUs, the service supported approximately 4830 queries per minute (approximately 81 queries per second) with p90 latency of less than 150ms. As the number of Grafana users/workers increases, we see an increase in the latency, and thereby a decrease in the number of queries per minute, but zero throttles. This is because the default SDK maximum retries is set to 3 (a best practice) and the SDK retries the queries before throttling. If sub-500ms query latency is acceptable for your use case, you could serve up to 35 concurrent users and 4000+ queries per minute with 4 TCU. +select last point with 8 TCU configuration : We increase the MaxQueryTCU to 8 and rerun the test. (8 TCUs might not be immediately allocated; based on the workload, the service automatically scales, so there is a chance the initial results are only for 4 TCUs.) + + + + +We observed with 8 TCUs the service supported approximately 5000 queries. 
If your requirement of p99 is sub second performance, you could serve 50 concurrent users and 5000 queries per minute with 8 TCU. Based, on the use-case you could have lesser number of queries which may yield p99 to sub 500 milliseconds as well. + + + +binning and grouping with 4 TCU configuration + +With 4 TCUs, you can analyze the metrics of 2750+ hosts in a minute with 30 concurrent workers (Grafana users), and maximum of approximately 3346 queries with 14 concurrent workers at 55 QPS. As the number of Grafana users increase, so does the latency and thereby fewer queries per minute. + +binning and grouping with 8 TCU configuration + +With 8 TCUs, you can analyze the metrics of 3750+ hosts in a minute for 30 concurrent Grafana users. diff --git a/tools/python/timestream-compute-units-testing/last_point-query.ipynb b/tools/python/timestream-compute-units-testing/last_point-query.ipynb new file mode 100644 index 00000000..7381790e --- /dev/null +++ b/tools/python/timestream-compute-units-testing/last_point-query.ipynb @@ -0,0 +1,187 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e33a646c", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import boto3\n", + "import threading\n", + "import time\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from botocore.exceptions import ClientError\n", + "\n", + "# Initialize the Timestream client\n", + "client = boto3.client('timestream-query', region_name='us-east-2')\n", + "\n", + "# List of different Timestream queries\n", + "queries = [\n", + " f'select memory from \"devops\".\"sample_devops\" where time > ago(10m) and hostname=\\'host1\\' order by time desc limit 1',\n", + "]\n", + "\n", + "# Function to run a single query and measure its duration\n", + "def run_query(query, worker_id, worker_results,throttling_results):\n", + " start_time = time.time()\n", + " worker_results[worker_id] = []\n", + " 
end_time = time.time() + duration_seconds\n", + " throttling_results[worker_id] = []\n", + " throttling_count = []\n", + " results_dict = {}\n", + " while time.time() < end_time:\n", + " try:\n", + " start = time.time()\n", + " response = client.query(QueryString=query)\n", + " duration = time.time() - start\n", + " worker_results[worker_id].append((duration))\n", + " except ClientError as e:\n", + " if e.response['Error']['Code'] == 'ThrottlingException':\n", + " throttling_results[worker_id].append(\"ThrottlingException\")\n", + " else:\n", + " raise e \n", + "\n", + "\n", + "# Function to run queries in parallel using threads\n", + "def run_parallel_queries(duration_seconds, queries, users):\n", + " end_time = time.time() + duration_seconds\n", + " total_queries = 0\n", + " query_durations = []\n", + " throttling_count = {}\n", + " worker_results = {}\n", + " throttling_results = {}\n", + " threads = []\n", + "\n", + " print(f\"\\nLoad Test Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + "\n", + "\n", + " for i in range(users):\n", + " worker_id = i\n", + " thread = threading.Thread(target=run_query, args=(queries[i % len(queries)], worker_id, worker_results, throttling_results))\n", + " threads.append(thread)\n", + " thread.start()\n", + "\n", + " for thread in threads:\n", + " thread.join()\n", + "\n", + " \n", + " query_durations = [value for worker_results in worker_results.values() for value in worker_results]\n", + " total_queries = len(query_durations)\n", + " throttling_count=(sum(len(exceptions_list) for exceptions_list in throttling_results.values()))\n", + " \n", + "\n", + " print(f\"Load Test End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + " \n", + " if query_durations:\n", + " p50 = np.percentile(query_durations, 50)\n", + " p90 = np.percentile(query_durations, 90)\n", + " p99 = np.percentile(query_durations, 99)\n", + " else:\n", + " p50 = p90 = p99 = None \n", + "\n", + " \n", + " return total_queries, p50, 
p90, p99, throttling_count\n", + "\n", + "\n", + "users_count = [1,2,3,4,5,6,7,9,11,13,15,17,21] \n", + "duration_seconds = 60\n", + "\n", + "results = []\n", + "\n", + "# Run the tests for different worker counts\n", + "for users in users_count:\n", + " total_queries, p50, p90, p99, throttling_count = run_parallel_queries(duration_seconds, queries, users)\n", + " results.append((users, total_queries, p50, p90, p99, throttling_count))\n", + " print(f\"num_users: {users}\")\n", + " print(f\"Total number of queries run in {duration_seconds} seconds: {total_queries}\")\n", + " print(f\"p50 (50th percentile) of query durations: {p50:.2f} seconds\")\n", + " print(f\"p90 (90th percentile) of query durations: {p90:.2f} seconds\")\n", + " print(f\"p99 (99th percentile) of query durations: {p99:.2f} seconds\")\n", + " print(f\"Throttling count: {throttling_count}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aedc0c2", + "metadata": {}, + "outputs": [], + "source": [ + "total_queries = [result[1] for result in results]\n", + "p50s = [result[2] for result in results]\n", + "p90s = [result[3] for result in results]\n", + "p99s = [result[4] for result in results]\n", + "throttling_counts = [result[5] for result in results]\n", + "\n", + "plt.figure(figsize=(12, 8))\n", + "\n", + "# Plot latency percentiles\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(users_count, p50s, label='p50')\n", + "plt.plot(users_count, p90s, label='p90')\n", + "plt.plot(users_count, p99s, label='p99')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Latency (seconds)')\n", + "plt.xticks(users_count)\n", + "plt.title('Latency Percentiles')\n", + "plt.legend()\n", + "\n", + "# Plot Queries Per Minute (QPM)\n", + "plt.subplot(3, 1, 2)\n", + "qpm = [q / (duration_seconds / 60) for q in total_queries]\n", + "plt.plot(users_count, qpm, label='Queries Per Minute (QPM)')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Queries Per Minute')\n", + 
"plt.xticks(users_count)\n", + "plt.title('Queries Per Minute')\n", + "plt.legend()\n", + "\n", + "# Plot Throttling Counts\n", + "plt.subplot(3, 1, 3)\n", + "plt.plot(users_count, throttling_counts, label='Throttling Count', color='red')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Throttling Count')\n", + "plt.xticks(users_count)\n", + "plt.title('Throttling Count')\n", + "plt.legend()\n", + "\n", + "plt.tight_layout()\n", + "plt.show() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "258db7a6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/python/timestream-compute-units-testing/lastpoint-query.ipynb b/tools/python/timestream-compute-units-testing/lastpoint-query.ipynb deleted file mode 100644 index 76ffe12f..00000000 --- a/tools/python/timestream-compute-units-testing/lastpoint-query.ipynb +++ /dev/null @@ -1,232 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "d55b79d3", - "metadata": {}, - "outputs": [], - "source": [ - "import boto3\n", - "import concurrent.futures\n", - "import time\n", - "from datetime import datetime\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from botocore.exceptions import ClientError\n", - "\n", - "# Initialize the Timestream client\n", - "client = boto3.client('timestream-query', region_name='us-east-2')\n", - "\n", - "# List of different Timestream queries\n", - "queries = [\n", - " f'select memory from \"devops\".\"sample_devops\" where time > ago(10m) and hostname=\\'host1\\' order 
by time desc limit 1',\n", - " ]\n", - "\n", - "# Function to run a single query and measure its duration\n", - "def run_query(query, worker_id):\n", - " start_time = time.time()\n", - " try:\n", - " response = client.query(QueryString=query)\n", - " duration = time.time() - start_time\n", - " return worker_id, response, duration, None\n", - " except ClientError as e:\n", - " duration = time.time() - start_time\n", - " if e.response['Error']['Code'] == 'ThrottlingException':\n", - " return worker_id, None, duration, 'ThrottlingException'\n", - " else:\n", - " raise e\n", - "\n", - "# Function to run queries in parallel\n", - "def run_parallel_queries(duration_seconds, queries, max_workers):\n", - " end_time = time.time() + duration_seconds\n", - " total_queries = 0\n", - " query_durations = []\n", - " throttling_count = 0\n", - "\n", - " print(f\"\\nStart time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", - "\n", - " with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n", - " while time.time() < end_time:\n", - " # Launch each query in parallel with a worker ID\n", - " futures = {executor.submit(run_query, queries[i % len(queries)], i): i for i in range(max_workers)}\n", - "\n", - " # Process results\n", - " for future in concurrent.futures.as_completed(futures):\n", - " try:\n", - " worker_id, result, duration, error = future.result()\n", - " query_durations.append(duration)\n", - " total_queries += 1\n", - " if error == 'ThrottlingException':\n", - " throttling_count += 1\n", - " #print(f\"Worker {worker_id}: Duration: {duration:.2f} seconds, results: {result}\")\n", - " except Exception as e:\n", - " print(f\"Worker {worker_id}: Query failed: {e}\")\n", - "\n", - " print(f\"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", - "\n", - " if query_durations:\n", - " p50 = np.percentile(query_durations, 50)\n", - " p90 = np.percentile(query_durations, 90)\n", - " p99 = np.percentile(query_durations, 99)\n", - " 
else:\n", - " p50 = p90 = p99 = None\n", - "\n", - " return total_queries, p50, p90, p99, throttling_count\n", - "\n", - "# Number of threads (workers) to test\n", - "worker_counts = [7, 14, 21, 28, 42, 50, 60]\n", - "duration_seconds = 60\n", - "\n", - "results = []\n", - "\n", - "# Run the tests for different worker counts\n", - "for max_workers in worker_counts:\n", - " total_queries, p50, p90, p99, throttling_count = run_parallel_queries(duration_seconds, queries, max_workers)\n", - " results.append((max_workers, total_queries, p50, p90, p99, throttling_count))\n", - " print(f\"num_workers: {max_workers}\")\n", - " print(f\"Total number of queries run in {duration_seconds} seconds: {total_queries}\")\n", - " print(f\"p50 (50th percentile) of query durations: {p50:.2f} seconds\")\n", - " print(f\"p90 (90th percentile) of query durations: {p90:.2f} seconds\")\n", - " print(f\"p99 (99th percentile) of query durations: {p99:.2f} seconds\")\n", - " print(f\"Throttling count: {throttling_count}\")\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48f18dcd", - "metadata": {}, - "outputs": [], - "source": [ - "worker_counts = [result[0] for result in results]\n", - "total_queries = [result[1] for result in results]\n", - "p50s = [result[2] for result in results]\n", - "p90s = [result[3] for result in results]\n", - "p99s = [result[4] for result in results]\n", - "throttling_counts = [result[5] for result in results]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "123c3d90", - "metadata": {}, - "outputs": [], - "source": [ - "print(worker_counts)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55f0a802", - "metadata": {}, - "outputs": [], - "source": [ - "print(total_queries)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52a4013f", - "metadata": {}, - "outputs": [], - "source": [ - "print(p50s)" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "id": "203ed0b4", - "metadata": {}, - "outputs": [], - "source": [ - "print(p90s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a07cca8", - "metadata": {}, - "outputs": [], - "source": [ - "print(p99s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7363b0b", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 8))\n", - "\n", - "# Plot latency percentiles\n", - "plt.subplot(3, 1, 1)\n", - "plt.plot(worker_counts, p50s, label='p50')\n", - "plt.plot(worker_counts, p90s, label='p90')\n", - "plt.plot(worker_counts, p99s, label='p99')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Latency (seconds)')\n", - "plt.title('Latency Percentiles')\n", - "plt.legend()\n", - "\n", - "# Plot Queries Per Minute (QPM)\n", - "plt.subplot(3, 1, 2)\n", - "qpm = [q / (duration_seconds / 60) for q in total_queries]\n", - "plt.plot(worker_counts, qpm, label='Queries Per Minute (QPM)')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Queries Per Minute')\n", - "plt.title('Queries Per Minute')\n", - "plt.legend()\n", - "\n", - "# Plot Throttling Counts\n", - "plt.subplot(3, 1, 3)\n", - "plt.plot(worker_counts, throttling_counts, label='Throttling Count', color='red')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Throttling Count')\n", - "plt.title('Throttling Count')\n", - "plt.legend()\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c78828c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_python3", - "language": "python", - "name": "conda_python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} diff --git a/tools/python/timestream-compute-units-testing/single-groupby-orderby-query.ipynb b/tools/python/timestream-compute-units-testing/single-groupby-orderby-query.ipynb new file mode 100644 index 00000000..8886bf90 --- /dev/null +++ b/tools/python/timestream-compute-units-testing/single-groupby-orderby-query.ipynb @@ -0,0 +1,186 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e33a646c", + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "import boto3\n", + "import threading\n", + "import time\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from botocore.exceptions import ClientError\n", + "\n", + "# Initialize the Timestream client\n", + "client = boto3.client('timestream-query', region_name='us-east-2')\n", + "\n", + "# List of different Timestream queries\n", + "queries = [\n", + " f'select BIN(time, 1m) AS binned_time, max(cpu_utilization) as max_cpu_utilization from \"devops\".\"sample_devops\" where time > ago(10m) and hostname=\\'host2\\' group by BIN(time, 1m) order by binned_time asc',\n", + "]\n", + "\n", + "# Function to run a single query and measure its duration\n", + "def run_query(query, worker_id, worker_results,throttling_results):\n", + " start_time = time.time()\n", + " worker_results[worker_id] = []\n", + " end_time = time.time() + duration_seconds\n", + " throttling_results[worker_id] = []\n", + " throttling_count = []\n", + " results_dict = {}\n", + " while time.time() < end_time:\n", + " try:\n", + " start = time.time()\n", + " response = client.query(QueryString=query)\n", + " duration = time.time() - start\n", + " worker_results[worker_id].append((duration))\n", + " except ClientError as e:\n", + " if e.response['Error']['Code'] == 'ThrottlingException':\n", + " throttling_results[worker_id].append(\"ThrottlingException\")\n", + " else:\n", + " raise e \n", + "\n", + "\n", + "# Function to run queries in parallel using threads\n", + 
"def run_parallel_queries(duration_seconds, queries, users):\n", + " end_time = time.time() + duration_seconds\n", + " total_queries = 0\n", + " query_durations = []\n", + " throttling_count = {}\n", + " worker_results = {}\n", + " throttling_results = {}\n", + " threads = []\n", + "\n", + " print(f\"\\nLoad Test Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + "\n", + "\n", + " for i in range(users):\n", + " worker_id = i\n", + " thread = threading.Thread(target=run_query, args=(queries[i % len(queries)], worker_id, worker_results, throttling_results))\n", + " threads.append(thread)\n", + " thread.start()\n", + "\n", + " for thread in threads:\n", + " thread.join()\n", + "\n", + " \n", + " query_durations = [value for worker_results in worker_results.values() for value in worker_results]\n", + " total_queries = len(query_durations)\n", + " throttling_count=(sum(len(exceptions_list) for exceptions_list in throttling_results.values()))\n", + " \n", + "\n", + " print(f\"Load Test End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", + " \n", + " if query_durations:\n", + " p50 = np.percentile(query_durations, 50)\n", + " p90 = np.percentile(query_durations, 90)\n", + " p99 = np.percentile(query_durations, 99)\n", + " else:\n", + " p50 = p90 = p99 = None \n", + "\n", + " \n", + " return total_queries, p50, p90, p99, throttling_count\n", + "\n", + "\n", + "users_count = [1,2,3,4,5,6,7,9,11,13,15,17,21] \n", + "duration_seconds = 60\n", + "\n", + "results = []\n", + "\n", + "# Run the tests for different worker counts\n", + "for users in users_count:\n", + " total_queries, p50, p90, p99, throttling_count = run_parallel_queries(duration_seconds, queries, users)\n", + " results.append((users, total_queries, p50, p90, p99, throttling_count))\n", + " print(f\"num_users: {users}\")\n", + " print(f\"Total number of queries run in {duration_seconds} seconds: {total_queries}\")\n", + " print(f\"p50 (50th percentile) of query durations: {p50:.2f} 
seconds\")\n", + " print(f\"p90 (90th percentile) of query durations: {p90:.2f} seconds\")\n", + " print(f\"p99 (99th percentile) of query durations: {p99:.2f} seconds\")\n", + " print(f\"Throttling count: {throttling_count}\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aedc0c2", + "metadata": {}, + "outputs": [], + "source": [ + "total_queries = [result[1] for result in results]\n", + "p50s = [result[2] for result in results]\n", + "p90s = [result[3] for result in results]\n", + "p99s = [result[4] for result in results]\n", + "throttling_counts = [result[5] for result in results]\n", + "\n", + "plt.figure(figsize=(12, 8))\n", + "\n", + "# Plot latency percentiles\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(users_count, p50s, label='p50')\n", + "plt.plot(users_count, p90s, label='p90')\n", + "plt.plot(users_count, p99s, label='p99')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Latency (seconds)')\n", + "plt.xticks(users_count)\n", + "plt.title('Latency Percentiles')\n", + "plt.legend()\n", + "\n", + "# Plot Queries Per Minute (QPM)\n", + "plt.subplot(3, 1, 2)\n", + "qpm = [q / (duration_seconds / 60) for q in total_queries]\n", + "plt.plot(users_count, qpm, label='Queries Per Minute (QPM)')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Queries Per Minute')\n", + "plt.xticks(users_count)\n", + "plt.title('Queries Per Minute')\n", + "plt.legend()\n", + "\n", + "# Plot Throttling Counts\n", + "plt.subplot(3, 1, 3)\n", + "plt.plot(users_count, throttling_counts, label='Throttling Count', color='red')\n", + "plt.xlabel('Number of Users')\n", + "plt.ylabel('Throttling Count')\n", + "plt.xticks(users_count)\n", + "plt.title('Throttling Count')\n", + "plt.legend()\n", + "plt.tight_layout()\n", + "plt.show() " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "258db7a6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
"conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/python/timestream-compute-units-testing/single-groupby-orderby.ipynb b/tools/python/timestream-compute-units-testing/single-groupby-orderby.ipynb deleted file mode 100644 index 7ca5f5bc..00000000 --- a/tools/python/timestream-compute-units-testing/single-groupby-orderby.ipynb +++ /dev/null @@ -1,234 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "d55b79d3", - "metadata": {}, - "outputs": [], - "source": [ - "import boto3\n", - "import concurrent.futures\n", - "import time\n", - "from datetime import datetime\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "from botocore.exceptions import ClientError\n", - "\n", - "# Initialize the Timestream client\n", - "client = boto3.client('timestream-query', region_name='us-east-2')\n", - "\n", - "# List of different Timestream queries\n", - "queries = [\n", - " f'select BIN(time, 1m) AS binned_time, max(cpu_utilization) as max_cpu_utilization from \"devops\".\"sample_devops\" where time > ago(10m) and hostname=\\'host2\\' group by BIN(time, 1m) order by binned_time asc',\n", - "]\n", - "\n", - "# Function to run a single query and measure its duration\n", - "def run_query(query, worker_id):\n", - " start_time = time.time()\n", - " try:\n", - " response = client.query(QueryString=query)\n", - " duration = time.time() - start_time\n", - " return worker_id, response, duration, None\n", - " except ClientError as e:\n", - " duration = time.time() - start_time\n", - " if e.response['Error']['Code'] == 'ThrottlingException':\n", - " return worker_id, None, duration, 
'ThrottlingException'\n", - " else:\n", - " raise e\n", - "\n", - "# Function to run queries in parallel\n", - "def run_parallel_queries(duration_seconds, queries, max_workers):\n", - " end_time = time.time() + duration_seconds\n", - " total_queries = 0\n", - " query_durations = []\n", - " throttling_count = 0\n", - "\n", - " print(f\"\\nStart time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", - "\n", - " with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:\n", - " while time.time() < end_time:\n", - " # Launch each query in parallel with a worker ID\n", - " futures = {executor.submit(run_query, queries[i % len(queries)], i): i for i in range(max_workers)}\n", - "\n", - " # Process results\n", - " for future in concurrent.futures.as_completed(futures):\n", - " try:\n", - " worker_id, result, duration, error = future.result()\n", - " query_durations.append(duration)\n", - " total_queries += 1\n", - " if error == 'ThrottlingException':\n", - " throttling_count += 1\n", - " #print(f\"Worker {worker_id}: Duration: {duration:.2f} seconds, Error: {error}\")\n", - " except Exception as e:\n", - " print(f\"Worker {worker_id}: Query failed: {e}\")\n", - "\n", - " print(f\"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n", - "\n", - " if query_durations:\n", - " p50 = np.percentile(query_durations, 50)\n", - " p90 = np.percentile(query_durations, 90)\n", - " p99 = np.percentile(query_durations, 99)\n", - " else:\n", - " p50 = p90 = p99 = None\n", - "\n", - " return total_queries, p50, p90, p99, throttling_count\n", - "\n", - "# Number of threads (workers) to test\n", - "worker_counts = [7, 14, 21, 28, 42, 50, 60]\n", - "duration_seconds = 60\n", - "\n", - "results = []\n", - "\n", - "# Run the tests for different worker counts\n", - "for max_workers in worker_counts:\n", - " total_queries, p50, p90, p99, throttling_count = run_parallel_queries(duration_seconds, queries, max_workers)\n", - " results.append((max_workers, 
total_queries, p50, p90, p99, throttling_count))\n", - " print(f\"num_workers: {max_workers}\")\n", - " print(f\"Total number of queries run in {duration_seconds} seconds: {total_queries}\")\n", - " print(f\"p50 (50th percentile) of query durations: {p50:.2f} seconds\")\n", - " print(f\"p90 (90th percentile) of query durations: {p90:.2f} seconds\")\n", - " print(f\"p99 (99th percentile) of query durations: {p99:.2f} seconds\")\n", - " print(f\"Throttling count: {throttling_count}\")\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48f18dcd", - "metadata": {}, - "outputs": [], - "source": [ - "worker_counts = [result[0] for result in results]\n", - "total_queries = [result[1] for result in results]\n", - "p50s = [result[2] for result in results]\n", - "p90s = [result[3] for result in results]\n", - "p99s = [result[4] for result in results]\n", - "throttling_counts = [result[5] for result in results]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "123c3d90", - "metadata": {}, - "outputs": [], - "source": [ - "print(worker_counts)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55f0a802", - "metadata": {}, - "outputs": [], - "source": [ - "print(total_queries)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52a4013f", - "metadata": {}, - "outputs": [], - "source": [ - "print(p50s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "203ed0b4", - "metadata": {}, - "outputs": [], - "source": [ - "print(p90s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a07cca8", - "metadata": {}, - "outputs": [], - "source": [ - "print(p99s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a7363b0b", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "plt.figure(figsize=(12, 8))\n", - "\n", - "# Plot latency percentiles\n", - "plt.subplot(3, 1, 1)\n", - "plt.plot(worker_counts, 
p50s, label='p50')\n", - "plt.plot(worker_counts, p90s, label='p90')\n", - "plt.plot(worker_counts, p99s, label='p99')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Latency (seconds)')\n", - "plt.title('Latency Percentiles')\n", - "plt.legend()\n", - "\n", - "# Plot Queries Per Minute (QPM)\n", - "plt.subplot(3, 1, 2)\n", - "qpm = [q / (duration_seconds / 60) for q in total_queries]\n", - "plt.plot(worker_counts, qpm, label='Queries Per Minute (QPM)')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Queries Per Minute')\n", - "plt.title('Queries Per Minute')\n", - "plt.legend()\n", - "\n", - "# Plot Throttling Counts\n", - "plt.subplot(3, 1, 3)\n", - "plt.plot(worker_counts, throttling_counts, label='Throttling Count', color='red')\n", - "plt.xlabel('Number of Workers')\n", - "plt.ylabel('Throttling Count')\n", - "plt.title('Throttling Count')\n", - "plt.legend()\n", - "\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c78828c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_python3", - "language": "python", - "name": "conda_python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.14" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}