From 9b01b00a45f572f1373d74a2757fa6a03a64b675 Mon Sep 17 00:00:00 2001
From: Anagha Mittal <43825882+anaghamittal@users.noreply.github.com>
Date: Mon, 10 Feb 2025 19:41:13 +0530
Subject: [PATCH] 3723 docs rfccreate a doc showing how to integrate amazon
sagemaker with timescale cloud (#3789)
* chore: update the sagemaker doc
---
.../integrations/amazon-sagemaker.md | 147 ++++++++++++++++++
use-timescale/integrations/apache-airflow.md | 4 +-
.../integrations/azure-data-studio.md | 2 +-
use-timescale/integrations/index.md | 10 +-
use-timescale/page-index/page-index.js | 5 +
5 files changed, 161 insertions(+), 7 deletions(-)
create mode 100644 use-timescale/integrations/amazon-sagemaker.md
diff --git a/use-timescale/integrations/amazon-sagemaker.md b/use-timescale/integrations/amazon-sagemaker.md
new file mode 100644
index 0000000000..bc4c4c2810
--- /dev/null
+++ b/use-timescale/integrations/amazon-sagemaker.md
@@ -0,0 +1,147 @@
+---
+title: Integrate Amazon SageMaker with Timescale Cloud
+excerpt: Integrate Amazon SageMaker with Timescale Cloud to store and analyze ML model data.
+products: [cloud, mst, self_hosted]
+keywords: [connect, integrate, amazon, aws, sagemaker]
+---
+
+import IntegrationPrereqs from "versionContent/_partials/_integration-prereqs.mdx";
+
+# Integrate Amazon SageMaker with $CLOUD_LONG
+
+[Amazon SageMaker AI][Amazon Sagemaker] is a fully managed machine learning (ML) service. With SageMaker AI, data
+scientists and developers can quickly and confidently build, train, and deploy ML models into a production-ready
+hosted environment.
+
+This page shows you how to integrate Amazon SageMaker with a $SERVICE_LONG.
+
+## Prerequisites
+
+
+
+* Set up an [AWS account][aws-sign-up]
+
+## Prepare your $SERVICE_LONG to ingest data from SageMaker
+
+Create a table in $SERVICE_LONG to store model predictions generated by SageMaker.
+
+
+
+1. **Connect to your $SERVICE_LONG**
+
+ For $CLOUD_LONG, open an [SQL editor][run-queries] in [$CONSOLE][open-console]. For self-hosted, use [`psql`][psql].
+
+ ```sql
+ CREATE TABLE model_predictions (
+ time TIMESTAMPTZ NOT NULL,
+ model_name TEXT NOT NULL,
+ prediction DOUBLE PRECISION NOT NULL
+ );
+ ```
+
+1. **For better performance and easier real-time analytics, convert the table to a hypertable**
+
+ [Hypertables][about-hypertables] are PostgreSQL tables that automatically partition your data by time. You interact
+    with hypertables in the same way as regular PostgreSQL tables, but with extra features that make managing your
+ time-series data much easier.
+
+ ```sql
+ SELECT create_hypertable('model_predictions', 'time');
+ ```
+
+
+
+## Create the code to ingest data into a $SERVICE_LONG
+
+
+
+1. **Create a SageMaker Notebook instance**
+
+ 1. In [Amazon SageMaker > Notebooks and Git repos][aws-notebooks-git-repos], click `Create Notebook instance`.
+ 1. Follow the wizard to create a default Notebook instance.
+
+1. **Write a Notebook script that inserts data into your $SERVICE_LONG**
+
+    1. When your Notebook instance is `InService`, click `Open JupyterLab` and click `conda_python3`.
+ 1. Update the following script with your [connection details][connection-info], then paste it in the Notebook.
+
+ ```python
+ import psycopg2
+ from datetime import datetime
+
+ def insert_prediction(model_name, prediction, host, port, user, password, dbname):
+ conn = psycopg2.connect(
+ host=host,
+ port=port,
+ user=user,
+ password=password,
+ dbname=dbname
+ )
+ cursor = conn.cursor()
+
+ query = """
+ INSERT INTO model_predictions (time, model_name, prediction)
+ VALUES (%s, %s, %s);
+ """
+
+ values = (datetime.utcnow(), model_name, prediction)
+ cursor.execute(query, values)
+ conn.commit()
+
+ cursor.close()
+ conn.close()
+
+ # Example usage
+ insert_prediction(
+ model_name="example_model",
+ prediction=0.95,
+ host="",
+ port="",
+ user="",
+ password="",
+ dbname=""
+ )
+ ```
+
+1. **Test your SageMaker script**
+
+ 1. Run the script in your SageMaker notebook.
+    1. Verify that the data is in your $SERVICE_SHORT.
+
+       Open an [SQL editor][run-queries] and check the `model_predictions` table:
+
+ ```sql
+ SELECT * FROM model_predictions;
+ ```
+ You see something like:
+
+ |time | model_name | prediction |
+ | -- | -- | -- |
+ |2025-02-06 16:56:34.370316+00| timescale-cloud-model| 0.95|
+
+
+
+Now you can seamlessly integrate Amazon SageMaker with $CLOUD_LONG to store and analyze time-series data generated by
+machine learning models. You can also integrate visualization tools like [Grafana][grafana-integration] or
+[Tableau][tableau-integration] with $CLOUD_LONG to create real-time dashboards of your model predictions.
+
+
+
+
+
+
+[Amazon Sagemaker]: https://docs.aws.amazon.com/sagemaker/latest/dg/whatis.html
+[aws-sign-up]: https://signin.aws.amazon.com/signup?request_type=register
+[install-aws-cli]: https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html
+[install-python]: https://www.python.org/downloads/
+[install-postgresql]: https://www.postgresql.org/download/
+[console]: https://console.cloud.timescale.com/
+[grafana-integration]: use-timescale/:currentVersion:/integrations/grafana/
+[tableau-integration]: use-timescale/:currentVersion:/integrations/tableau/
+[run-queries]: /getting-started/:currentVersion:/run-queries-from-console/
+[open-console]: https://console.cloud.timescale.com/dashboard/services
+[psql]: /use-timescale/:currentVersion:/integrations/psql/
+[about-hypertables]: /use-timescale/:currentVersion:/hypertables/about-hypertables/
+[aws-notebooks-git-repos]:https://console.aws.amazon.com/sagemaker/home#/notebooks-and-git-repos
+[secure-vpc-aws]: /use-timescale/:currentVersion:/vpc/
+[connection-info]: /use-timescale/:currentVersion:/integrations/find-connection-details/
diff --git a/use-timescale/integrations/apache-airflow.md b/use-timescale/integrations/apache-airflow.md
index 0f1f323bd7..3af2fd5ea6 100644
--- a/use-timescale/integrations/apache-airflow.md
+++ b/use-timescale/integrations/apache-airflow.md
@@ -21,8 +21,8 @@ This page shows you how to use a Python connector in a DAG to integrate Apache A
-* [Install Python3 and pip3][install-python-pip]
-* [Install Apache Airflow][install-apache-airflow]
+* Install [Python3 and pip3][install-python-pip]
+* Install [Apache Airflow][install-apache-airflow]
Ensure that your Airflow instance has network access to $CLOUD_LONG.
diff --git a/use-timescale/integrations/azure-data-studio.md b/use-timescale/integrations/azure-data-studio.md
index 0eb1989a2d..f7a91684de 100644
--- a/use-timescale/integrations/azure-data-studio.md
+++ b/use-timescale/integrations/azure-data-studio.md
@@ -51,5 +51,5 @@ You have successfully integrated Azure Data Studio with $CLOUD_LONG.
[connection-info]: /use-timescale/:currentVersion:/integrations/find-connection-details/
[azure-data-studio]: https://azure.microsoft.com/en-us/products/data-studio
[ssl-mode]: /use-timescale/:currentVersion:/security/strict-ssl/
-
+[connection-info]: /use-timescale/:currentVersion:/integrations/find-connection-details/
diff --git a/use-timescale/integrations/index.md b/use-timescale/integrations/index.md
index 4bc9b34776..27856f9904 100644
--- a/use-timescale/integrations/index.md
+++ b/use-timescale/integrations/index.md
@@ -47,10 +47,11 @@ Some of the most in-demand integrations for $CLOUD_LONG are listed below, with l
## Data engineering and extract, transform, load
-| Name | Description |
-|:--------------------------------:|-------------------------------------------------------------------------------------|
-| [Apache Airflow][apache-airflow] | Programmatically author, schedule, and monitor workflows. |
-|[AWS Lambda][aws-lambda]| Run code without provisioning or managing servers, scaling automatically as needed. |
+| Name | Description |
+|:--------------------------------:|-------------------------------------------------------------------------------|
+| [Amazon SageMaker][amazon-sagemaker]| Build, train, and deploy ML models into a production-ready hosted environment. |
+| [Apache Airflow][apache-airflow] | Programmatically author, schedule, and monitor workflows. |
+| [AWS Lambda][aws-lambda]| Run code without provisioning or managing servers, scaling automatically as needed. |
[psql]: /use-timescale/:currentVersion:/integrations/psql/
@@ -67,3 +68,4 @@ Some of the most in-demand integrations for $CLOUD_LONG are listed below, with l
[aws-lambda]: /use-timescale/:currentVersion:/integrations/aws-lambda
[postgresql-integrations]: https://slashdot.org/software/p/PostgreSQL/integrations/
[prometheus]: /use-timescale/:currentVersion:/integrations/prometheus
+[amazon-sagemaker]: /use-timescale/:currentVersion:/integrations/amazon-sagemaker
diff --git a/use-timescale/page-index/page-index.js b/use-timescale/page-index/page-index.js
index 794d9fff96..5ad57deff9 100644
--- a/use-timescale/page-index/page-index.js
+++ b/use-timescale/page-index/page-index.js
@@ -778,6 +778,11 @@ module.exports = [
href: "find-connection-details",
excerpt: "Learn about connecting to your Timescale database",
},
+ {
+ title: "Amazon SageMaker",
+ href: "amazon-sagemaker",
+ excerpt: "Integrate Amazon SageMaker with Timescale Cloud",
+ },
{
title: "Apache Airflow",
href: "apache-airflow",