From d0464095c0c1fc0a70a896cb146c03242114087f Mon Sep 17 00:00:00 2001 From: abdellah hariti Date: Thu, 16 May 2024 14:12:04 +0100 Subject: [PATCH] feat: Update Algolia index on master merge (#10032) * feat: Update Algolia index upon master merge * highlight the importance of the `div#main` on docPage * use env vars on Algolia search client * add public Algolia env vars to GH action * delete changelog/feed before build --- .env.example | 11 ++- .github/workflows/algolia-index.yml | 37 ++++++++++ package.json | 9 +-- scripts/algolia.ts | 102 ++++++++++++++++++++++++++++ src/components/docPage/index.tsx | 1 + src/components/search/index.tsx | 4 +- yarn.lock | 2 +- 7 files changed, 152 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/algolia-index.yml create mode 100644 scripts/algolia.ts diff --git a/.env.example b/.env.example index 2763a431adc55..94533a182c849 100644 --- a/.env.example +++ b/.env.example @@ -1,14 +1,11 @@ # rename this file to .env and supply the values listed below # also make sure they are available to the build tool (e.g. 
Netlify) -# warning: variables prefixed with GATSBY_ will be made available to client-side code +# warning: variables prefixed with NEXT_PUBLIC_ will be made available to client-side code # be careful not to expose sensitive data (in this case your Algolia admin key) -GATSBY_ALGOLIA_APP_ID=OOK48W9UCL -GATSBY_ALGOLIA_SEARCH_KEY=ca98597f559459c216891b75989832f8 -GATSBY_ALGOLIA_INDEX_PREFIX=test-sentry- -# ALGOLIA_ADMIN_KEY= -# ALGOLIA_INDEX=0 -# SENTRY_AUTH_TOKEN= +NEXT_PUBLIC_ALGOLIA_APP_ID=OOK48W9UCL +NEXT_PUBLIC_ALGOLIA_SEARCH_KEY=ca98597f559459c216891b75989832f8 + DATABASE_URL=postgresql://postgres:postgres@localhost:5432/changelog NEXTAUTH_URL=http://localhost:3000 NEXTAUTH_SECRET=secret diff --git a/.github/workflows/algolia-index.yml b/.github/workflows/algolia-index.yml new file mode 100644 index 0000000000000..5cc36849da0cd --- /dev/null +++ b/.github/workflows/algolia-index.yml @@ -0,0 +1,37 @@ +name: Update Algolia index +on: + push: + branches: + - master +jobs: + index: + name: Update Algolia index + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: getsentry/action-setup-volta@c52be2ea13cfdc084edb806e81958c13e445941e # v1.2.0 + - uses: oven-sh/setup-bun@v1 + with: + bun-version: latest + + - uses: actions/cache@v4 + id: cache + with: + path: ${{ github.workspace }}/node_modules + key: node-${{ runner.os }}-${{ hashFiles('**/yarn.lock') }} + + - run: yarn install --frozen-lockfile + if: steps.cache.outputs.cache-hit != 'true' + # Remove the changelog feed route (`app/changelog/feed.xml`) to avoid a build error due to missing `DATABASE_URL` + # and save some build time + - run: rm -r app/changelog/feed.xml + - run: yarn build + # bun seems to be the most straightforward way to run a TypeScript script + # without introducing another dependency like ts-node or tsx for everyone else + - run: bun ./scripts/algolia.ts + env: + ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }} + ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }} + ALGOLIA_INDEX_PREFIX: ${{ 
secrets.ALGOLIA_INDEX_PREFIX }} + NEXT_PUBLIC_ALGOLIA_APP_ID: ${{ secrets.NEXT_PUBLIC_ALGOLIA_APP_ID }} + NEXT_PUBLIC_ALGOLIA_SEARCH_KEY: ${{ secrets.NEXT_PUBLIC_ALGOLIA_SEARCH_KEY }} diff --git a/package.json b/package.json index 545551718fc79..e63b978a0682b 100644 --- a/package.json +++ b/package.json @@ -18,10 +18,10 @@ "lint": "next lint", "lint:ts": "tsc --skipLibCheck", "lint:docs": "bin/lint-docs.ts", - "lint:eslint": "eslint \"{src,app}/**/*.{ts,tsx,js,jsx}\"", - "lint:eslint:fix": "eslint --fix \"{src,app}/**/*.{ts,tsx,js,jsx}\"", - "lint:prettier": "prettier --check \"./{src,app}/**/*.{md,mdx,ts,tsx,js,jsx}\"", - "lint:prettier:fix": "prettier --write \"./{src,app}/**/*.{md,mdx,ts,tsx,js,jsx}\"", + "lint:eslint": "eslint \"{src,app,scripts}/**/*.{ts,tsx,js,jsx}\"", + "lint:eslint:fix": "eslint --fix \"{src,app,scripts}/**/*.{ts,tsx,js,jsx}\"", + "lint:prettier": "prettier --check \"./{src,app,scripts}/**/*.{md,mdx,ts,tsx,js,jsx,mjs}\"", + "lint:prettier:fix": "prettier --write \"./{src,app,scripts}/**/*.{md,mdx,ts,tsx,js,jsx,mjs}\"", "lint:fix": "yarn run lint:prettier:fix && yarn run lint:eslint:fix", "sidecar": "yarn spotlight-sidecar", "test": "jest" @@ -52,6 +52,7 @@ "@sentry-internal/global-search": "^1.0.0", "@sentry/nextjs": "^8.0.0-beta.6", "@types/mdx": "^2.0.9", + "algoliasearch": "^4.23.3", "esbuild": "^0.19.8", "framer-motion": "^10.12.16", "gray-matter": "^4.0.3", diff --git a/scripts/algolia.ts b/scripts/algolia.ts new file mode 100644 index 0000000000000..1323a6e01f3fd --- /dev/null +++ b/scripts/algolia.ts @@ -0,0 +1,102 @@ +/** + * This script is used to index the static docs HTML files generated by Next.js into Algolia. + * + * It's a migration from the Gatsby solution, + * which relied on the `gatsby-plugin-algolia`: https://github.com/getsentry/sentry-docs/blob/3c1361bdcb23a0fcee1f3019bca7c14a5d632162/src/gatsby/utils/algolia.ts + * + * The record generation logic is reused as is, with *two* notable changes: + * 1. 
We manually feed the HTML files to the record generation function + * 2. We manually upload the records to Algolia + * + * This script is meant to be run in a GitHub Action (see `.github/workflows/algolia-index.yml`). + * + * If you want to run it locally, + * 1. Make sure you have the required env vars set up + * 2. Be careful to change `ALGOLIA_INDEX_PREFIX` to a value different from `sentry-` + * to avoid nuking the production index + * 3. Run a production build of the app before running this script + */ + +import fs from 'fs'; +import {join} from 'path'; + +import {extrapolate, htmlToAlgoliaRecord} from '@sentry-internal/global-search'; +import algoliasearch from 'algoliasearch'; + +import {getDocsFrontMatter} from '../src/mdx'; +import {FrontMatter} from '../src/types'; + +// This is the path to the static files generated by Next.js for the app directory +// The directory structure is not documented and could change in the future +// The ideal way to do this is probably to run a production server and fetch the HTML from there. 
+const staticHtmlFilesPath = join(process.cwd(), '.next', 'server', 'app'); + +const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID; +const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY; +const ALGOLIA_INDEX_PREFIX = process.env.ALGOLIA_INDEX_PREFIX; + +if (!ALGOLIA_APP_ID) { + throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets'); +} +if (!ALGOLIA_API_KEY) { + throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets'); +} +if (!ALGOLIA_INDEX_PREFIX) { + throw new Error('`ALGOLIA_INDEX_PREFIX` env var must be configured in repo secrets'); +} + +const indexName = `${ALGOLIA_INDEX_PREFIX}docs`; +const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY); +const index = client.initIndex(indexName); + +indexAndUpload(); +async function indexAndUpload() { + // the page front matters are the source of truth for the static doc routes + // as they are used directly by generateStaticParams() on the [[...path]] page + const pageFrontMatters = await getDocsFrontMatter(); + const records = await generateAlogliaRecords(pageFrontMatters); + // eslint-disable-next-line no-console + console.log('🔥 Generated %d Algolia records.', records.length); + // eslint-disable-next-line no-console + console.log('🔥 Saving records ...'); + await index + .saveObjects(records, { + batchSize: 10000, + autoGenerateObjectIDIfNotExist: true, + }) + .then(result => { + // eslint-disable-next-line no-console + console.log('🔥 Saved %d Algolia records', result.objectIDs.length); + }); +} + +async function generateAlogliaRecords(pageFrontMatters: FrontMatter[]) { + const records = await Promise.all( + pageFrontMatters + .filter( + frontMatter => !frontMatter.draft && !frontMatter.noindex && frontMatter.title + ) + .map(pageFm => { + // eslint-disable-next-line no-console + console.log('processing:', pageFm.slug); + + const htmlFile = join(staticHtmlFilesPath, pageFm.slug + '.html'); + const html = fs.readFileSync(htmlFile).toString(); + + const pageRecords = 
htmlToAlgoliaRecord( + html, + { + title: pageFm.title, + url: '/' + pageFm.slug + '/', + pathSegments: extrapolate(pageFm.slug, '/').map(x => `/${x}/`), + keywords: pageFm.keywords, + }, + '#main' + ); + + return pageRecords; + }) + ); + + return records.flat(); +} diff --git a/src/components/docPage/index.tsx b/src/components/docPage/index.tsx index 48a9e3b21e3b4..a174f6c1af366 100644 --- a/src/components/docPage/index.tsx +++ b/src/components/docPage/index.tsx @@ -70,6 +70,7 @@ export function DocPage({

{frontMatter.title}

{frontMatter.description}

+ {/* This exact id is important for Algolia indexing */}
{children}
diff --git a/src/components/search/index.tsx b/src/components/search/index.tsx index eedb0943d4732..fdd31c2847443 100644 --- a/src/components/search/index.tsx +++ b/src/components/search/index.tsx @@ -33,8 +33,8 @@ function uuidv4() { // Initialize Algolia Insights algoliaInsights('init', { - appId: 'OOK48W9UCL', - apiKey: '2d64ec1106519cbc672d863b0d200782', + appId: process.env.NEXT_PUBLIC_ALGOLIA_APP_ID, + apiKey: process.env.NEXT_PUBLIC_ALGOLIA_SEARCH_KEY, }); // We dont want to track anyone cross page/sessions or use cookies diff --git a/yarn.lock b/yarn.lock index 3c27a0cb7c9b4..fd56059286f75 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3556,7 +3556,7 @@ ajv@^6.12.4: json-schema-traverse "^0.4.1" uri-js "^4.2.2" -algoliasearch@^4.13.1: +algoliasearch@^4.13.1, algoliasearch@^4.23.3: version "4.23.3" resolved "https://registry.yarnpkg.com/algoliasearch/-/algoliasearch-4.23.3.tgz#e09011d0a3b0651444916a3e6bbcba064ec44b60" integrity sha512-Le/3YgNvjW9zxIQMRhUHuhiUjAlKY/zsdZpfq4dlLqg6mEm0nL6yk+7f2hDOtLpxsgE4jSzDmvHL7nXdBp5feg==