From 4764ee6318dda0bb020c614216a6ffb5acb0e90b Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 13:10:32 +0000 Subject: [PATCH 01/10] WIP --- packages/pglite-sync/package.json | 3 +- packages/pglite-sync/src/apply.ts | 160 ++++ packages/pglite-sync/src/index-OLD.ts | 691 +++++++++++++++ packages/pglite-sync/src/index.ts | 834 ++++++------------ packages/pglite-sync/src/subscriptionState.ts | 135 +++ packages/pglite-sync/src/types.ts | 58 ++ packages/pglite-sync/test/sync.test.ts | 459 ---------- pnpm-lock.yaml | 25 +- 8 files changed, 1336 insertions(+), 1029 deletions(-) create mode 100644 packages/pglite-sync/src/apply.ts create mode 100644 packages/pglite-sync/src/index-OLD.ts create mode 100644 packages/pglite-sync/src/subscriptionState.ts create mode 100644 packages/pglite-sync/src/types.ts diff --git a/packages/pglite-sync/package.json b/packages/pglite-sync/package.json index 89bff3cb..1961ba36 100644 --- a/packages/pglite-sync/package.json +++ b/packages/pglite-sync/package.json @@ -45,7 +45,8 @@ "dist" ], "dependencies": { - "@electric-sql/client": "1.0.0-beta.3" + "@electric-sql/client": "1.0.0-beta.5", + "@electric-sql/experimental": "0.1.2-beta.4" }, "devDependencies": { "@electric-sql/pglite": "workspace:*", diff --git a/packages/pglite-sync/src/apply.ts b/packages/pglite-sync/src/apply.ts new file mode 100644 index 00000000..fa7cb73f --- /dev/null +++ b/packages/pglite-sync/src/apply.ts @@ -0,0 +1,160 @@ +import { ChangeMessage } from '@electric-sql/client' +import type { PGliteInterface, Transaction } from '@electric-sql/pglite' +import type { MapColumns, InsertChangeMessage } from './types' + +export interface ApplyMessageToTableOptions { + pg: PGliteInterface | Transaction + table: string + schema?: string + message: ChangeMessage + mapColumns?: MapColumns + primaryKey: string[] + debug: boolean +} + +export async function applyMessageToTable({ + pg, + table, + schema = 'public', + message, + mapColumns, + primaryKey, + debug, +}: ApplyMessageToTableOptions) { + const data = mapColumns ? 
doMapColumns(mapColumns, message) : message.value + + switch (message.headers.operation) { + case 'insert': { + if (debug) console.log('inserting', data) + const columns = Object.keys(data) + return await pg.query( + ` + INSERT INTO "${schema}"."${table}" + (${columns.map((s) => '"' + s + '"').join(', ')}) + VALUES + (${columns.map((_v, i) => '$' + (i + 1)).join(', ')}) + `, + columns.map((column) => data[column]), + ) + } + + case 'update': { + if (debug) console.log('updating', data) + const columns = Object.keys(data).filter( + // we don't update the primary key, they are used to identify the row + (column) => !primaryKey.includes(column), + ) + if (columns.length === 0) return // nothing to update + return await pg.query( + ` + UPDATE "${schema}"."${table}" + SET ${columns + .map((column, i) => '"' + column + '" = $' + (i + 1)) + .join(', ')} + WHERE ${primaryKey + .map( + (column, i) => + '"' + column + '" = $' + (columns.length + i + 1), + ) + .join(' AND ')} + `, + [ + ...columns.map((column) => data[column]), + ...primaryKey.map((column) => data[column]), + ], + ) + } + + case 'delete': { + if (debug) console.log('deleting', data) + return await pg.query( + ` + DELETE FROM "${schema}"."${table}" + WHERE ${primaryKey + .map((column, i) => '"' + column + '" = $' + (i + 1)) + .join(' AND ')} + `, + [...primaryKey.map((column) => data[column])], + ) + } + } +} + +export interface ApplyMessagesToTableWithCopyOptions { + pg: PGliteInterface | Transaction + table: string + schema?: string + messages: InsertChangeMessage[] + mapColumns?: MapColumns + primaryKey: string[] + debug: boolean +} + +export async function applyMessagesToTableWithCopy({ + pg, + table, + schema = 'public', + messages, + mapColumns, + debug, +}: ApplyMessagesToTableWithCopyOptions) { + if (debug) console.log('applying messages with COPY') + + // Map the messages to the data to be inserted + const data: Record[] = messages.map((message) => + mapColumns ? doMapColumns(mapColumns, message) : message.value, + ) + + // Get column names from the first message + const columns = Object.keys(data[0]) + + // Create CSV data + const csvData = data + .map((message) => { + return columns + .map((column) => { + const value = message[column] + // Escape double quotes and wrap in quotes if necessary + if ( + typeof value === 'string' && + (value.includes(',') || value.includes('"') || value.includes('\n')) + ) { + return `"${value.replace(/"/g, '""')}"` + } + return value === null ? 
'\\N' : value + }) + .join(',') + }) + .join('\n') + const csvBlob = new Blob([csvData], { type: 'text/csv' }) + + // Perform COPY FROM + await pg.query( + ` + COPY "${schema}"."${table}" (${columns.map((c) => `"${c}"`).join(', ')}) + FROM '/dev/blob' + WITH (FORMAT csv, NULL '\\N') + `, + [], + { + blob: csvBlob, + }, + ) + + if (debug) console.log(`Inserted ${messages.length} rows using COPY`) +} + +function doMapColumns( + mapColumns: MapColumns, + message: ChangeMessage, +): Record { + if (typeof mapColumns === 'function') { + return mapColumns(message) + } else { + const mappedColumns: Record = {} + for (const [key, value] of Object.entries(mapColumns)) { + mappedColumns[key] = message.value[value] + } + return mappedColumns + } +} diff --git a/packages/pglite-sync/src/index-OLD.ts b/packages/pglite-sync/src/index-OLD.ts new file mode 100644 index 00000000..8a463f7d --- /dev/null +++ b/packages/pglite-sync/src/index-OLD.ts @@ -0,0 +1,691 @@ +import type { Offset, Row, ShapeStreamOptions } from '@electric-sql/client' +import { + ChangeMessage, + ShapeStream, + isChangeMessage, + isControlMessage, + ShapeStreamInterface, +} from '@electric-sql/client' +import type { + Extension, + PGliteInterface, + Transaction, +} from '@electric-sql/pglite' + +interface LegacyChangeMessage> extends ChangeMessage { + offset?: Offset +} + +export type MapColumnsMap = Record +export type MapColumnsFn = (message: ChangeMessage) => Record +export type MapColumns = MapColumnsMap | MapColumnsFn +export type ShapeKey = string + +type InsertChangeMessage = ChangeMessage & { + headers: { operation: 'insert' } +} + +/** + * The granularity of the commit operation. + * - `up-to-date`: Commit all messages when the `up-to-date` message is received. + * - `operation`: Commit each message in its own transaction. + * - `number`: Commit every N messages. + * Note a commit will always be performed on the `up-to-date` message. + */ +export type CommitGranularity = + | 'up-to-date' + // | 'transaction' // Removed until Electric has stabilised on LSN metadata + | 'operation' + | number + +export interface SyncShapeToTableOptions { + shape: ShapeStreamOptions + table: string + schema?: string + mapColumns?: MapColumns + primaryKey: string[] + shapeKey: ShapeKey | null + useCopy?: boolean + commitGranularity?: CommitGranularity + commitThrottle?: number + onInitialSync?: () => void +} + +export interface SyncShapeToTableResult { + unsubscribe: () => void + readonly isUpToDate: boolean + readonly shapeId: string + subscribe: (cb: () => void, error: (err: Error) => void) => () => void + stream: ShapeStreamInterface +} + +export interface SyncShapeToTableResult { + unsubscribe: () => void + readonly isUpToDate: boolean + readonly shapeId: string + subscribe: (cb: () => void, error: (err: Error) => void) => () => void + stream: ShapeStreamInterface +} + +export interface ElectricSyncOptions { + debug?: boolean + metadataSchema?: string +} + +async function createPlugin( + pg: PGliteInterface, + options?: ElectricSyncOptions, +) { + const debug = options?.debug ?? false + const metadataSchema = options?.metadataSchema ?? 
'electric' + const streams: Array<{ + stream: ShapeStream + aborter: AbortController + }> = [] + + // TODO: keeping an in-memory lock per table such that two + // shapes are not synced into one table - this will be + // resolved by using reference counting in shadow tables + const shapePerTableLock = new Map() + + let initMetadataTablesDone = false + const initMetadataTables = async () => { + if (initMetadataTablesDone) return + initMetadataTablesDone = true + await migrateShapeMetadataTables({ + pg, + metadataSchema, + }) + } + + const namespaceObj = { + initMetadataTables, + syncShapeToTable: async ( + options: SyncShapeToTableOptions, + ): Promise => { + await initMetadataTables() + options = { + commitGranularity: 'up-to-date', + ...options, + } + if (shapePerTableLock.has(options.table)) { + throw new Error('Already syncing shape for table ' + options.table) + } + shapePerTableLock.set(options.table) + let shapeSubState: ShapeSubscriptionState | null = null + + // if shapeKey is not null, ensure persistence of shape subscription + // state is possible and check if it is already persisted + if (options.shapeKey) { + shapeSubState = await getShapeSubscriptionState({ + pg, + metadataSchema, + shapeKey: options.shapeKey, + }) + if (debug && shapeSubState) { + console.log('resuming from shape state', shapeSubState) + } + } + + // If it's a new subscription there is no state to resume from + const isNewSubscription = shapeSubState === null + + // If it's a new subscription we can do a `COPY FROM` to insert the initial data + // TODO: in future when we can have multiple shapes on the same table we will need + // to make sure we only do a `COPY FROM` on the first shape on the table as they + // may overlap and so the insert logic will be wrong. + let doCopy = isNewSubscription && options.useCopy + + // Track if onInitialSync has been called + let onInitialSyncCalled = false + + const aborter = new AbortController() + if (options.shape.signal) { + // we new to have our own aborter to be able to abort the stream + // but still accept the signal from the user + options.shape.signal.addEventListener('abort', () => aborter.abort(), { + once: true, + }) + } + const stream = new ShapeStream({ + ...options.shape, + ...(shapeSubState ?? 
{}), + signal: aborter.signal, + }) + + // TODO: this aggregates all messages in memory until an + // up-to-date message is received, which is not viable for + // _very_ large shapes - either we should commit batches to + // a temporary table and copy over the transactional result + // or use a separate connection to hold a long transaction + let messageAggregator: LegacyChangeMessage[] = [] + let truncateNeeded = false + // let lastLSN: string | null = null // Removed until Electric has stabilised on LSN metadata + let lastCommitAt: number = 0 + + const commit = async () => { + if (messageAggregator.length === 0 && !truncateNeeded) return + const shapeHandle = stream.shapeHandle // The shape handle could change while we are committing + await pg.transaction(async (tx) => { + if (debug) { + console.log('committing message batch', messageAggregator.length) + console.time('commit') + } + + // Set the syncing flag to true during this transaction so that + // user defined triggers on the table are able to chose how to run + // during a sync + tx.exec(`SET LOCAL ${metadataSchema}.syncing = true;`) + + if (truncateNeeded) { + truncateNeeded = false + // TODO: sync into shadow table and reference count + // for now just clear the whole table - will break + // cases with multiple shapes on the same table + await tx.exec(`DELETE FROM ${options.table};`) + if (options.shapeKey) { + await deleteShapeSubscriptionState({ + pg: tx, + metadataSchema, + shapeKey: options.shapeKey, + }) + } + } + + if (doCopy) { + // We can do a `COPY FROM` to insert the initial data + // Split messageAggregator into initial inserts and remaining messages + const initialInserts: InsertChangeMessage[] = [] + const remainingMessages: ChangeMessage[] = [] + let foundNonInsert = false + for (const message of messageAggregator) { + if (!foundNonInsert && message.headers.operation === 'insert') { + initialInserts.push(message as InsertChangeMessage) + } else { + foundNonInsert = true + remainingMessages.push(message) + } + } + if (initialInserts.length > 0) { + // As `COPY FROM` doesn't trigger a NOTIFY, we pop + // the last insert message and and add it to the be beginning + // of the remaining messages to be applied after the `COPY FROM` + remainingMessages.unshift(initialInserts.pop()!) 
+ } + messageAggregator = remainingMessages + + // Do the `COPY FROM` with initial inserts + if (initialInserts.length > 0) { + applyMessagesToTableWithCopy({ + pg: tx, + table: options.table, + schema: options.schema, + messages: initialInserts as InsertChangeMessage[], + mapColumns: options.mapColumns, + primaryKey: options.primaryKey, + debug, + }) + // We don't want to do a `COPY FROM` again after that + doCopy = false + } + } + + for (const changeMessage of messageAggregator) { + await applyMessageToTable({ + pg: tx, + table: options.table, + schema: options.schema, + message: changeMessage, + mapColumns: options.mapColumns, + primaryKey: options.primaryKey, + debug, + }) + } + + if ( + options.shapeKey && + messageAggregator.length > 0 && + shapeHandle !== undefined + ) { + await updateShapeSubscriptionState({ + pg: tx, + metadataSchema, + shapeKey: options.shapeKey, + shapeId: shapeHandle, + lastOffset: getMessageOffset( + stream, + messageAggregator[messageAggregator.length - 1], + ), + }) + } + }) + if (debug) console.timeEnd('commit') + messageAggregator = [] + // Await a timeout to start a new task and allow other connections to do work + await new Promise((resolve) => setTimeout(resolve, 0)) + } + + const throttledCommit = async ({ + reset = false, + }: { reset?: boolean } = {}) => { + const now = Date.now() + if (reset) { + // Reset the last commit time to 0, forcing the next commit to happen immediately + lastCommitAt = 0 + } + if (options.commitThrottle && debug) + console.log( + 'throttled commit: now:', + now, + 'lastCommitAt:', + lastCommitAt, + 'diff:', + now - lastCommitAt, + ) + if ( + options.commitThrottle && + now - lastCommitAt < options.commitThrottle + ) { + // Skip this commit - messages will be caught by next commit or up-to-date + if (debug) console.log('skipping commit due to throttle') + return + } + lastCommitAt = now + await commit() + } + + stream.subscribe(async (messages) => { + if (debug) console.log('sync messages received', messages) + + for (const message of messages) { + if (isChangeMessage(message)) { + // Removed until Electric has stabilised on LSN metadata + // const newLSN = message.offset.split('_')[0] + // if (newLSN !== lastLSN) { + // // If the LSN has changed and granularity is set to transaction + // // we need to commit the current batch. + // // This is done before we accumulate any more messages as they are + // // part of the next transaction batch. 
+ // if (options.commitGranularity === 'transaction') { + // await throttledCommit() + // } + // lastLSN = newLSN + // } + + // accumulate change messages for committing all at once or in batches + messageAggregator.push(message) + + if (options.commitGranularity === 'operation') { + // commit after each operation if granularity is set to operation + await throttledCommit() + } else if (typeof options.commitGranularity === 'number') { + // commit after every N messages if granularity is set to a number + if (messageAggregator.length >= options.commitGranularity) { + await throttledCommit() + } + } + } else if (isControlMessage(message)) { + switch (message.headers.control) { + case 'must-refetch': + // mark table as needing truncation before next batch commit + if (debug) console.log('refetching shape') + truncateNeeded = true + messageAggregator = [] + break + + case 'up-to-date': + // perform all accumulated changes and store stream state + await throttledCommit({ reset: true }) // not throttled, we want this to happen ASAP + if ( + isNewSubscription && + !onInitialSyncCalled && + options.onInitialSync + ) { + options.onInitialSync() + onInitialSyncCalled = true + } + break + } + } + } + }) + + streams.push({ + stream, + aborter, + }) + const unsubscribe = () => { + stream.unsubscribeAll() + aborter.abort() + shapePerTableLock.delete(options.table) + } + return { + unsubscribe, + get isUpToDate() { + return stream.isUpToDate + }, + get shapeId() { + return stream.shapeHandle! + }, + stream, + subscribe: (cb: () => void, error: (err: Error) => void) => { + return stream.subscribe(() => { + if (stream.isUpToDate) { + cb() + } + }, error) + }, + } + }, + } + + const close = async () => { + for (const { stream, aborter } of streams) { + stream.unsubscribeAll() + aborter.abort() + } + } + + return { + namespaceObj, + close, + } +} + +export type SyncNamespaceObj = Awaited< + ReturnType +>['namespaceObj'] + +export type PGliteWithSync = PGliteInterface & { + sync: SyncNamespaceObj +} + +export function electricSync(options?: ElectricSyncOptions) { + return { + name: 'ElectricSQL Sync', + setup: async (pg: PGliteInterface) => { + const { namespaceObj, close } = await createPlugin(pg, options) + return { + namespaceObj, + close, + } + }, + } satisfies Extension +} + +function doMapColumns( + mapColumns: MapColumns, + message: ChangeMessage, +): Record { + if (typeof mapColumns === 'function') { + return mapColumns(message) + } else { + const mappedColumns: Record = {} + for (const [key, value] of Object.entries(mapColumns)) { + mappedColumns[key] = message.value[value] + } + return mappedColumns + } +} + +interface ApplyMessageToTableOptions { + pg: PGliteInterface | Transaction + table: string + schema?: string + message: ChangeMessage + mapColumns?: MapColumns + primaryKey: string[] + debug: boolean +} + +async function applyMessageToTable({ + pg, + table, + schema = 'public', + message, + mapColumns, + primaryKey, + debug, +}: ApplyMessageToTableOptions) { + const data = mapColumns ? 
doMapColumns(mapColumns, message) : message.value + + switch (message.headers.operation) { + case 'insert': { + if (debug) console.log('inserting', data) + const columns = Object.keys(data) + return await pg.query( + ` + INSERT INTO "${schema}"."${table}" + (${columns.map((s) => '"' + s + '"').join(', ')}) + VALUES + (${columns.map((_v, i) => '$' + (i + 1)).join(', ')}) + `, + columns.map((column) => data[column]), + ) + } + + case 'update': { + if (debug) console.log('updating', data) + const columns = Object.keys(data).filter( + // we don't update the primary key, they are used to identify the row + (column) => !primaryKey.includes(column), + ) + if (columns.length === 0) return // nothing to update + return await pg.query( + ` + UPDATE "${schema}"."${table}" + SET ${columns + .map((column, i) => '"' + column + '" = $' + (i + 1)) + .join(', ')} + WHERE ${primaryKey + .map( + (column, i) => + '"' + column + '" = $' + (columns.length + i + 1), + ) + .join(' AND ')} + `, + [ + ...columns.map((column) => data[column]), + ...primaryKey.map((column) => data[column]), + ], + ) + } + + case 'delete': { + if (debug) console.log('deleting', data) + return await pg.query( + ` + DELETE FROM "${schema}"."${table}" + WHERE ${primaryKey + .map((column, i) => '"' + column + '" = $' + (i + 1)) + .join(' AND ')} + `, + [...primaryKey.map((column) => data[column])], + ) + } + } +} + +interface ApplyMessagesToTableWithCopyOptions { + pg: PGliteInterface | Transaction + table: string + schema?: string + messages: InsertChangeMessage[] + mapColumns?: MapColumns + primaryKey: string[] + debug: boolean +} + +async function applyMessagesToTableWithCopy({ + pg, + table, + schema = 'public', + messages, + mapColumns, + debug, +}: ApplyMessagesToTableWithCopyOptions) { + if (debug) console.log('applying messages with COPY') + + // Map the messages to the data to be inserted + const data: Record[] = messages.map((message) => + mapColumns ? doMapColumns(mapColumns, message) : message.value, + ) + + // Get column names from the first message + const columns = Object.keys(data[0]) + + // Create CSV data + const csvData = data + .map((message) => { + return columns + .map((column) => { + const value = message[column] + // Escape double quotes and wrap in quotes if necessary + if ( + typeof value === 'string' && + (value.includes(',') || value.includes('"') || value.includes('\n')) + ) { + return `"${value.replace(/"/g, '""')}"` + } + return value === null ? 
'\\N' : value + }) + .join(',') + }) + .join('\n') + const csvBlob = new Blob([csvData], { type: 'text/csv' }) + + // Perform COPY FROM + await pg.query( + ` + COPY "${schema}"."${table}" (${columns.map((c) => `"${c}"`).join(', ')}) + FROM '/dev/blob' + WITH (FORMAT csv, NULL '\\N') + `, + [], + { + blob: csvBlob, + }, + ) + + if (debug) console.log(`Inserted ${messages.length} rows using COPY`) +} + +interface GetShapeSubscriptionStateOptions { + readonly pg: PGliteInterface | Transaction + readonly metadataSchema: string + readonly shapeKey: ShapeKey +} + +type ShapeSubscriptionState = Pick + +async function getShapeSubscriptionState({ + pg, + metadataSchema, + shapeKey, +}: GetShapeSubscriptionStateOptions): Promise { + const result = await pg.query<{ shape_id: string; last_offset: string }>( + ` + SELECT shape_id, last_offset + FROM ${subscriptionMetadataTableName(metadataSchema)} + WHERE shape_key = $1 + `, + [shapeKey], + ) + + if (result.rows.length === 0) return null + + const { shape_id: handle, last_offset: offset } = result.rows[0] + return { + handle, + offset: offset as Offset, + } +} + +interface UpdateShapeSubscriptionStateOptions { + pg: PGliteInterface | Transaction + metadataSchema: string + shapeKey: ShapeKey + shapeId: string + lastOffset: Offset +} + +async function updateShapeSubscriptionState({ + pg, + metadataSchema, + shapeKey, + shapeId, + lastOffset, +}: UpdateShapeSubscriptionStateOptions) { + await pg.query( + ` + INSERT INTO ${subscriptionMetadataTableName(metadataSchema)} (shape_key, shape_id, last_offset) + VALUES ($1, $2, $3) + ON CONFLICT(shape_key) + DO UPDATE SET + shape_id = EXCLUDED.shape_id, + last_offset = EXCLUDED.last_offset; + `, + [shapeKey, shapeId, lastOffset], + ) +} + +interface DeleteShapeSubscriptionStateOptions { + pg: PGliteInterface | Transaction + metadataSchema: string + shapeKey: ShapeKey +} + +async function deleteShapeSubscriptionState({ + pg, + metadataSchema, + shapeKey, +}: DeleteShapeSubscriptionStateOptions) { + await pg.query( + `DELETE FROM ${subscriptionMetadataTableName(metadataSchema)} WHERE shape_key = $1`, + [shapeKey], + ) +} + +interface MigrateShapeMetadataTablesOptions { + pg: PGliteInterface | Transaction + metadataSchema: string +} + +async function migrateShapeMetadataTables({ + pg, + metadataSchema, +}: MigrateShapeMetadataTablesOptions) { + await pg.exec( + ` + SET ${metadataSchema}.syncing = false; + CREATE SCHEMA IF NOT EXISTS "${metadataSchema}"; + CREATE TABLE IF NOT EXISTS ${subscriptionMetadataTableName(metadataSchema)} ( + shape_key TEXT PRIMARY KEY, + shape_id TEXT NOT NULL, + last_offset TEXT NOT NULL + ); + `, + ) +} + +function subscriptionMetadataTableName(metadatSchema: string) { + return `"${metadatSchema}"."${subscriptionTableName}"` +} + +const subscriptionTableName = `shape_subscriptions_metadata` + +function getMessageOffset( + stream: ShapeStream, + message: LegacyChangeMessage, +): Offset { + if (message.offset) { + return message.offset + } else if ( + message.headers.lsn !== undefined && + message.headers.op_position !== undefined + ) { + return `${message.headers.lsn}_${message.headers.op_position}` as Offset + } else { + return stream.lastOffset + } +} diff --git a/packages/pglite-sync/src/index.ts b/packages/pglite-sync/src/index.ts index 8a463f7d..deda05c1 100644 --- a/packages/pglite-sync/src/index.ts +++ b/packages/pglite-sync/src/index.ts @@ -1,76 +1,29 @@ -import type { Offset, Row, ShapeStreamOptions } from '@electric-sql/client' +import type { Row } from '@electric-sql/client' 
import { ChangeMessage, - ShapeStream, isChangeMessage, isControlMessage, - ShapeStreamInterface, } from '@electric-sql/client' +import { MultiShapeStream } from '@electric-sql/experimental' +import type { Extension, PGliteInterface } from '@electric-sql/pglite' +import { + migrateSubscriptionMetadataTables, + getSubscriptionState, + updateSubscriptionState, + deleteSubscriptionState, + SubscriptionState, +} from './subscriptionState' import type { - Extension, - PGliteInterface, - Transaction, -} from '@electric-sql/pglite' - -interface LegacyChangeMessage> extends ChangeMessage { - offset?: Offset -} - -export type MapColumnsMap = Record -export type MapColumnsFn = (message: ChangeMessage) => Record -export type MapColumns = MapColumnsMap | MapColumnsFn -export type ShapeKey = string + ElectricSyncOptions, + SyncShapesToTablesOptions, + SyncShapesToTablesResult, + SyncShapeToTableOptions, + SyncShapeToTableResult, + InsertChangeMessage, +} from './types' +import { applyMessageToTable, applyMessagesToTableWithCopy } from './apply' -type InsertChangeMessage = ChangeMessage & { - headers: { operation: 'insert' } -} - -/** - * The granularity of the commit operation. - * - `up-to-date`: Commit all messages when the `up-to-date` message is received. - * - `operation`: Commit each message in its own transaction. - * - `number`: Commit every N messages. - * Note a commit will always be performed on the `up-to-date` message. - */ -export type CommitGranularity = - | 'up-to-date' - // | 'transaction' // Removed until Electric has stabilised on LSN metadata - | 'operation' - | number - -export interface SyncShapeToTableOptions { - shape: ShapeStreamOptions - table: string - schema?: string - mapColumns?: MapColumns - primaryKey: string[] - shapeKey: ShapeKey | null - useCopy?: boolean - commitGranularity?: CommitGranularity - commitThrottle?: number - onInitialSync?: () => void -} - -export interface SyncShapeToTableResult { - unsubscribe: () => void - readonly isUpToDate: boolean - readonly shapeId: string - subscribe: (cb: () => void, error: (err: Error) => void) => () => void - stream: ShapeStreamInterface -} - -export interface SyncShapeToTableResult { - unsubscribe: () => void - readonly isUpToDate: boolean - readonly shapeId: string - subscribe: (cb: () => void, error: (err: Error) => void) => () => void - stream: ShapeStreamInterface -} - -export interface ElectricSyncOptions { - debug?: boolean - metadataSchema?: string -} +export * from './types' async function createPlugin( pg: PGliteInterface, @@ -79,11 +32,11 @@ async function createPlugin( const debug = options?.debug ?? false const metadataSchema = options?.metadataSchema ?? 
'electric' const streams: Array<{ - stream: ShapeStream + stream: MultiShapeStream>> aborter: AbortController }> = [] - // TODO: keeping an in-memory lock per table such that two + // We keep an in-memory lock per table such that two // shapes are not synced into one table - this will be // resolved by using reference counting in shadow tables const shapePerTableLock = new Map() @@ -92,113 +45,143 @@ async function createPlugin( const initMetadataTables = async () => { if (initMetadataTablesDone) return initMetadataTablesDone = true - await migrateShapeMetadataTables({ + await migrateSubscriptionMetadataTables({ pg, metadataSchema, }) } - const namespaceObj = { - initMetadataTables, - syncShapeToTable: async ( - options: SyncShapeToTableOptions, - ): Promise => { - await initMetadataTables() - options = { - commitGranularity: 'up-to-date', - ...options, + const syncShapesToTables = async ({ + key, + shapes, + useCopy, + onInitialSync, + }: SyncShapesToTablesOptions): Promise => { + await initMetadataTables() + + Object.values(shapes).forEach((shape) => { + if (shapePerTableLock.has(shape.table)) { + throw new Error('Already syncing shape for table ' + shape.table) } - if (shapePerTableLock.has(options.table)) { - throw new Error('Already syncing shape for table ' + options.table) + shapePerTableLock.set(shape.table) + }) + + let subState: SubscriptionState | null = null + + // if key is not null, ensure persistence of subscription state + // is possible and check if it is already persisted + if (key) { + subState = await getSubscriptionState({ + pg, + metadataSchema, + subscriptionKey: key, + }) + if (debug && subState) { + console.log('resuming from subscription state', subState) } - shapePerTableLock.set(options.table) - let shapeSubState: ShapeSubscriptionState | null = null - - // if shapeKey is not null, ensure persistence of shape subscription - // state is possible and check if it is already persisted - if (options.shapeKey) { - shapeSubState = await getShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey: options.shapeKey, - }) - if (debug && shapeSubState) { - console.log('resuming from shape state', shapeSubState) + } + + // If it's a new subscription there is no state to resume from + const isNewSubscription = subState === null + + // If it's a new subscription we can do a `COPY FROM` to insert the initial data + // TODO: in future when we can have multiple shapes on the same table we will need + // to make sure we only do a `COPY FROM` on the first shape on the table as they + // may overlap and so the insert logic will be wrong. 
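+    // `useCopy` opts the initial snapshot into the bulk `COPY ... FROM '/dev/blob'`
+    // path (see applyMessagesToTableWithCopy); later changes are always applied as
+    // per-row INSERT/UPDATE/DELETE statements.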
+ let doCopy = isNewSubscription && useCopy + + // Track if onInitialSync has been called + let onInitialSyncCalled = false + + // Map of shape name to lsn to changes + // We accumulate changes for each lsn and then apply them all at once + const changes = new Map>[]>>( + Object.keys(shapes).map((key) => [key, new Map()]), + ) + + // We track the highest completely buffered lsn for each shape + const completeLsns = new Map( + Object.keys(shapes).map((key) => [key, -Infinity]), + ) + + // We track which shapes need a truncate + // These are truncated at the start of the next commit + const truncateNeeded = new Set() + + // We also have to track the last lsn that we have committed + // This is across all shapes + const lastCommittedLsn: number = -Infinity + + // We need our own aborter to be able to abort the streams but still accept the + // signals from the user for each shape, and so we monitor the user provided signal + // for each shape and abort our own aborter when the user signal is aborted. + const aborter = new AbortController() + Object.values(shapes) + .filter((shapeOptions) => !!shapeOptions.shape.signal) + .forEach((shapeOptions) => { + shapeOptions.shape.signal!.addEventListener( + 'abort', + () => aborter.abort(), + { + once: true, + }, + ) + }) + + const multiShapeStream = new MultiShapeStream>>( + { + shapes: Object.fromEntries( + Object.entries(shapes).map(([key, shapeOptions]) => [ + key, + shapeOptions.shape, + ]), + ), + }, + ) + + const commitUpToLsn = async (targetLsn: number) => { + // We need to collect all the messages for each shape that we need to commit + const messagesToCommit = new Map>[]>( + Object.keys(shapes).map((shapeName) => [shapeName, []]), + ) + for (const [shapeName, shapeChanges] of changes.entries()) { + for (const lsn of shapeChanges.keys()) { + if (lsn <= targetLsn) { + messagesToCommit.get(shapeName)!.push(...shapeChanges.get(lsn)!) + shapeChanges.delete(lsn) + } } } - // If it's a new subscription there is no state to resume from - const isNewSubscription = shapeSubState === null - - // If it's a new subscription we can do a `COPY FROM` to insert the initial data - // TODO: in future when we can have multiple shapes on the same table we will need - // to make sure we only do a `COPY FROM` on the first shape on the table as they - // may overlap and so the insert logic will be wrong. - let doCopy = isNewSubscription && options.useCopy - - // Track if onInitialSync has been called - let onInitialSyncCalled = false - - const aborter = new AbortController() - if (options.shape.signal) { - // we new to have our own aborter to be able to abort the stream - // but still accept the signal from the user - options.shape.signal.addEventListener('abort', () => aborter.abort(), { - once: true, - }) + const hasMessagesToCommit = + Math.min( + ...Array.from(messagesToCommit.values()).map( + (messages) => messages.length, + ), + ) > 0 + + if (!hasMessagesToCommit) { + return } - const stream = new ShapeStream({ - ...options.shape, - ...(shapeSubState ?? 
{}), - signal: aborter.signal, - }) - // TODO: this aggregates all messages in memory until an - // up-to-date message is received, which is not viable for - // _very_ large shapes - either we should commit batches to - // a temporary table and copy over the transactional result - // or use a separate connection to hold a long transaction - let messageAggregator: LegacyChangeMessage[] = [] - let truncateNeeded = false - // let lastLSN: string | null = null // Removed until Electric has stabilised on LSN metadata - let lastCommitAt: number = 0 - - const commit = async () => { - if (messageAggregator.length === 0 && !truncateNeeded) return - const shapeHandle = stream.shapeHandle // The shape handle could change while we are committing - await pg.transaction(async (tx) => { - if (debug) { - console.log('committing message batch', messageAggregator.length) - console.time('commit') - } + await pg.transaction(async (tx) => { + for (let [shapeName, messages] of messagesToCommit.entries()) { + const shape = shapes[shapeName] - // Set the syncing flag to true during this transaction so that - // user defined triggers on the table are able to chose how to run - // during a sync - tx.exec(`SET LOCAL ${metadataSchema}.syncing = true;`) - - if (truncateNeeded) { - truncateNeeded = false - // TODO: sync into shadow table and reference count - // for now just clear the whole table - will break - // cases with multiple shapes on the same table - await tx.exec(`DELETE FROM ${options.table};`) - if (options.shapeKey) { - await deleteShapeSubscriptionState({ - pg: tx, - metadataSchema, - shapeKey: options.shapeKey, - }) - } + // If we need to truncate the table, do so + if (truncateNeeded.has(shapeName)) { + await tx.exec(`DELETE FROM ${shape.table};`) + truncateNeeded.delete(shapeName) } + // Apply the changes to the table if (doCopy) { // We can do a `COPY FROM` to insert the initial data // Split messageAggregator into initial inserts and remaining messages const initialInserts: InsertChangeMessage[] = [] const remainingMessages: ChangeMessage[] = [] let foundNonInsert = false - for (const message of messageAggregator) { + for (const message of messages) { if (!foundNonInsert && message.headers.operation === 'insert') { initialInserts.push(message as InsertChangeMessage) } else { @@ -212,17 +195,17 @@ async function createPlugin( // of the remaining messages to be applied after the `COPY FROM` remainingMessages.unshift(initialInserts.pop()!) 
} - messageAggregator = remainingMessages + messages = remainingMessages // Do the `COPY FROM` with initial inserts if (initialInserts.length > 0) { applyMessagesToTableWithCopy({ pg: tx, - table: options.table, - schema: options.schema, + table: shape.table, + schema: shape.schema, messages: initialInserts as InsertChangeMessage[], - mapColumns: options.mapColumns, - primaryKey: options.primaryKey, + mapColumns: shape.mapColumns, + primaryKey: shape.primaryKey, debug, }) // We don't want to do a `COPY FROM` again after that @@ -230,153 +213,153 @@ async function createPlugin( } } - for (const changeMessage of messageAggregator) { + for (const changeMessage of messages) { await applyMessageToTable({ pg: tx, - table: options.table, - schema: options.schema, + table: shape.table, + schema: shape.schema, message: changeMessage, - mapColumns: options.mapColumns, - primaryKey: options.primaryKey, + mapColumns: shape.mapColumns, + primaryKey: shape.primaryKey, debug, }) } - - if ( - options.shapeKey && - messageAggregator.length > 0 && - shapeHandle !== undefined - ) { - await updateShapeSubscriptionState({ - pg: tx, - metadataSchema, - shapeKey: options.shapeKey, - shapeId: shapeHandle, - lastOffset: getMessageOffset( - stream, - messageAggregator[messageAggregator.length - 1], - ), - }) - } - }) - if (debug) console.timeEnd('commit') - messageAggregator = [] - // Await a timeout to start a new task and allow other connections to do work - await new Promise((resolve) => setTimeout(resolve, 0)) - } - - const throttledCommit = async ({ - reset = false, - }: { reset?: boolean } = {}) => { - const now = Date.now() - if (reset) { - // Reset the last commit time to 0, forcing the next commit to happen immediately - lastCommitAt = 0 } - if (options.commitThrottle && debug) - console.log( - 'throttled commit: now:', - now, - 'lastCommitAt:', - lastCommitAt, - 'diff:', - now - lastCommitAt, - ) - if ( - options.commitThrottle && - now - lastCommitAt < options.commitThrottle - ) { - // Skip this commit - messages will be caught by next commit or up-to-date - if (debug) console.log('skipping commit due to throttle') - return + + if (key) { + await updateSubscriptionState({ + pg: tx, + metadataSchema, + subscriptionKey: key, + shapeMetadata: Object.fromEntries( + Object.keys(shapes).map((shapeName) => [ + shapeName, + { + handle: multiShapeStream.shapes[shapeName].shapeHandle!, + offset: multiShapeStream.shapes[shapeName].lastOffset, + }, + ]), + ), + lastLsn: targetLsn, + }) } - lastCommitAt = now - await commit() + }) + if (debug) console.timeEnd('commit') + if ( + onInitialSync && + !onInitialSyncCalled && + multiShapeStream.isUpToDate + ) { + onInitialSync() + onInitialSyncCalled = true } + } - stream.subscribe(async (messages) => { - if (debug) console.log('sync messages received', messages) - - for (const message of messages) { - if (isChangeMessage(message)) { - // Removed until Electric has stabilised on LSN metadata - // const newLSN = message.offset.split('_')[0] - // if (newLSN !== lastLSN) { - // // If the LSN has changed and granularity is set to transaction - // // we need to commit the current batch. - // // This is done before we accumulate any more messages as they are - // // part of the next transaction batch. 
- // if (options.commitGranularity === 'transaction') { - // await throttledCommit() - // } - // lastLSN = newLSN - // } - - // accumulate change messages for committing all at once or in batches - messageAggregator.push(message) - - if (options.commitGranularity === 'operation') { - // commit after each operation if granularity is set to operation - await throttledCommit() - } else if (typeof options.commitGranularity === 'number') { - // commit after every N messages if granularity is set to a number - if (messageAggregator.length >= options.commitGranularity) { - await throttledCommit() + multiShapeStream.subscribe((messages) => { + messages.forEach((message) => { + if (isChangeMessage(message)) { + const shapeChanges = changes.get(message.shape)! + const lsn = (message.headers.lsn as number | undefined) ?? 0 + const isLastOfLsn = + (message.headers.last as boolean | undefined) ?? false + if (!shapeChanges.has(lsn)) { + shapeChanges.set(lsn, []) + } + shapeChanges.get(lsn)!.push(message) + if (isLastOfLsn) { + completeLsns.set(message.shape, lsn) + } + } else if (isControlMessage(message)) { + switch (message.headers.control) { + case 'up-to-date': { + // Update the complete lsn for this shape + if (typeof message.headers.global_last_seen_lsn !== `number`) { + throw new Error(`global_last_seen_lsn is not a number`) } + const globalLastSeenLsn = message.headers.global_last_seen_lsn + completeLsns.set(message.shape, globalLastSeenLsn) + break } - } else if (isControlMessage(message)) { - switch (message.headers.control) { - case 'must-refetch': - // mark table as needing truncation before next batch commit - if (debug) console.log('refetching shape') - truncateNeeded = true - messageAggregator = [] - break - - case 'up-to-date': - // perform all accumulated changes and store stream state - await throttledCommit({ reset: true }) // not throttled, we want this to happen ASAP - if ( - isNewSubscription && - !onInitialSyncCalled && - options.onInitialSync - ) { - options.onInitialSync() - onInitialSyncCalled = true - } - break + case 'must-refetch': { + // Reset the changes for this shape + const shapeChanges = changes.get(message.shape)! + shapeChanges.clear() + completeLsns.set(message.shape, -Infinity) + // Track that we need to truncate the table for this shape + truncateNeeded.add(message.shape) + break } } } }) + const lowestCommittedLsn = Math.min(...Array.from(completeLsns.values())) + if (lowestCommittedLsn > lastCommittedLsn) { + // We have new changes to commit + commitUpToLsn(lowestCommittedLsn) + } + }) - streams.push({ - stream, - aborter, - }) - const unsubscribe = () => { - stream.unsubscribeAll() - aborter.abort() - shapePerTableLock.delete(options.table) + streams.push({ + stream: multiShapeStream, + aborter, + }) + const unsubscribe = () => { + multiShapeStream.unsubscribeAll() + aborter.abort() + for (const shape of Object.values(shapes)) { + shapePerTableLock.delete(shape.table) } - return { - unsubscribe, - get isUpToDate() { - return stream.isUpToDate - }, - get shapeId() { - return stream.shapeHandle! 
- }, - stream, - subscribe: (cb: () => void, error: (err: Error) => void) => { - return stream.subscribe(() => { - if (stream.isUpToDate) { - cb() - } - }, error) + } + return { + unsubscribe, + get isUpToDate() { + return multiShapeStream.isUpToDate + }, + streams: Object.fromEntries( + Object.keys(shapes).map((shapeName) => [ + shapeName, + multiShapeStream.shapes[shapeName], + ]), + ), + } + } + + const syncShapeToTable = async ( + options: SyncShapeToTableOptions, + ): Promise => { + const multiShapeSub = await syncShapesToTables({ + shapes: { + shape: { + shape: options.shape, + table: options.table, + schema: options.schema, + mapColumns: options.mapColumns, + primaryKey: options.primaryKey, }, - } - }, + }, + key: options.shapeKey, + useCopy: options.useCopy, + onInitialSync: options.onInitialSync, + }) + return { + unsubscribe: multiShapeSub.unsubscribe, + isUpToDate: multiShapeSub.isUpToDate, + stream: multiShapeSub.streams.shape, + } + } + const deleteSubscription = async (key: string) => { + await deleteSubscriptionState({ + pg, + metadataSchema, + subscriptionKey: key, + }) + } + + const namespaceObj = { + initMetadataTables, + syncShapesToTables, + syncShapeToTable, + deleteSubscription, } const close = async () => { @@ -412,280 +395,3 @@ export function electricSync(options?: ElectricSyncOptions) { }, } satisfies Extension } - -function doMapColumns( - mapColumns: MapColumns, - message: ChangeMessage, -): Record { - if (typeof mapColumns === 'function') { - return mapColumns(message) - } else { - const mappedColumns: Record = {} - for (const [key, value] of Object.entries(mapColumns)) { - mappedColumns[key] = message.value[value] - } - return mappedColumns - } -} - -interface ApplyMessageToTableOptions { - pg: PGliteInterface | Transaction - table: string - schema?: string - message: ChangeMessage - mapColumns?: MapColumns - primaryKey: string[] - debug: boolean -} - -async function applyMessageToTable({ - pg, - table, - schema = 'public', - message, - mapColumns, - primaryKey, - debug, -}: ApplyMessageToTableOptions) { - const data = mapColumns ? 
doMapColumns(mapColumns, message) : message.value - - switch (message.headers.operation) { - case 'insert': { - if (debug) console.log('inserting', data) - const columns = Object.keys(data) - return await pg.query( - ` - INSERT INTO "${schema}"."${table}" - (${columns.map((s) => '"' + s + '"').join(', ')}) - VALUES - (${columns.map((_v, i) => '$' + (i + 1)).join(', ')}) - `, - columns.map((column) => data[column]), - ) - } - - case 'update': { - if (debug) console.log('updating', data) - const columns = Object.keys(data).filter( - // we don't update the primary key, they are used to identify the row - (column) => !primaryKey.includes(column), - ) - if (columns.length === 0) return // nothing to update - return await pg.query( - ` - UPDATE "${schema}"."${table}" - SET ${columns - .map((column, i) => '"' + column + '" = $' + (i + 1)) - .join(', ')} - WHERE ${primaryKey - .map( - (column, i) => - '"' + column + '" = $' + (columns.length + i + 1), - ) - .join(' AND ')} - `, - [ - ...columns.map((column) => data[column]), - ...primaryKey.map((column) => data[column]), - ], - ) - } - - case 'delete': { - if (debug) console.log('deleting', data) - return await pg.query( - ` - DELETE FROM "${schema}"."${table}" - WHERE ${primaryKey - .map((column, i) => '"' + column + '" = $' + (i + 1)) - .join(' AND ')} - `, - [...primaryKey.map((column) => data[column])], - ) - } - } -} - -interface ApplyMessagesToTableWithCopyOptions { - pg: PGliteInterface | Transaction - table: string - schema?: string - messages: InsertChangeMessage[] - mapColumns?: MapColumns - primaryKey: string[] - debug: boolean -} - -async function applyMessagesToTableWithCopy({ - pg, - table, - schema = 'public', - messages, - mapColumns, - debug, -}: ApplyMessagesToTableWithCopyOptions) { - if (debug) console.log('applying messages with COPY') - - // Map the messages to the data to be inserted - const data: Record[] = messages.map((message) => - mapColumns ? doMapColumns(mapColumns, message) : message.value, - ) - - // Get column names from the first message - const columns = Object.keys(data[0]) - - // Create CSV data - const csvData = data - .map((message) => { - return columns - .map((column) => { - const value = message[column] - // Escape double quotes and wrap in quotes if necessary - if ( - typeof value === 'string' && - (value.includes(',') || value.includes('"') || value.includes('\n')) - ) { - return `"${value.replace(/"/g, '""')}"` - } - return value === null ? 
'\\N' : value - }) - .join(',') - }) - .join('\n') - const csvBlob = new Blob([csvData], { type: 'text/csv' }) - - // Perform COPY FROM - await pg.query( - ` - COPY "${schema}"."${table}" (${columns.map((c) => `"${c}"`).join(', ')}) - FROM '/dev/blob' - WITH (FORMAT csv, NULL '\\N') - `, - [], - { - blob: csvBlob, - }, - ) - - if (debug) console.log(`Inserted ${messages.length} rows using COPY`) -} - -interface GetShapeSubscriptionStateOptions { - readonly pg: PGliteInterface | Transaction - readonly metadataSchema: string - readonly shapeKey: ShapeKey -} - -type ShapeSubscriptionState = Pick - -async function getShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, -}: GetShapeSubscriptionStateOptions): Promise { - const result = await pg.query<{ shape_id: string; last_offset: string }>( - ` - SELECT shape_id, last_offset - FROM ${subscriptionMetadataTableName(metadataSchema)} - WHERE shape_key = $1 - `, - [shapeKey], - ) - - if (result.rows.length === 0) return null - - const { shape_id: handle, last_offset: offset } = result.rows[0] - return { - handle, - offset: offset as Offset, - } -} - -interface UpdateShapeSubscriptionStateOptions { - pg: PGliteInterface | Transaction - metadataSchema: string - shapeKey: ShapeKey - shapeId: string - lastOffset: Offset -} - -async function updateShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, - shapeId, - lastOffset, -}: UpdateShapeSubscriptionStateOptions) { - await pg.query( - ` - INSERT INTO ${subscriptionMetadataTableName(metadataSchema)} (shape_key, shape_id, last_offset) - VALUES ($1, $2, $3) - ON CONFLICT(shape_key) - DO UPDATE SET - shape_id = EXCLUDED.shape_id, - last_offset = EXCLUDED.last_offset; - `, - [shapeKey, shapeId, lastOffset], - ) -} - -interface DeleteShapeSubscriptionStateOptions { - pg: PGliteInterface | Transaction - metadataSchema: string - shapeKey: ShapeKey -} - -async function deleteShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, -}: DeleteShapeSubscriptionStateOptions) { - await pg.query( - `DELETE FROM ${subscriptionMetadataTableName(metadataSchema)} WHERE shape_key = $1`, - [shapeKey], - ) -} - -interface MigrateShapeMetadataTablesOptions { - pg: PGliteInterface | Transaction - metadataSchema: string -} - -async function migrateShapeMetadataTables({ - pg, - metadataSchema, -}: MigrateShapeMetadataTablesOptions) { - await pg.exec( - ` - SET ${metadataSchema}.syncing = false; - CREATE SCHEMA IF NOT EXISTS "${metadataSchema}"; - CREATE TABLE IF NOT EXISTS ${subscriptionMetadataTableName(metadataSchema)} ( - shape_key TEXT PRIMARY KEY, - shape_id TEXT NOT NULL, - last_offset TEXT NOT NULL - ); - `, - ) -} - -function subscriptionMetadataTableName(metadatSchema: string) { - return `"${metadatSchema}"."${subscriptionTableName}"` -} - -const subscriptionTableName = `shape_subscriptions_metadata` - -function getMessageOffset( - stream: ShapeStream, - message: LegacyChangeMessage, -): Offset { - if (message.offset) { - return message.offset - } else if ( - message.headers.lsn !== undefined && - message.headers.op_position !== undefined - ) { - return `${message.headers.lsn}_${message.headers.op_position}` as Offset - } else { - return stream.lastOffset - } -} diff --git a/packages/pglite-sync/src/subscriptionState.ts b/packages/pglite-sync/src/subscriptionState.ts new file mode 100644 index 00000000..f0fbe472 --- /dev/null +++ b/packages/pglite-sync/src/subscriptionState.ts @@ -0,0 +1,135 @@ +import type { PGliteInterface, Transaction } from '@electric-sql/pglite' +import type { Offset } from 
'@electric-sql/client' +import { SubscriptionKey } from './types' + +export interface SubscriptionState { + key: SubscriptionKey + shapeMetadata: ShapeSubscriptionState[] + lastLsn: number +} + +export interface ShapeSubscriptionState { + handle: string + offset: Offset +} + +export interface GetSubscriptionStateOptions { + readonly pg: PGliteInterface | Transaction + readonly metadataSchema: string + readonly subscriptionKey: SubscriptionKey +} + +/** + * Get the subscription state for a given key. + * @param options - The options for the subscription state. + * @returns The subscription state or null if it does not exist. + */ +export async function getSubscriptionState({ + pg, + metadataSchema, + subscriptionKey, +}: GetSubscriptionStateOptions): Promise { + const result = await pg.query( + ` + SELECT key, shape_metadata, last_lsn + FROM ${subscriptionMetadataTableName(metadataSchema)} + WHERE key = $1 + `, + [subscriptionKey], + ) + + if (result.rows.length === 0) { + return null + } else if (result.rows.length > 1) { + throw new Error(`Multiple subscriptions found for key: ${subscriptionKey}`) + } + + return result.rows[0] +} + +export interface UpdateSubscriptionStateOptions { + pg: PGliteInterface | Transaction + metadataSchema: string + subscriptionKey: SubscriptionKey + shapeMetadata: Record + lastLsn: number +} + +/** + * Update the subscription state for a given key. + * @param options - The options for the subscription state. + */ +export async function updateSubscriptionState({ + pg, + metadataSchema, + subscriptionKey, + shapeMetadata, + lastLsn, +}: UpdateSubscriptionStateOptions) { + await pg.query( + ` + INSERT INTO ${subscriptionMetadataTableName(metadataSchema)} + (key, shape_metadata, last_lsn) + VALUES + ($1, $2, $3) + ON CONFLICT(key) + DO UPDATE SET + shape_metadata = EXCLUDED.shape_metadata, + last_lsn = EXCLUDED.last_lsn; + `, + [subscriptionKey, shapeMetadata, lastLsn], + ) +} + +export interface DeleteSubscriptionStateOptions { + pg: PGliteInterface | Transaction + metadataSchema: string + subscriptionKey: SubscriptionKey +} + +/** + * Delete the subscription state for a given key. + * @param options - The options for the subscription state. + */ +export async function deleteSubscriptionState({ + pg, + metadataSchema, + subscriptionKey, +}: DeleteSubscriptionStateOptions) { + await pg.query( + `DELETE FROM ${subscriptionMetadataTableName(metadataSchema)} WHERE key = $1`, + [subscriptionKey], + ) +} + +export interface MigrateSubscriptionMetadataTablesOptions { + pg: PGliteInterface | Transaction + metadataSchema: string +} + +/** + * Migrate the subscription metadata tables. + * @param options - The options for the subscription metadata tables. 
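+ * @remarks Creates the metadata schema if needed, plus the `subscriptions_metadata`
+ * table that stores each subscription's shape handles/offsets (as JSONB) and the
+ * last LSN committed locally, and initialises the `<schema>.syncing` flag to false.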
+ */ +export async function migrateSubscriptionMetadataTables({ + pg, + metadataSchema, +}: MigrateSubscriptionMetadataTablesOptions) { + await pg.exec( + ` + SET ${metadataSchema}.syncing = false; + CREATE SCHEMA IF NOT EXISTS "${metadataSchema}"; + CREATE TABLE IF NOT EXISTS ${subscriptionMetadataTableName(metadataSchema)} ( + key TEXT PRIMARY KEY, + shape_metadata JSONB NOT NULL, + last_lsn NUMERIC NOT NULL + ); + `, + ) +} + +function subscriptionMetadataTableName(metadataSchema: string) { + return `"${metadataSchema}"."${subscriptionTableName}"` +} + +const subscriptionTableName = `subscriptions_metadata` diff --git a/packages/pglite-sync/src/types.ts b/packages/pglite-sync/src/types.ts new file mode 100644 index 00000000..01c47aa6 --- /dev/null +++ b/packages/pglite-sync/src/types.ts @@ -0,0 +1,58 @@ +import type { + ShapeStreamOptions, + ShapeStreamInterface, + Row, + ChangeMessage, +} from '@electric-sql/client' + +export type MapColumnsMap = Record +export type MapColumnsFn = (message: ChangeMessage) => Record +export type MapColumns = MapColumnsMap | MapColumnsFn +export type SubscriptionKey = string + +export interface ShapeToTableOptions { + shape: ShapeStreamOptions + table: string + schema?: string + mapColumns?: MapColumns + primaryKey: string[] +} + +export interface SyncShapesToTablesOptions { + key: string | null + shapes: Record + useCopy?: boolean + onInitialSync?: () => void +} + +export interface SyncShapesToTablesResult { + unsubscribe: () => void + readonly isUpToDate: boolean + streams: Record>> +} + +export interface SyncShapeToTableOptions { + shape: ShapeStreamOptions + table: string + schema?: string + mapColumns?: MapColumns + primaryKey: string[] + shapeKey: string | null + useCopy?: boolean + onInitialSync?: () => void +} + +export interface SyncShapeToTableResult { + unsubscribe: () => void + readonly isUpToDate: boolean + stream: ShapeStreamInterface> +} + +export interface ElectricSyncOptions { + debug?: boolean + metadataSchema?: string +} + +export type InsertChangeMessage = ChangeMessage & { + headers: { operation: 'insert' } +} diff --git a/packages/pglite-sync/test/sync.test.ts b/packages/pglite-sync/test/sync.test.ts index b2201d14..89ad70c1 100644 --- a/packages/pglite-sync/test/sync.test.ts +++ b/packages/pglite-sync/test/sync.test.ts @@ -724,465 +724,6 @@ describe('pglite-sync', () => { shape.unsubscribe() }) - it('respects numeric batch commit granularity settings', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), - unsubscribeAll: vi.fn(), - })) - - // Create a trigger to notify on transaction commit - await pg.exec(` - CREATE OR REPLACE FUNCTION notify_transaction() - RETURNS TRIGGER AS $$ - BEGIN - PERFORM pg_notify('transaction_commit', TG_TABLE_NAME); - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - - CREATE TRIGGER todo_transaction_trigger - AFTER INSERT ON todo - FOR EACH STATEMENT - EXECUTE FUNCTION notify_transaction(); - `) - - const commits: string[] = [] - const unsubscribe = await pg.listen('transaction_commit', (payload) => { - commits.push(payload) - }) - - const batchSize = 5 - const shape = await pg.electric.syncShapeToTable({ - shape: { - url: 'http://localhost:3000/v1/shape', - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - commitGranularity: batchSize, - shapeKey: null, - }) - - // Create test 
messages - 7 total (should see batch of 5, then 2) - const messages = Array.from( - { length: 7 }, - (_, idx) => - ({ - headers: { operation: 'insert' }, - key: `id${idx}`, - value: { - id: idx, - task: `task${idx}`, - done: false, - }, - }) satisfies Message, - ) - - await feedMessages(messages) - - // Wait for all inserts to complete - await vi.waitUntil(async () => { - const result = await pg.sql<{ count: number }>` - SELECT COUNT(*) as count FROM todo; - ` - return result.rows[0].count === 7 - }) - - // Verify all rows were inserted - const result = await pg.sql` - SELECT * FROM todo ORDER BY id; - ` - expect(result.rows).toEqual( - messages.map((m) => ({ - id: m.value.id, - task: m.value.task, - done: m.value.done, - })), - ) - - // Should have received 2 commit notifications: - // - One for the first batch of 5 - // - One for the remaining 2 (triggered by up-to-date message) - expect(commits).toHaveLength(2) - expect(commits).toEqual(['todo', 'todo']) - - await unsubscribe() - shape.unsubscribe() - }) - - // Removed until Electric has stabilised on LSN metadata - // it('respects transaction commit granularity', async () => { - // let feedMessages: (messages: Message[]) => Promise = async (_) => {} - // MockShapeStream.mockImplementation(() => ({ - // subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - // feedMessages = (messages) => cb([...messages, upToDateMsg]) - // }), - // unsubscribeAll: vi.fn(), - // })) - - // // Create a trigger to notify on transaction commit - // await pg.exec(` - // CREATE OR REPLACE FUNCTION notify_transaction() - // RETURNS TRIGGER AS $$ - // BEGIN - // PERFORM pg_notify('transaction_commit', TG_TABLE_NAME); - // RETURN NEW; - // END; - // $$ LANGUAGE plpgsql; - - // CREATE TRIGGER todo_transaction_trigger - // AFTER INSERT ON todo - // FOR EACH STATEMENT - // EXECUTE FUNCTION notify_transaction(); - // `) - - // // Track transaction commits - // const transactionCommits: string[] = [] - // const unsubscribe = await pg.listen('transaction_commit', (payload) => { - // transactionCommits.push(payload) - // }) - - // const shape = await pg.electric.syncShapeToTable({ - // shape: { - // url: 'http://localhost:3000/v1/shape', - // params: { table: 'todo' }, - // }, - // table: 'todo', - // primaryKey: ['id'], - // commitGranularity: 'transaction', - // }) - - // // Send messages with different LSNs (first part of offset before _) - // await feedMessages([ - // { - // headers: { operation: 'insert' }, - // key: 'id1', - // value: { - // id: 1, - // task: 'task1', - // done: false, - // }, - // }, - // { - // headers: { operation: 'insert' }, - // key: 'id2', - // value: { - // id: 2, - // task: 'task2', - // done: false, - // }, - // }, - // { - // headers: { operation: 'insert' }, - // key: 'id3', - // value: { - // id: 3, - // task: 'task3', - // done: false, - // }, - // }, - // ]) - - // // Wait for all inserts to complete - // await vi.waitUntil(async () => { - // const result = await pg.sql<{ count: number }>` - // SELECT COUNT(*) as count FROM todo; - // ` - // return result.rows[0].count === 3 - // }) - - // // Verify all rows were inserted - // const result = await pg.sql` - // SELECT * FROM todo ORDER BY id; - // ` - // expect(result.rows).toEqual([ - // { id: 1, task: 'task1', done: false }, - // { id: 2, task: 'task2', done: false }, - // { id: 3, task: 'task3', done: false }, - // ]) - - // // Should have received 2 transaction notifications - // // One for LSN 1 (containing 2 inserts) and one for LSN 2 (containing 1 insert) - // 
expect(transactionCommits).toHaveLength(2) - // expect(transactionCommits).toEqual(['todo', 'todo']) - - // await unsubscribe() - // shape.unsubscribe() - // }) - - it('respects up-to-date commit granularity settings', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), - unsubscribeAll: vi.fn(), - })) - - // Create a trigger to notify on transaction commit - await pg.exec(` - CREATE OR REPLACE FUNCTION notify_transaction() - RETURNS TRIGGER AS $$ - BEGIN - PERFORM pg_notify('transaction_commit', TG_TABLE_NAME); - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - - CREATE TRIGGER todo_transaction_trigger - AFTER INSERT ON todo - FOR EACH STATEMENT - EXECUTE FUNCTION notify_transaction(); - `) - - const commits: string[] = [] - const unsubscribe = await pg.listen('transaction_commit', (payload) => { - commits.push(payload) - }) - - const shape = await pg.electric.syncShapeToTable({ - shape: { - url: 'http://localhost:3000/v1/shape', - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - commitGranularity: 'up-to-date', - shapeKey: null, - }) - - // Send multiple messages - await feedMessages([ - { - headers: { operation: 'insert' }, - key: 'id1', - value: { id: 1, task: 'task1', done: false }, - }, - { - headers: { operation: 'insert' }, - key: 'id2', - value: { id: 2, task: 'task2', done: false }, - }, - { - headers: { operation: 'insert' }, - key: 'id3', - value: { id: 3, task: 'task3', done: false }, - }, - ]) - - // Wait for all inserts to complete - await vi.waitUntil(async () => { - const result = await pg.sql<{ count: number }>` - SELECT COUNT(*) as count FROM todo; - ` - return result.rows[0].count === 3 - }) - - // Should have received only one commit notification since all operations - // are committed together when up-to-date message is received - expect(commits).toHaveLength(1) - expect(commits).toEqual(['todo']) - - await unsubscribe() - shape.unsubscribe() - }) - - it('respects operation commit granularity settings', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), - unsubscribeAll: vi.fn(), - })) - - // Create a trigger to notify on transaction commit - await pg.exec(` - CREATE OR REPLACE FUNCTION notify_transaction() - RETURNS TRIGGER AS $$ - BEGIN - PERFORM pg_notify('transaction_commit', TG_TABLE_NAME); - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - - CREATE TRIGGER todo_transaction_trigger - AFTER INSERT ON todo - FOR EACH STATEMENT - EXECUTE FUNCTION notify_transaction(); - `) - - const commits: string[] = [] - const unsubscribe = await pg.listen('transaction_commit', (payload) => { - commits.push(payload) - }) - - const shape = await pg.electric.syncShapeToTable({ - shape: { - url: 'http://localhost:3000/v1/shape', - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - commitGranularity: 'operation', - shapeKey: null, - }) - - // Send multiple messages - await feedMessages([ - { - headers: { operation: 'insert' }, - key: 'id1', - value: { id: 1, task: 'task1', done: false }, - }, - { - headers: { operation: 'insert' }, - key: 'id2', - value: { id: 2, task: 'task2', done: false }, - }, - { - headers: { operation: 'insert' 
}, - key: 'id3', - value: { id: 3, task: 'task3', done: false }, - }, - ]) - - // Wait for all inserts to complete - await vi.waitUntil(async () => { - const result = await pg.sql<{ count: number }>` - SELECT COUNT(*) as count FROM todo; - ` - return result.rows[0].count === 3 - }) - - // Should have received a notification for each operation - expect(commits).toHaveLength(3) - expect(commits).toEqual(['todo', 'todo', 'todo']) - - await unsubscribe() - shape.unsubscribe() - }) - - // Skip this test as it's flaky in CI, timing is sensitive - it.skip('respects commitThrottle with operation commit granularity', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages]) - }), - unsubscribeAll: vi.fn(), - })) - - // Create a trigger to notify on transaction commit - await pg.exec(` - CREATE OR REPLACE FUNCTION notify_transaction() - RETURNS TRIGGER AS $$ - BEGIN - PERFORM pg_notify('transaction_commit', - TG_TABLE_NAME || '_' || - (SELECT COUNT(*) FROM todo)::text || '_' || - EXTRACT(MILLISECONDS FROM NOW())::text - ); - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - - CREATE TRIGGER todo_transaction_trigger - AFTER INSERT ON todo - FOR EACH STATEMENT - EXECUTE FUNCTION notify_transaction(); - `) - - const commits: string[] = [] - const unsubscribe = await pg.listen('transaction_commit', (payload) => { - commits.push(payload) - }) - - const throttleMs = 15 // Short throttle for testing - const shape = await pg.electric.syncShapeToTable({ - shape: { - url: 'http://localhost:3000/v1/shape', - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - commitGranularity: 'operation', - commitThrottle: throttleMs, - shapeKey: null, - }) - - // Send messages with 10ms delays between them - for (const message of [ - { - headers: { operation: 'insert' as const }, - offset: '1_1' as const, - key: 'id1', - value: { id: 1, task: 'task1', done: false }, - }, - { - headers: { operation: 'insert' as const }, - offset: '1_2' as const, - key: 'id2', - value: { id: 2, task: 'task2', done: false }, - }, - { - headers: { operation: 'insert' as const }, - offset: '1_3' as const, - key: 'id3', - value: { id: 3, task: 'task3', done: false }, - }, - { - headers: { operation: 'insert' as const }, - offset: '1_4' as const, - key: 'id4', - value: { id: 4, task: 'task4', done: false }, - }, - upToDateMsg, - ]) { - await feedMessages([message]) - await new Promise((resolve) => setTimeout(resolve, 10)) - } - - // Wait for all inserts to complete - await vi.waitUntil(async () => { - const result = await pg.sql<{ count: number }>` - SELECT COUNT(*) as count FROM todo; - ` - return result.rows[0].count === 4 - }) - - console.log(commits) - - // Extract row counts and timestamps from commit notifications - const commitInfo = commits.map((commit) => { - const [_, rowCount, timestamp] = commit.split('_') - return { - rowCount: parseInt(rowCount), - timestamp: parseFloat(timestamp), - } - }) - - // Verify we got 4 operation messages - expect(commitInfo.length).toBe(4) - - // Check timestamps are at least 15ms apart for first 3 - expect( - commitInfo[1].timestamp - commitInfo[0].timestamp, - ).toBeGreaterThanOrEqual(15) - expect( - commitInfo[2].timestamp - commitInfo[1].timestamp, - ).toBeGreaterThanOrEqual(15) - - // Last 2 operation messages should have same timestamp since they're batched - 
expect(commitInfo[3].timestamp).toBe(commitInfo[2].timestamp) - - await unsubscribe() - shape.unsubscribe() - }) - it('calls onInitialSync callback after initial sync', async () => { let feedMessages: (messages: Message[]) => Promise = async (_) => {} MockShapeStream.mockImplementation(() => ({ diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 74f8fa3c..813b1302 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -298,8 +298,11 @@ importers: packages/pglite-sync: dependencies: '@electric-sql/client': - specifier: 1.0.0-beta.3 - version: 1.0.0-beta.3 + specifier: 1.0.0-beta.5 + version: 1.0.0-beta.5 + '@electric-sql/experimental': + specifier: 0.1.2-beta.4 + version: 0.1.2-beta.4(@electric-sql/client@1.0.0-beta.5) devDependencies: '@electric-sql/pglite': specifier: workspace:* @@ -685,8 +688,13 @@ packages: search-insights: optional: true - '@electric-sql/client@1.0.0-beta.3': - resolution: {integrity: sha512-x3bzYlX+IRwBAILPxzu3ARkXzmrAQtVOuJCKCxlSqENuJa4zvLPF4f8vC6HMOiiJiHPAntJjfI3Hb0lrt2PTxA==} + '@electric-sql/client@1.0.0-beta.5': + resolution: {integrity: sha512-sP4yBBt4sDWE7FhMMVkVceKULLPStumFzlgltUqb7AzzXP6AK2qs4bZ2QrabY6feBgRgrMP19bOzxI/WOSnTAA==} + + '@electric-sql/experimental@0.1.2-beta.4': + resolution: {integrity: sha512-P+V8wvTaUddyMrPtvfEwkoGkrLUCX6MVTLJn303oEEb/aK0NkAqUaCXVj9BNZ3bM673dTqdysy+Wg61mP42kfw==} + peerDependencies: + '@electric-sql/client': 1.0.0-beta.5 '@electric-sql/pglite-react@0.2.17': resolution: {integrity: sha512-TEdV1UAUO50gb/okYcVaiG2dQdox53K2E8jSEl0Wa/97x3Xi238ls7ZB7wgmoqNgy45f/tgYxyztml03EFClng==} @@ -2000,6 +2008,7 @@ packages: bun@1.1.30: resolution: {integrity: sha512-ysRL1pq10Xba0jqVLPrKU3YIv0ohfp3cTajCPtpjCyppbn3lfiAVNpGoHfyaxS17OlPmWmR67UZRPw/EueQuug==} + cpu: [arm64, x64] os: [darwin, linux, win32] hasBin: true @@ -4926,7 +4935,13 @@ snapshots: transitivePeerDependencies: - '@algolia/client-search' - '@electric-sql/client@1.0.0-beta.3': + '@electric-sql/client@1.0.0-beta.5': + optionalDependencies: + '@rollup/rollup-darwin-arm64': 4.24.0 + + '@electric-sql/experimental@0.1.2-beta.4(@electric-sql/client@1.0.0-beta.5)': + dependencies: + '@electric-sql/client': 1.0.0-beta.5 optionalDependencies: '@rollup/rollup-darwin-arm64': 4.24.0 From 56d367592b55dcec5e4d7016389b66289f9e893a Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 15:07:52 +0000 Subject: [PATCH 02/10] passing old tests --- packages/pglite-sync/src/index-OLD.ts | 691 ------------------------- packages/pglite-sync/src/index.ts | 13 +- packages/pglite-sync/test/sync.test.ts | 402 ++++++++++---- 3 files changed, 306 insertions(+), 800 deletions(-) delete mode 100644 packages/pglite-sync/src/index-OLD.ts diff --git a/packages/pglite-sync/src/index-OLD.ts b/packages/pglite-sync/src/index-OLD.ts deleted file mode 100644 index 8a463f7d..00000000 --- a/packages/pglite-sync/src/index-OLD.ts +++ /dev/null @@ -1,691 +0,0 @@ -import type { Offset, Row, ShapeStreamOptions } from '@electric-sql/client' -import { - ChangeMessage, - ShapeStream, - isChangeMessage, - isControlMessage, - ShapeStreamInterface, -} from '@electric-sql/client' -import type { - Extension, - PGliteInterface, - Transaction, -} from '@electric-sql/pglite' - -interface LegacyChangeMessage> extends ChangeMessage { - offset?: Offset -} - -export type MapColumnsMap = Record -export type MapColumnsFn = (message: ChangeMessage) => Record -export type MapColumns = MapColumnsMap | MapColumnsFn -export type ShapeKey = string - -type InsertChangeMessage = ChangeMessage & { - headers: { operation: 'insert' } -} - -/** - * 
The granularity of the commit operation. - * - `up-to-date`: Commit all messages when the `up-to-date` message is received. - * - `operation`: Commit each message in its own transaction. - * - `number`: Commit every N messages. - * Note a commit will always be performed on the `up-to-date` message. - */ -export type CommitGranularity = - | 'up-to-date' - // | 'transaction' // Removed until Electric has stabilised on LSN metadata - | 'operation' - | number - -export interface SyncShapeToTableOptions { - shape: ShapeStreamOptions - table: string - schema?: string - mapColumns?: MapColumns - primaryKey: string[] - shapeKey: ShapeKey | null - useCopy?: boolean - commitGranularity?: CommitGranularity - commitThrottle?: number - onInitialSync?: () => void -} - -export interface SyncShapeToTableResult { - unsubscribe: () => void - readonly isUpToDate: boolean - readonly shapeId: string - subscribe: (cb: () => void, error: (err: Error) => void) => () => void - stream: ShapeStreamInterface -} - -export interface SyncShapeToTableResult { - unsubscribe: () => void - readonly isUpToDate: boolean - readonly shapeId: string - subscribe: (cb: () => void, error: (err: Error) => void) => () => void - stream: ShapeStreamInterface -} - -export interface ElectricSyncOptions { - debug?: boolean - metadataSchema?: string -} - -async function createPlugin( - pg: PGliteInterface, - options?: ElectricSyncOptions, -) { - const debug = options?.debug ?? false - const metadataSchema = options?.metadataSchema ?? 'electric' - const streams: Array<{ - stream: ShapeStream - aborter: AbortController - }> = [] - - // TODO: keeping an in-memory lock per table such that two - // shapes are not synced into one table - this will be - // resolved by using reference counting in shadow tables - const shapePerTableLock = new Map() - - let initMetadataTablesDone = false - const initMetadataTables = async () => { - if (initMetadataTablesDone) return - initMetadataTablesDone = true - await migrateShapeMetadataTables({ - pg, - metadataSchema, - }) - } - - const namespaceObj = { - initMetadataTables, - syncShapeToTable: async ( - options: SyncShapeToTableOptions, - ): Promise => { - await initMetadataTables() - options = { - commitGranularity: 'up-to-date', - ...options, - } - if (shapePerTableLock.has(options.table)) { - throw new Error('Already syncing shape for table ' + options.table) - } - shapePerTableLock.set(options.table) - let shapeSubState: ShapeSubscriptionState | null = null - - // if shapeKey is not null, ensure persistence of shape subscription - // state is possible and check if it is already persisted - if (options.shapeKey) { - shapeSubState = await getShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey: options.shapeKey, - }) - if (debug && shapeSubState) { - console.log('resuming from shape state', shapeSubState) - } - } - - // If it's a new subscription there is no state to resume from - const isNewSubscription = shapeSubState === null - - // If it's a new subscription we can do a `COPY FROM` to insert the initial data - // TODO: in future when we can have multiple shapes on the same table we will need - // to make sure we only do a `COPY FROM` on the first shape on the table as they - // may overlap and so the insert logic will be wrong. 
- let doCopy = isNewSubscription && options.useCopy - - // Track if onInitialSync has been called - let onInitialSyncCalled = false - - const aborter = new AbortController() - if (options.shape.signal) { - // we new to have our own aborter to be able to abort the stream - // but still accept the signal from the user - options.shape.signal.addEventListener('abort', () => aborter.abort(), { - once: true, - }) - } - const stream = new ShapeStream({ - ...options.shape, - ...(shapeSubState ?? {}), - signal: aborter.signal, - }) - - // TODO: this aggregates all messages in memory until an - // up-to-date message is received, which is not viable for - // _very_ large shapes - either we should commit batches to - // a temporary table and copy over the transactional result - // or use a separate connection to hold a long transaction - let messageAggregator: LegacyChangeMessage[] = [] - let truncateNeeded = false - // let lastLSN: string | null = null // Removed until Electric has stabilised on LSN metadata - let lastCommitAt: number = 0 - - const commit = async () => { - if (messageAggregator.length === 0 && !truncateNeeded) return - const shapeHandle = stream.shapeHandle // The shape handle could change while we are committing - await pg.transaction(async (tx) => { - if (debug) { - console.log('committing message batch', messageAggregator.length) - console.time('commit') - } - - // Set the syncing flag to true during this transaction so that - // user defined triggers on the table are able to chose how to run - // during a sync - tx.exec(`SET LOCAL ${metadataSchema}.syncing = true;`) - - if (truncateNeeded) { - truncateNeeded = false - // TODO: sync into shadow table and reference count - // for now just clear the whole table - will break - // cases with multiple shapes on the same table - await tx.exec(`DELETE FROM ${options.table};`) - if (options.shapeKey) { - await deleteShapeSubscriptionState({ - pg: tx, - metadataSchema, - shapeKey: options.shapeKey, - }) - } - } - - if (doCopy) { - // We can do a `COPY FROM` to insert the initial data - // Split messageAggregator into initial inserts and remaining messages - const initialInserts: InsertChangeMessage[] = [] - const remainingMessages: ChangeMessage[] = [] - let foundNonInsert = false - for (const message of messageAggregator) { - if (!foundNonInsert && message.headers.operation === 'insert') { - initialInserts.push(message as InsertChangeMessage) - } else { - foundNonInsert = true - remainingMessages.push(message) - } - } - if (initialInserts.length > 0) { - // As `COPY FROM` doesn't trigger a NOTIFY, we pop - // the last insert message and and add it to the be beginning - // of the remaining messages to be applied after the `COPY FROM` - remainingMessages.unshift(initialInserts.pop()!) 
- } - messageAggregator = remainingMessages - - // Do the `COPY FROM` with initial inserts - if (initialInserts.length > 0) { - applyMessagesToTableWithCopy({ - pg: tx, - table: options.table, - schema: options.schema, - messages: initialInserts as InsertChangeMessage[], - mapColumns: options.mapColumns, - primaryKey: options.primaryKey, - debug, - }) - // We don't want to do a `COPY FROM` again after that - doCopy = false - } - } - - for (const changeMessage of messageAggregator) { - await applyMessageToTable({ - pg: tx, - table: options.table, - schema: options.schema, - message: changeMessage, - mapColumns: options.mapColumns, - primaryKey: options.primaryKey, - debug, - }) - } - - if ( - options.shapeKey && - messageAggregator.length > 0 && - shapeHandle !== undefined - ) { - await updateShapeSubscriptionState({ - pg: tx, - metadataSchema, - shapeKey: options.shapeKey, - shapeId: shapeHandle, - lastOffset: getMessageOffset( - stream, - messageAggregator[messageAggregator.length - 1], - ), - }) - } - }) - if (debug) console.timeEnd('commit') - messageAggregator = [] - // Await a timeout to start a new task and allow other connections to do work - await new Promise((resolve) => setTimeout(resolve, 0)) - } - - const throttledCommit = async ({ - reset = false, - }: { reset?: boolean } = {}) => { - const now = Date.now() - if (reset) { - // Reset the last commit time to 0, forcing the next commit to happen immediately - lastCommitAt = 0 - } - if (options.commitThrottle && debug) - console.log( - 'throttled commit: now:', - now, - 'lastCommitAt:', - lastCommitAt, - 'diff:', - now - lastCommitAt, - ) - if ( - options.commitThrottle && - now - lastCommitAt < options.commitThrottle - ) { - // Skip this commit - messages will be caught by next commit or up-to-date - if (debug) console.log('skipping commit due to throttle') - return - } - lastCommitAt = now - await commit() - } - - stream.subscribe(async (messages) => { - if (debug) console.log('sync messages received', messages) - - for (const message of messages) { - if (isChangeMessage(message)) { - // Removed until Electric has stabilised on LSN metadata - // const newLSN = message.offset.split('_')[0] - // if (newLSN !== lastLSN) { - // // If the LSN has changed and granularity is set to transaction - // // we need to commit the current batch. - // // This is done before we accumulate any more messages as they are - // // part of the next transaction batch. 
- // if (options.commitGranularity === 'transaction') { - // await throttledCommit() - // } - // lastLSN = newLSN - // } - - // accumulate change messages for committing all at once or in batches - messageAggregator.push(message) - - if (options.commitGranularity === 'operation') { - // commit after each operation if granularity is set to operation - await throttledCommit() - } else if (typeof options.commitGranularity === 'number') { - // commit after every N messages if granularity is set to a number - if (messageAggregator.length >= options.commitGranularity) { - await throttledCommit() - } - } - } else if (isControlMessage(message)) { - switch (message.headers.control) { - case 'must-refetch': - // mark table as needing truncation before next batch commit - if (debug) console.log('refetching shape') - truncateNeeded = true - messageAggregator = [] - break - - case 'up-to-date': - // perform all accumulated changes and store stream state - await throttledCommit({ reset: true }) // not throttled, we want this to happen ASAP - if ( - isNewSubscription && - !onInitialSyncCalled && - options.onInitialSync - ) { - options.onInitialSync() - onInitialSyncCalled = true - } - break - } - } - } - }) - - streams.push({ - stream, - aborter, - }) - const unsubscribe = () => { - stream.unsubscribeAll() - aborter.abort() - shapePerTableLock.delete(options.table) - } - return { - unsubscribe, - get isUpToDate() { - return stream.isUpToDate - }, - get shapeId() { - return stream.shapeHandle! - }, - stream, - subscribe: (cb: () => void, error: (err: Error) => void) => { - return stream.subscribe(() => { - if (stream.isUpToDate) { - cb() - } - }, error) - }, - } - }, - } - - const close = async () => { - for (const { stream, aborter } of streams) { - stream.unsubscribeAll() - aborter.abort() - } - } - - return { - namespaceObj, - close, - } -} - -export type SyncNamespaceObj = Awaited< - ReturnType ->['namespaceObj'] - -export type PGliteWithSync = PGliteInterface & { - sync: SyncNamespaceObj -} - -export function electricSync(options?: ElectricSyncOptions) { - return { - name: 'ElectricSQL Sync', - setup: async (pg: PGliteInterface) => { - const { namespaceObj, close } = await createPlugin(pg, options) - return { - namespaceObj, - close, - } - }, - } satisfies Extension -} - -function doMapColumns( - mapColumns: MapColumns, - message: ChangeMessage, -): Record { - if (typeof mapColumns === 'function') { - return mapColumns(message) - } else { - const mappedColumns: Record = {} - for (const [key, value] of Object.entries(mapColumns)) { - mappedColumns[key] = message.value[value] - } - return mappedColumns - } -} - -interface ApplyMessageToTableOptions { - pg: PGliteInterface | Transaction - table: string - schema?: string - message: ChangeMessage - mapColumns?: MapColumns - primaryKey: string[] - debug: boolean -} - -async function applyMessageToTable({ - pg, - table, - schema = 'public', - message, - mapColumns, - primaryKey, - debug, -}: ApplyMessageToTableOptions) { - const data = mapColumns ? 
doMapColumns(mapColumns, message) : message.value - - switch (message.headers.operation) { - case 'insert': { - if (debug) console.log('inserting', data) - const columns = Object.keys(data) - return await pg.query( - ` - INSERT INTO "${schema}"."${table}" - (${columns.map((s) => '"' + s + '"').join(', ')}) - VALUES - (${columns.map((_v, i) => '$' + (i + 1)).join(', ')}) - `, - columns.map((column) => data[column]), - ) - } - - case 'update': { - if (debug) console.log('updating', data) - const columns = Object.keys(data).filter( - // we don't update the primary key, they are used to identify the row - (column) => !primaryKey.includes(column), - ) - if (columns.length === 0) return // nothing to update - return await pg.query( - ` - UPDATE "${schema}"."${table}" - SET ${columns - .map((column, i) => '"' + column + '" = $' + (i + 1)) - .join(', ')} - WHERE ${primaryKey - .map( - (column, i) => - '"' + column + '" = $' + (columns.length + i + 1), - ) - .join(' AND ')} - `, - [ - ...columns.map((column) => data[column]), - ...primaryKey.map((column) => data[column]), - ], - ) - } - - case 'delete': { - if (debug) console.log('deleting', data) - return await pg.query( - ` - DELETE FROM "${schema}"."${table}" - WHERE ${primaryKey - .map((column, i) => '"' + column + '" = $' + (i + 1)) - .join(' AND ')} - `, - [...primaryKey.map((column) => data[column])], - ) - } - } -} - -interface ApplyMessagesToTableWithCopyOptions { - pg: PGliteInterface | Transaction - table: string - schema?: string - messages: InsertChangeMessage[] - mapColumns?: MapColumns - primaryKey: string[] - debug: boolean -} - -async function applyMessagesToTableWithCopy({ - pg, - table, - schema = 'public', - messages, - mapColumns, - debug, -}: ApplyMessagesToTableWithCopyOptions) { - if (debug) console.log('applying messages with COPY') - - // Map the messages to the data to be inserted - const data: Record[] = messages.map((message) => - mapColumns ? doMapColumns(mapColumns, message) : message.value, - ) - - // Get column names from the first message - const columns = Object.keys(data[0]) - - // Create CSV data - const csvData = data - .map((message) => { - return columns - .map((column) => { - const value = message[column] - // Escape double quotes and wrap in quotes if necessary - if ( - typeof value === 'string' && - (value.includes(',') || value.includes('"') || value.includes('\n')) - ) { - return `"${value.replace(/"/g, '""')}"` - } - return value === null ? 
'\\N' : value - }) - .join(',') - }) - .join('\n') - const csvBlob = new Blob([csvData], { type: 'text/csv' }) - - // Perform COPY FROM - await pg.query( - ` - COPY "${schema}"."${table}" (${columns.map((c) => `"${c}"`).join(', ')}) - FROM '/dev/blob' - WITH (FORMAT csv, NULL '\\N') - `, - [], - { - blob: csvBlob, - }, - ) - - if (debug) console.log(`Inserted ${messages.length} rows using COPY`) -} - -interface GetShapeSubscriptionStateOptions { - readonly pg: PGliteInterface | Transaction - readonly metadataSchema: string - readonly shapeKey: ShapeKey -} - -type ShapeSubscriptionState = Pick - -async function getShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, -}: GetShapeSubscriptionStateOptions): Promise { - const result = await pg.query<{ shape_id: string; last_offset: string }>( - ` - SELECT shape_id, last_offset - FROM ${subscriptionMetadataTableName(metadataSchema)} - WHERE shape_key = $1 - `, - [shapeKey], - ) - - if (result.rows.length === 0) return null - - const { shape_id: handle, last_offset: offset } = result.rows[0] - return { - handle, - offset: offset as Offset, - } -} - -interface UpdateShapeSubscriptionStateOptions { - pg: PGliteInterface | Transaction - metadataSchema: string - shapeKey: ShapeKey - shapeId: string - lastOffset: Offset -} - -async function updateShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, - shapeId, - lastOffset, -}: UpdateShapeSubscriptionStateOptions) { - await pg.query( - ` - INSERT INTO ${subscriptionMetadataTableName(metadataSchema)} (shape_key, shape_id, last_offset) - VALUES ($1, $2, $3) - ON CONFLICT(shape_key) - DO UPDATE SET - shape_id = EXCLUDED.shape_id, - last_offset = EXCLUDED.last_offset; - `, - [shapeKey, shapeId, lastOffset], - ) -} - -interface DeleteShapeSubscriptionStateOptions { - pg: PGliteInterface | Transaction - metadataSchema: string - shapeKey: ShapeKey -} - -async function deleteShapeSubscriptionState({ - pg, - metadataSchema, - shapeKey, -}: DeleteShapeSubscriptionStateOptions) { - await pg.query( - `DELETE FROM ${subscriptionMetadataTableName(metadataSchema)} WHERE shape_key = $1`, - [shapeKey], - ) -} - -interface MigrateShapeMetadataTablesOptions { - pg: PGliteInterface | Transaction - metadataSchema: string -} - -async function migrateShapeMetadataTables({ - pg, - metadataSchema, -}: MigrateShapeMetadataTablesOptions) { - await pg.exec( - ` - SET ${metadataSchema}.syncing = false; - CREATE SCHEMA IF NOT EXISTS "${metadataSchema}"; - CREATE TABLE IF NOT EXISTS ${subscriptionMetadataTableName(metadataSchema)} ( - shape_key TEXT PRIMARY KEY, - shape_id TEXT NOT NULL, - last_offset TEXT NOT NULL - ); - `, - ) -} - -function subscriptionMetadataTableName(metadatSchema: string) { - return `"${metadatSchema}"."${subscriptionTableName}"` -} - -const subscriptionTableName = `shape_subscriptions_metadata` - -function getMessageOffset( - stream: ShapeStream, - message: LegacyChangeMessage, -): Offset { - if (message.offset) { - return message.offset - } else if ( - message.headers.lsn !== undefined && - message.headers.op_position !== undefined - ) { - return `${message.headers.lsn}_${message.headers.op_position}` as Offset - } else { - return stream.lastOffset - } -} diff --git a/packages/pglite-sync/src/index.ts b/packages/pglite-sync/src/index.ts index deda05c1..0a674a28 100644 --- a/packages/pglite-sync/src/index.ts +++ b/packages/pglite-sync/src/index.ts @@ -165,6 +165,15 @@ async function createPlugin( } await pg.transaction(async (tx) => { + if (debug) { + console.time('commit') + } + + // Set the 
syncing flag to true during this transaction so that + // user defined triggers on the table are able to chose how to run + // during a sync + tx.exec(`SET LOCAL ${metadataSchema}.syncing = true;`) + for (let [shapeName, messages] of messagesToCommit.entries()) { const shape = shapes[shapeName] @@ -255,7 +264,7 @@ async function createPlugin( } } - multiShapeStream.subscribe((messages) => { + multiShapeStream.subscribe(async (messages) => { messages.forEach((message) => { if (isChangeMessage(message)) { const shapeChanges = changes.get(message.shape)! @@ -296,6 +305,8 @@ async function createPlugin( if (lowestCommittedLsn > lastCommittedLsn) { // We have new changes to commit commitUpToLsn(lowestCommittedLsn) + // Await a timeout to start a new task and allow other connections to do work + await new Promise((resolve) => setTimeout(resolve, 0)) } }) diff --git a/packages/pglite-sync/test/sync.test.ts b/packages/pglite-sync/test/sync.test.ts index 89ad70c1..74c254da 100644 --- a/packages/pglite-sync/test/sync.test.ts +++ b/packages/pglite-sync/test/sync.test.ts @@ -1,30 +1,29 @@ -import { - ControlMessage, - Message, - ShapeStream, - ShapeStreamOptions, -} from '@electric-sql/client' +import { ShapeStreamOptions } from '@electric-sql/client' +import { MultiShapeMessages } from '@electric-sql/experimental' import { PGlite, PGliteInterfaceExtensions } from '@electric-sql/pglite' import { Mock, beforeEach, describe, expect, it, vi } from 'vitest' import { electricSync } from '../src/index.js' +import { MultiShapeStream } from '@electric-sql/experimental' -vi.mock('@electric-sql/client', async (importOriginal) => { - const mod = await importOriginal() - const ShapeStream = vi.fn(() => ({ +type MultiShapeMessage = MultiShapeMessages + +vi.mock('@electric-sql/experimental', async (importOriginal) => { + const mod = + await importOriginal() + const MultiShapeStream = vi.fn(() => ({ subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: {}, })) - return { ...mod, ShapeStream } + return { ...mod, MultiShapeStream } }) -const upToDateMsg: ControlMessage = { - headers: { control: 'up-to-date' }, -} - describe('pglite-sync', () => { let pg: PGlite & PGliteInterfaceExtensions<{ electric: ReturnType }> - const MockShapeStream = ShapeStream as unknown as Mock + const MockMultiShapeStream = MultiShapeStream as unknown as Mock beforeEach(async () => { pg = await PGlite.create({ @@ -43,12 +42,33 @@ describe('pglite-sync', () => { }) it('handles inserts/updates/deletes', async () => { - let feedMessage: (message: Message) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessage = (message) => cb([message, upToDateMsg]) - }), + let feedMessage: (message: MultiShapeMessage) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessage = (message) => + cb([ + message, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const shape = await pg.electric.syncShapeToTable({ @@ -70,6 +90,7 @@ describe('pglite-sync', () => { task: 'task1', done: false, }, + shape: 'shape', }) expect((await pg.sql`SELECT* FROM todo;`).rows).toEqual([ { @@ -88,6 +109,7 @@ describe('pglite-sync', () => { task: 'task2', done: 
true, }, + shape: 'shape', }) expect((await pg.sql`SELECT* FROM todo;`).rows).toEqual([ { @@ -106,6 +128,7 @@ describe('pglite-sync', () => { task: 'task2', done: true, }, + shape: 'shape', }) expect((await pg.sql`SELECT* FROM todo;`).rows).toEqual([]) @@ -113,12 +136,33 @@ describe('pglite-sync', () => { }) it('performs operations within a transaction', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => + cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const shape = await pg.electric.syncShapeToTable({ @@ -140,13 +184,13 @@ describe('pglite-sync', () => { const itemIdx = i * numBatchInserts + idx return { headers: { operation: 'insert' }, - offset: `1_${itemIdx}`, key: `id${itemIdx}`, value: { id: itemIdx, task: `task${itemIdx}`, done: false, }, + shape: 'shape', } }), ) @@ -180,27 +224,45 @@ describe('pglite-sync', () => { }) it('persists shape stream state and automatically resumes', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} + let feedMessages: ( + lsn: number, + messages: MultiShapeMessage[], + ) => Promise = async (_) => {} const shapeStreamInits = vi.fn() let mockShapeId: string | void = undefined - MockShapeStream.mockImplementation((initOpts: ShapeStreamOptions) => { + MockMultiShapeStream.mockImplementation((initOpts: ShapeStreamOptions) => { shapeStreamInits(initOpts) return { - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => { - mockShapeId ??= Math.random() + '' - return cb([...messages, upToDateMsg]) - } - }), + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (lsn, messages) => { + mockShapeId ??= Math.random() + '' + return cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: lsn, + }, + }, + ]) + } + }, + ), unsubscribeAll: vi.fn(), - get shapeId() { - return mockShapeId + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, }, } }) let totalRowCount = 0 - const numInserts = 100 + const numInserts = 3 //100 const shapeIds: string[] = [] const numResumes = 3 @@ -216,15 +278,19 @@ describe('pglite-sync', () => { }) await feedMessages( + i, Array.from({ length: numInserts }, (_, idx) => ({ - headers: { operation: 'insert' }, - offset: `1_${i * numInserts + idx}`, + headers: { + operation: 'insert', + lsn: i, + }, key: `id${i * numInserts + idx}`, value: { id: i * numInserts + idx, task: `task${idx}`, done: false, }, + shape: 'shape', })), ) @@ -252,31 +318,46 @@ describe('pglite-sync', () => { }) it('clears and restarts persisted shape stream state on refetch', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} const shapeStreamInits = vi.fn() let mockShapeId: string | void = 
undefined - MockShapeStream.mockImplementation((initOpts: ShapeStreamOptions) => { + MockMultiShapeStream.mockImplementation((initOpts: ShapeStreamOptions) => { shapeStreamInits(initOpts) - return { - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => { - mockShapeId ??= Math.random() + '' - if (messages.find((m) => m.headers.control === 'must-refetch')) { - mockShapeId = undefined + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => { + mockShapeId ??= Math.random() + '' + if (messages.find((m) => m.headers.control === 'must-refetch')) { + mockShapeId = undefined + } + + return cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) } - - return cb([...messages, upToDateMsg]) - } - }), + }, + ), unsubscribeAll: vi.fn(), - get shapeId() { - return mockShapeId + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, }, } }) - const numInserts = 100 const shape = await pg.electric.syncShapeToTable({ shape: { url: 'http://localhost:3000/v1/shape', @@ -287,28 +368,7 @@ describe('pglite-sync', () => { shapeKey: 'foo', }) - await feedMessages( - Array.from({ length: numInserts }, (_, idx) => ({ - headers: { operation: 'insert' }, - offset: `1_${idx}`, - key: `id${idx}`, - value: { - id: idx, - task: `task${idx}`, - done: false, - }, - })), - ) - - await vi.waitUntil(async () => { - const result = await pg.sql<{ - count: number - }>`SELECT COUNT(*) as count FROM todo;` - return result.rows[0]?.count === numInserts - }) - - // feed a must-refetch message that should clear the table - // and any aggregated messages + const numInserts = 100 await feedMessages([ { headers: { operation: 'insert' }, @@ -318,8 +378,9 @@ describe('pglite-sync', () => { task: `task`, done: false, }, + shape: 'shape', }, - { headers: { control: 'must-refetch' } }, + { headers: { control: 'must-refetch' }, shape: 'shape' }, { headers: { operation: 'insert' }, key: `id21`, @@ -328,6 +389,7 @@ describe('pglite-sync', () => { task: `task`, done: false, }, + shape: 'shape', }, ]) @@ -379,9 +441,16 @@ describe('pglite-sync', () => { }) it('forbids multiple subscriptions to the same table', async () => { - MockShapeStream.mockImplementation(() => ({ + MockMultiShapeStream.mockImplementation(() => ({ subscribe: vi.fn(), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const table = 'foo' @@ -440,12 +509,33 @@ describe('pglite-sync', () => { }) it('handles an update message with no columns to update', async () => { - let feedMessage: (message: Message) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessage = (message) => cb([message, upToDateMsg]) - }), + let feedMessage: (message: MultiShapeMessage) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessage = (message) => + cb([ + message, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const shape = await pg.electric.syncShapeToTable({ @@ -467,6 +557,7 @@ describe('pglite-sync', () => { task: 
'task1', done: false, }, + shape: 'shape', }) expect((await pg.sql`SELECT* FROM todo;`).rows).toEqual([ { @@ -483,6 +574,7 @@ describe('pglite-sync', () => { value: { id: 1, }, + shape: 'shape', }) expect((await pg.sql`SELECT* FROM todo;`).rows).toEqual([ { @@ -496,12 +588,33 @@ describe('pglite-sync', () => { }) it('sets the syncing flag to true when syncing begins', async () => { - let feedMessage: (message: Message) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessage = (message) => cb([message, upToDateMsg]) - }), + let feedMessage: (message: MultiShapeMessage) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessage = (message) => + cb([ + message, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) await pg.exec(` @@ -553,6 +666,7 @@ describe('pglite-sync', () => { id: 'id1', value: 'test value', }, + shape: 'shape', }) // Check the flag is set during a sync @@ -573,12 +687,33 @@ describe('pglite-sync', () => { }) it('uses COPY FROM for initial batch of inserts', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => + cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const shape = await pg.electric.syncShapeToTable({ @@ -594,20 +729,20 @@ describe('pglite-sync', () => { // Create a batch of insert messages followed by an update const numInserts = 1000 - const messages: Message[] = [ + const messages: MultiShapeMessage[] = [ ...Array.from( { length: numInserts }, (_, idx) => ({ headers: { operation: 'insert' as const }, - offset: `1_${idx}`, key: `id${idx}`, value: { id: idx, task: `task${idx}`, done: idx % 2 === 0, }, - }) as Message, + shape: 'shape', + }) as MultiShapeMessage, ), { headers: { operation: 'update' as const }, @@ -617,6 +752,7 @@ describe('pglite-sync', () => { task: 'updated task', done: true, }, + shape: 'shape', }, ] @@ -652,12 +788,33 @@ describe('pglite-sync', () => { }) it('handles special characters in COPY FROM data', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => + cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + 
global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) const shape = await pg.electric.syncShapeToTable({ @@ -671,7 +828,7 @@ describe('pglite-sync', () => { shapeKey: null, }) - const specialCharMessages: Message[] = [ + const specialCharMessages: MultiShapeMessage[] = [ { headers: { operation: 'insert' }, key: 'id1', @@ -680,6 +837,7 @@ describe('pglite-sync', () => { task: 'task with, comma', done: false, }, + shape: 'shape', }, { headers: { operation: 'insert' }, @@ -689,6 +847,7 @@ describe('pglite-sync', () => { task: 'task with "quotes"', done: true, }, + shape: 'shape', }, { headers: { operation: 'insert' }, @@ -698,6 +857,7 @@ describe('pglite-sync', () => { task: 'task with\nnewline', done: false, }, + shape: 'shape', }, ] @@ -725,15 +885,38 @@ describe('pglite-sync', () => { }) it('calls onInitialSync callback after initial sync', async () => { - let feedMessages: (messages: Message[]) => Promise = async (_) => {} - MockShapeStream.mockImplementation(() => ({ - subscribe: vi.fn((cb: (messages: Message[]) => Promise) => { - feedMessages = (messages) => cb([...messages, upToDateMsg]) - }), + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => + cb([ + ...messages, + { + shape: 'shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, })) - const onInitialSync = vi.fn() + const onInitialSync = vi.fn(() => { + console.log('onInitialSync') + }) const shape = await pg.electric.syncShapeToTable({ shape: { url: 'http://localhost:3000/v1/shape', @@ -755,6 +938,7 @@ describe('pglite-sync', () => { task: 'task1', done: false, }, + shape: 'shape', }, { headers: { operation: 'insert' }, @@ -764,6 +948,7 @@ describe('pglite-sync', () => { task: 'task2', done: true, }, + shape: 'shape', }, ]) @@ -780,6 +965,7 @@ describe('pglite-sync', () => { task: 'task3', done: false, }, + shape: 'shape', }, ]) From fc41bbfd0e9dd8f9538a7c16ed37fa8cf8c3d3a8 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 16:25:48 +0000 Subject: [PATCH 03/10] Add multiple table tests --- packages/pglite-sync/test/sync.test.ts | 545 +++++++++++++++++++++++++ 1 file changed, 545 insertions(+) diff --git a/packages/pglite-sync/test/sync.test.ts b/packages/pglite-sync/test/sync.test.ts index 74c254da..025f067f 100644 --- a/packages/pglite-sync/test/sync.test.ts +++ b/packages/pglite-sync/test/sync.test.ts @@ -980,4 +980,549 @@ describe('pglite-sync', () => { shape.unsubscribe() }) + + it('syncs multiple shapes to multiple tables simultaneously', async () => { + // Create a second table for testing multi-shape sync + await pg.exec(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + await pg.exec(`TRUNCATE project;`) + + // Setup mock for MultiShapeStream with two shapes + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => + cb([ + ...messages, + { + shape: 'todo_shape', + headers: { + control: 
'up-to-date', + global_last_seen_lsn: 0, + }, + }, + { + shape: 'project_shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + }, + ]) + }, + ), + unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + todo_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + project_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, + })) + + // Set up sync for both tables + const onInitialSync = vi.fn() + const syncResult = await pg.electric.syncShapesToTables({ + key: 'multi_sync_test', + shapes: { + todo_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'project' }, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + onInitialSync, + }) + + // Send data for both shapes in a single batch + await feedMessages([ + // Todo table inserts + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + task: 'task1', + done: false, + }, + shape: 'todo_shape', + }, + { + headers: { operation: 'insert' }, + key: 'id2', + value: { + id: 2, + task: 'task2', + done: true, + }, + shape: 'todo_shape', + }, + // Project table inserts + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + name: 'Project 1', + active: true, + }, + shape: 'project_shape', + }, + { + headers: { operation: 'insert' }, + key: 'id2', + value: { + id: 2, + name: 'Project 2', + active: false, + }, + shape: 'project_shape', + }, + ]) + + // Verify data was inserted into both tables + const todoResult = await pg.sql`SELECT * FROM todo ORDER BY id;` + expect(todoResult.rows).toEqual([ + { id: 1, task: 'task1', done: false }, + { id: 2, task: 'task2', done: true }, + ]) + + const projectResult = await pg.sql`SELECT * FROM project ORDER BY id;` + expect(projectResult.rows).toEqual([ + { id: 1, name: 'Project 1', active: true }, + { id: 2, name: 'Project 2', active: false }, + ]) + + // Verify onInitialSync was called + expect(onInitialSync).toHaveBeenCalledTimes(1) + + // Test updates across both tables + await feedMessages([ + // Update todo + { + headers: { operation: 'update' }, + key: 'id1', + value: { + id: 1, + task: 'Updated task 1', + done: true, + }, + shape: 'todo_shape', + }, + // Update project + { + headers: { operation: 'update' }, + key: 'id2', + value: { + id: 2, + name: 'Updated Project 2', + active: true, + }, + shape: 'project_shape', + }, + ]) + + // Verify updates were applied to both tables + const updatedTodoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;` + expect(updatedTodoResult.rows[0]).toEqual({ + id: 1, + task: 'Updated task 1', + done: true, + }) + + const updatedProjectResult = + await pg.sql`SELECT * FROM project WHERE id = 2;` + expect(updatedProjectResult.rows[0]).toEqual({ + id: 2, + name: 'Updated Project 2', + active: true, + }) + + // Test deletes across both tables + await feedMessages([ + { + headers: { operation: 'delete' }, + key: 'id2', + shape: 'todo_shape', + value: { id: 2 }, + }, + { + headers: { operation: 'delete' }, + key: 'id1', + shape: 'project_shape', + value: { id: 1 }, + }, + ]) + + // Verify deletes were applied to both tables + const todoCountAfterDelete = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM todo; + ` + expect(todoCountAfterDelete.rows[0].count).toBe(1) + + const projectCountAfterDelete = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM project; + ` + 
expect(projectCountAfterDelete.rows[0].count).toBe(1) + + // Cleanup + syncResult.unsubscribe() + }) + + it('handles transactions across multiple tables with syncShapesToTables', async () => { + // Create a second table for testing multi-shape sync + await pg.exec(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + await pg.exec(`TRUNCATE project;`) + + // Setup mock for MultiShapeStream with two shapes and LSN tracking + let feedMessages: ( + lsn: number, + messages: MultiShapeMessage[], + ) => Promise = async (_lsn, _messages) => {} + + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (lsn, messages) => + cb([ + ...messages.map((msg) => { + if ('headers' in msg && 'operation' in msg.headers) { + return { + ...msg, + headers: { + ...msg.headers, + lsn, + }, + } as MultiShapeMessage + } + return msg + }), + { + shape: 'todo_shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: lsn, + }, + } as MultiShapeMessage, + { + shape: 'project_shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: lsn, + }, + } as MultiShapeMessage, + ]) + }, + ), + unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + todo_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + project_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, + })) + + // Set up sync for both tables + const syncResult = await pg.electric.syncShapesToTables({ + key: 'transaction_test', + shapes: { + todo_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'project' }, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + }) + + // Send initial data with LSN 1 + await feedMessages(1, [ + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + task: 'Initial task', + done: false, + }, + shape: 'todo_shape', + }, + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + name: 'Initial project', + active: true, + }, + shape: 'project_shape', + }, + ]) + + // Verify initial data was inserted + const initialTodoCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(initialTodoCount.rows[0].count).toBe(1) + + const initialProjectCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM project;` + expect(initialProjectCount.rows[0].count).toBe(1) + + // Simulate a transaction with LSN 2 that updates both tables + await feedMessages(2, [ + { + headers: { operation: 'update' }, + key: 'id1', + value: { + id: 1, + task: 'Updated in transaction', + done: true, + }, + shape: 'todo_shape', + }, + { + headers: { operation: 'update' }, + key: 'id1', + value: { + id: 1, + name: 'Updated in transaction', + active: false, + }, + shape: 'project_shape', + }, + ]) + + // Verify both updates were applied + const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;` + expect(todoResult.rows[0]).toEqual({ + id: 1, + task: 'Updated in transaction', + done: true, + }) + + const projectResult = await pg.sql`SELECT * FROM project WHERE id = 1;` + expect(projectResult.rows[0]).toEqual({ + id: 1, + name: 'Updated in transaction', + active: false, + }) + + // Cleanup + syncResult.unsubscribe() + }) + + it('handles must-refetch control message across multiple tables', async () => { + // Create 
a second table for testing multi-shape sync + await pg.exec(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + await pg.exec(`TRUNCATE project;`) + + // Setup mock for MultiShapeStream with refetch handling + let feedMessages: (messages: MultiShapeMessage[]) => Promise = async ( + _, + ) => {} + let mockShapeId: string | void = undefined + + MockMultiShapeStream.mockImplementation(() => ({ + subscribe: vi.fn( + (cb: (messages: MultiShapeMessage[]) => Promise) => { + feedMessages = (messages) => { + mockShapeId ??= Math.random() + '' + if (messages.find((m) => m.headers.control === 'must-refetch')) { + mockShapeId = undefined + } + + return cb([ + ...messages, + { + shape: 'todo_shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + } as MultiShapeMessage, + { + shape: 'project_shape', + headers: { + control: 'up-to-date', + global_last_seen_lsn: 0, + }, + } as MultiShapeMessage, + ]) + } + }, + ), + unsubscribeAll: vi.fn(), + isUpToDate: true, + shapes: { + todo_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + project_shape: { + subscribe: vi.fn(), + unsubscribeAll: vi.fn(), + }, + }, + })) + + // Set up sync for both tables + const syncResult = await pg.electric.syncShapesToTables({ + key: 'refetch_test', + shapes: { + todo_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'project' }, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + }) + + // Insert initial data + await feedMessages([ + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + task: 'Initial task', + done: false, + }, + shape: 'todo_shape', + }, + { + headers: { operation: 'insert' }, + key: 'id1', + value: { + id: 1, + name: 'Initial project', + active: true, + }, + shape: 'project_shape', + }, + ]) + + // Verify initial data was inserted + const refetchTodoCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(refetchTodoCount.rows[0].count).toBe(1) + + const refetchProjectCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM project;` + expect(refetchProjectCount.rows[0].count).toBe(1) + + // Send must-refetch control message and new data + await feedMessages([ + { headers: { control: 'must-refetch' }, shape: 'todo_shape' }, + { headers: { control: 'must-refetch' }, shape: 'project_shape' }, + { + headers: { operation: 'insert' }, + key: 'id2', + value: { + id: 2, + task: 'New task after refetch', + done: true, + }, + shape: 'todo_shape', + }, + { + headers: { operation: 'insert' }, + key: 'id2', + value: { + id: 2, + name: 'New project after refetch', + active: false, + }, + shape: 'project_shape', + }, + ]) + + // Verify tables were cleared and new data was inserted + const todoResult = await pg.sql`SELECT * FROM todo ORDER BY id;` + expect(todoResult.rows).toEqual([ + { + id: 2, + task: 'New task after refetch', + done: true, + }, + ]) + + const projectResult = await pg.sql`SELECT * FROM project ORDER BY id;` + expect(projectResult.rows).toEqual([ + { + id: 2, + name: 'New project after refetch', + active: false, + }, + ]) + + // Cleanup + syncResult.unsubscribe() + }) }) From 37ec06e5786b5b9ed47081aaf2483b54a1aa1708 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 16:29:30 +0000 Subject: [PATCH 04/10] fix style check --- 
packages/pglite-sync/src/index.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/pglite-sync/src/index.ts b/packages/pglite-sync/src/index.ts index 0a674a28..0526866f 100644 --- a/packages/pglite-sync/src/index.ts +++ b/packages/pglite-sync/src/index.ts @@ -174,8 +174,9 @@ async function createPlugin( // during a sync tx.exec(`SET LOCAL ${metadataSchema}.syncing = true;`) - for (let [shapeName, messages] of messagesToCommit.entries()) { + for (const [shapeName, initialMessages] of messagesToCommit.entries()) { const shape = shapes[shapeName] + let messages = initialMessages // If we need to truncate the table, do so if (truncateNeeded.has(shapeName)) { From 4441f14cfa38389f1900dc9061146cf89ba04168 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 16:48:30 +0000 Subject: [PATCH 05/10] docs --- docs/docs/sync.md | 93 +++++++++++++++++++++++++++------- packages/pglite-sync/README.md | 37 +++++++++++++- 2 files changed, 110 insertions(+), 20 deletions(-) diff --git a/docs/docs/sync.md b/docs/docs/sync.md index 83cfb92d..9d0dbab6 100644 --- a/docs/docs/sync.md +++ b/docs/docs/sync.md @@ -34,7 +34,11 @@ await pg.exec(` `) ``` -You can then use the `syncShapeToTable` method to sync a table from Electric: +You can sync data from Electric using either the single table or multi-table API. + +### Single Table Sync + +Use the `syncShapeToTable` method to sync a single table from Electric: ```ts const shape = await pg.electric.syncShapeToTable({ @@ -46,13 +50,47 @@ const shape = await pg.electric.syncShapeToTable({ }, table: 'todo', primaryKey: ['id'], + shapeKey: 'todo', // or null if the shape state does not need to be persisted }) + +// Stop syncing when done +shape.unsubscribe() ``` -To stop syncing you can call `unsubscribe` on the shape: +### Multi-Table Sync + +The multi-table API ensures transactional consistency across tables by syncing updates that happened in a single transaction in Postgres within a single transaction in PGLite. + +Use the `syncShapesToTables` method to sync multiple tables simultaneously: ```ts -shape.unsubscribe() +const sync = await pg.electric.syncShapesToTables({ + shapes: { + todos: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + }, + users: { + shape: { + url: 'http://localhost:3000/v1/shape', + params: { table: 'users' }, + }, + table: 'users', + primaryKey: ['id'], + }, + }, + key: 'my-sync', // or null if the sync state does not need to be persisted + onInitialSync: () => { + console.log('Initial sync complete') + }, +}) + +// Stop syncing when done +sync.unsubscribe() ``` There is a full example you can run locally in the [GitHub repository](https://github.com/electric-sql/pglite/tree/main/packages/pglite-sync/example). @@ -94,18 +132,6 @@ It takes the following options as an object: - `useCopy: boolean`
Whether to use the `COPY FROM` command to insert the initial data, defaults to `false`. This process may be faster than inserting row by row as it combines the inserts into a CSV to be passed to Postgres. -- `commitGranularity: CommitGranularity`
- The granularity of the commit operation, defaults to `"up-to-date"`. Note that a commit will always be performed immediately on the `up-to-date` message. - Options: - - - `"up-to-date"`: Commit all messages when the `up-to-date` message is received. - - - `"operation"`: Commit each message in its own transaction. - - `number`: Commit every N messages. - -- `commitThrottle: number`
- The number of milliseconds to wait between commits, defaults to `0`. - - `onInitialSync: () => void`
A callback that is called when the initial sync is complete. @@ -126,12 +152,41 @@ The returned `shape` object from the `syncShapeToTable` call has the following m - `stream: ShapeStream`
The underlying `ShapeStream` instance, see the [ShapeStream API](https://electric-sql.com/docs/api/clients/typescript#shapestream) for more details. +## syncShapesToTables API + +The `syncShapesToTables` API allows syncing multiple shapes into multiple tables simultaneously while maintaining transactional consistency. It takes the following options: + +- `shapes: Record`
+ An object mapping shape names to their configuration options. Each shape configuration includes: + + - `shape: ShapeStreamOptions` - The shape stream specification + - `table: string` - The target table name + - `schema?: string` - Optional schema name (defaults to "public") + - `mapColumns?: MapColumns` - Optional column mapping + - `primaryKey: string[]` - Array of primary key columns + +- `key: string | null`
+ Identifier for the multi-shape subscription. If provided, sync state will be persisted to allow resuming between sessions. + +- `useCopy?: boolean`
Whether to use `COPY FROM` for faster initial data loading (defaults to `false`).

- `onInitialSync?: () => void`
+ Optional callback that fires when initial sync is complete for all shapes. + +The returned sync object provides: + +- `isUpToDate: boolean`
Whether all shapes have caught up with the upstream Postgres server.

- `streams: Record`
+ Access to individual shape streams by their names. + +- `unsubscribe()`
Stop syncing all shapes.

## Limitations

- It is currently not possible to sync multiple shapes into the same table, as shape subscriptions require being able to drop all data and start over. We are working on a fix for this case, but the current version will throw if a shape is synced into the same table more than once.
- In order to maintain transactional consistency, data is aggregated in-memory until we can guarantee its consistency, which might create a lot of memory usage for very large shapes. We are working on resolving this issue, and it is only a problem for initial syncing.
-
-## Sync using legacy Electric
-
-Prior to the development of the new sync engine, the previous version of PGlite and Electric also had a sync capability. You can [read more about it on our blog](https://electric-sql.com/blog/2024/05/14/electricsql-postgres-client-support).
diff --git a/packages/pglite-sync/README.md b/packages/pglite-sync/README.md
index 8bd1ea8a..eae72a7f 100644
--- a/packages/pglite-sync/README.md
+++ b/packages/pglite-sync/README.md
@@ -28,12 +28,47 @@ await pg.exec(`
 `)
 ```

-You can then use the syncShapeToTable method to sync a table from Electric:
+You can sync data from Electric using either the single table or multi-table API:

### Single Table Sync

Use `syncShapeToTable` to sync a single table:

```ts
const shape = await pg.electric.syncShapeToTable({
  shape: { url: 'http://localhost:3000/v1/shape', table: 'todo' },
+  shapeKey: 'todo', // or null if the shape state does not need to be persisted
  table: 'todo',
  primaryKey: ['id'],
})
```
+
+### Multi-Table Sync
+
+The multi-table API is useful when you need to sync related tables together, ensuring consistency across multiple tables by syncing updates that happened in a single transaction in Postgres within a single transaction in PGLite. 
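To make that guarantee concrete, here is a minimal sketch (not from the package docs) of what it looks like end to end. It assumes a server-side Postgres client called `pgServer` (e.g. node-postgres), and the `todo`/`users` tables from the `syncShapesToTables` example just below; column names are illustrative:

```ts
// Upstream: both writes commit atomically in one Postgres transaction.
await pgServer.query('BEGIN')
await pgServer.query(`INSERT INTO todo (id, task, done) VALUES (1, 'write docs', false)`)
await pgServer.query(`INSERT INTO users (id, name) VALUES (1, 'Sam')`)
await pgServer.query('COMMIT')

// Downstream: syncShapesToTables applies the changes from that upstream
// transaction inside a single PGlite transaction, so a local reader sees
// either both rows or neither, never the todo without its user.
const todos = await pg.query('SELECT * FROM todo')
const users = await pg.query('SELECT * FROM users')
```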
+ +Use `syncShapesToTables` to sync multiple tables simultaneously: + +```ts +const sync = await pg.electric.syncShapesToTables({ + shapes: { + todos: { + shape: { url: 'http://localhost:3000/v1/shape', table: 'todo' }, + table: 'todo', + primaryKey: ['id'], + }, + users: { + shape: { url: 'http://localhost:3000/v1/shape', table: 'users' }, + table: 'users', + primaryKey: ['id'], + } + }, + key: 'my-sync', // or null if the sync state does not need to be persisted + onInitialSync: () => { + console.log('Initial sync complete') + } +}) + +// Unsubscribe when done +sync.unsubscribe() +``` From f18173c150a8e7f4c41be58ec996a9b8100af149 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 17:37:16 +0000 Subject: [PATCH 06/10] Scafolding for e2e tests --- packages/pglite-sync/package.json | 3 ++ .../pglite-sync/test-e2e/docker_compose.yaml | 29 ++++++++++++++ .../pglite-sync/test-e2e/sync-e2e.test.ts | 15 +++++++ packages/pglite-sync/vitest-e2e.config.ts | 16 ++++++++ pnpm-lock.yaml | 39 +++++++++++++++++++ 5 files changed, 102 insertions(+) create mode 100644 packages/pglite-sync/test-e2e/docker_compose.yaml create mode 100644 packages/pglite-sync/test-e2e/sync-e2e.test.ts create mode 100644 packages/pglite-sync/vitest-e2e.config.ts diff --git a/packages/pglite-sync/package.json b/packages/pglite-sync/package.json index 1961ba36..fda6b2bb 100644 --- a/packages/pglite-sync/package.json +++ b/packages/pglite-sync/package.json @@ -28,6 +28,7 @@ "scripts": { "build": "tsup", "test": "vitest", + "test:e2e": "docker compose -f test-e2e/docker_compose.yaml up -d && pnpm vitest --config vitest-e2e.config.ts && docker compose -f test-e2e/docker_compose.yaml down --volumes", "lint": "eslint ./src ./test --report-unused-disable-directives --max-warnings 0", "format": "prettier --write ./src ./test", "typecheck": "tsc", @@ -51,8 +52,10 @@ "devDependencies": { "@electric-sql/pglite": "workspace:*", "@eslint-react/eslint-plugin": "^1.14.3", + "@types/node": "^20.16.11", "@vitejs/plugin-react": "^4.3.2", "globals": "^15.11.0", + "pg": "^8.14.0", "vitest": "^2.1.2" }, "peerDependencies": { diff --git a/packages/pglite-sync/test-e2e/docker_compose.yaml b/packages/pglite-sync/test-e2e/docker_compose.yaml new file mode 100644 index 00000000..078cf176 --- /dev/null +++ b/packages/pglite-sync/test-e2e/docker_compose.yaml @@ -0,0 +1,29 @@ +version: "3.3" +name: "electric_quickstart" + +services: + postgres: + image: postgres:16-alpine + environment: + POSTGRES_DB: electric + POSTGRES_USER: postgres + POSTGRES_PASSWORD: password + ports: + - 54321:5432 + tmpfs: + - /var/lib/postgresql/data + - /tmp + command: + - -c + - listen_addresses=* + - -c + - wal_level=logical + + electric: + image: electricsql/electric + environment: + DATABASE_URL: postgresql://postgres:password@postgres:5432/electric?sslmode=disable + ports: + - "3000:3000" + depends_on: + - postgres \ No newline at end of file diff --git a/packages/pglite-sync/test-e2e/sync-e2e.test.ts b/packages/pglite-sync/test-e2e/sync-e2e.test.ts new file mode 100644 index 00000000..be295643 --- /dev/null +++ b/packages/pglite-sync/test-e2e/sync-e2e.test.ts @@ -0,0 +1,15 @@ +/// +import { describe, it, expect } from 'vitest' + +const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://postgres:password@localhost:54321/electric?sslmode=disable' +const ELECTRIC_URL = process.env.ELECTRIC_URL || 'http://localhost:3000/v1/shape' + + +describe('sync-e2e', () => { + it('should sync data from postgres to pglite', async () => { + console.log('DATABASE_URL', 
DATABASE_URL) + console.log('ELECTRIC_URL', ELECTRIC_URL) + // TODO: write tests! + }) +}) + diff --git a/packages/pglite-sync/vitest-e2e.config.ts b/packages/pglite-sync/vitest-e2e.config.ts new file mode 100644 index 00000000..309ff565 --- /dev/null +++ b/packages/pglite-sync/vitest-e2e.config.ts @@ -0,0 +1,16 @@ +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + name: 'pglite-sync-e2e', + dir: './test-e2e', + watch: false, + typecheck: { enabled: true }, + testTimeout: 30000, + hookTimeout: 30000, + restoreMocks: true, + testTransformMode: { + ssr: ['**/*'], + }, + }, +}) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 813b1302..0631cce3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -310,12 +310,18 @@ importers: '@eslint-react/eslint-plugin': specifier: ^1.14.3 version: 1.14.3(eslint@8.57.1)(typescript@5.6.3) + '@types/node': + specifier: ^20.16.11 + version: 20.16.11 '@vitejs/plugin-react': specifier: ^4.3.2 version: 4.3.2(vite@5.4.8(@types/node@20.16.11)(terser@5.34.1)) globals: specifier: ^15.11.0 version: 15.11.0 + pg: + specifier: ^8.14.0 + version: 8.14.0 vitest: specifier: ^2.1.2 version: 2.1.2(@types/node@20.16.11)(jsdom@24.1.3)(terser@5.34.1) @@ -3378,9 +3384,17 @@ packages: peerDependencies: pg: '>=8.0' + pg-pool@3.8.0: + resolution: {integrity: sha512-VBw3jiVm6ZOdLBTIcXLNdSotb6Iy3uOCwDGFAksZCXmi10nyRvnP2v3jl4d+IsLYRyXf6o9hIm/ZtUzlByNUdw==} + peerDependencies: + pg: '>=8.0' + pg-protocol@1.7.0: resolution: {integrity: sha512-hTK/mE36i8fDDhgDFjy6xNOG+LCorxLG3WO17tku+ij6sVHXh1jQUJ8hYAnRhNla4QVD2H8er/FOjc/+EgC6yQ==} + pg-protocol@1.8.0: + resolution: {integrity: sha512-jvuYlEkL03NRvOoyoRktBK7+qU5kOvlAwvmrH8sr3wbLrOdVWsRxQfz8mMy9sZFsqJ1hEWNfdWKI4SAmoL+j7g==} + pg-types@2.2.0: resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==} engines: {node: '>=4'} @@ -3394,6 +3408,15 @@ packages: pg-native: optional: true + pg@8.14.0: + resolution: {integrity: sha512-nXbVpyoaXVmdqlKEzToFf37qzyeeh7mbiXsnoWvstSqohj88yaa/I/Rq/HEVn2QPSZEuLIJa/jSpRDyzjEx4FQ==} + engines: {node: '>= 8.0.0'} + peerDependencies: + pg-native: '>=3.0.1' + peerDependenciesMeta: + pg-native: + optional: true + pgpass@1.0.5: resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==} @@ -7865,8 +7888,14 @@ snapshots: dependencies: pg: 8.13.0 + pg-pool@3.8.0(pg@8.14.0): + dependencies: + pg: 8.14.0 + pg-protocol@1.7.0: {} + pg-protocol@1.8.0: {} + pg-types@2.2.0: dependencies: pg-int8: 1.0.1 @@ -7885,6 +7914,16 @@ snapshots: optionalDependencies: pg-cloudflare: 1.1.1 + pg@8.14.0: + dependencies: + pg-connection-string: 2.7.0 + pg-pool: 3.8.0(pg@8.14.0) + pg-protocol: 1.8.0 + pg-types: 2.2.0 + pgpass: 1.0.5 + optionalDependencies: + pg-cloudflare: 1.1.1 + pgpass@1.0.5: dependencies: split2: 4.2.0 From c603f7f7546a40797c835d3ad804986a94b9f068 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 11 Mar 2025 17:53:32 +0000 Subject: [PATCH 07/10] First e2e test --- .../pglite-sync/test-e2e/sync-e2e.test.ts | 169 +++++++++++++++++- 1 file changed, 163 insertions(+), 6 deletions(-) diff --git a/packages/pglite-sync/test-e2e/sync-e2e.test.ts b/packages/pglite-sync/test-e2e/sync-e2e.test.ts index be295643..b66e3604 100644 --- a/packages/pglite-sync/test-e2e/sync-e2e.test.ts +++ b/packages/pglite-sync/test-e2e/sync-e2e.test.ts @@ -1,15 +1,172 @@ /// -import { describe, it, expect } from 'vitest' +import { describe, it, expect, beforeAll, afterAll, beforeEach, 
afterEach } from 'vitest' +import { Client } from 'pg' +import { PGlite, PGliteInterfaceExtensions } from '@electric-sql/pglite' +import { electricSync } from '../src/index.js' +import { MultiShapeMessages } from '@electric-sql/experimental' const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://postgres:password@localhost:54321/electric?sslmode=disable' const ELECTRIC_URL = process.env.ELECTRIC_URL || 'http://localhost:3000/v1/shape' +type MultiShapeMessage = MultiShapeMessages + +// Define types for our database records +interface TodoRecord { + id: number + task: string + done: boolean +} + +interface ProjectRecord { + id: number + name: string + active: boolean +} + +interface CountResult { + count: number +} describe('sync-e2e', () => { - it('should sync data from postgres to pglite', async () => { - console.log('DATABASE_URL', DATABASE_URL) - console.log('ELECTRIC_URL', ELECTRIC_URL) - // TODO: write tests! + let pgClient: typeof Client.prototype + let pg: PGlite & PGliteInterfaceExtensions<{ electric: ReturnType }> + + // Setup PostgreSQL client and tables + beforeAll(async () => { + // Connect to PostgreSQL + pgClient = new Client({ + connectionString: DATABASE_URL + }) + await pgClient.connect() + + // Create test tables in PostgreSQL + const res = await pgClient.query(` + CREATE TABLE IF NOT EXISTS todo ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + console.log(res) + await pgClient.query('TRUNCATE todo;') + const res2 = await pgClient.query('SELECT * FROM todo;') + console.log(res2) + + // Create PGlite instance with electric sync extension + pg = await PGlite.create({ + extensions: { + electric: electricSync(), + }, + }) + + // Create the same tables in PGlite + await pg.exec(` + CREATE TABLE IF NOT EXISTS todo ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + }) + + afterAll(async () => { + // Clean up + await pgClient.query('DROP TABLE IF EXISTS todo;') + await pgClient.end() }) -}) + beforeEach(async () => { + // Clear tables before each test + await pgClient.query('TRUNCATE todo;') + await pg.exec('TRUNCATE todo;') + }) + + // Helper function to wait for an expectation to pass + const waitForExpect = async ( + expectFn: () => Promise | void, + { timeout = 5000, interval = 50 } = {} + ): Promise => { + const startTime = Date.now() + + while (true) { + try { + await expectFn() + return // Success! 
Expectation passed + } catch (error) { + if (Date.now() - startTime > timeout) { + throw new Error(`Expectation not met within ${timeout}ms: ${error}`) + } + await new Promise(resolve => setTimeout(resolve, interval)) + } + } + } + + it('handles inserts/updates/deletes', async () => { + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: null, + }) + + // Insert data into PostgreSQL + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (1, 'task1', false); + `) + + await waitForExpect( + async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows.length).toBe(1) + } + ) + + expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([ + { + id: 1, + task: 'task1', + done: false, + }, + ]) + + // Update data in PostgreSQL + await pgClient.query(` + UPDATE todo SET task = 'task2', done = true WHERE id = 1; + `) + + await waitForExpect( + async () => { + const result = await pg.sql`SELECT * FROM todo WHERE task = 'task2' AND done = true;` + expect(result.rows.length).toBe(1) + } + ) + + expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([ + { + id: 1, + task: 'task2', + done: true, + }, + ]) + + // Delete data in PostgreSQL + await pgClient.query(` + DELETE FROM todo WHERE id = 1; + `) + + await waitForExpect( + async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows.length).toBe(0) + } + ) + + expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([]) + + shape.unsubscribe() + }) +}) \ No newline at end of file From 19cba25647f0faa7b9f484ffc58e8427e31ba4fd Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 12 Mar 2025 12:54:49 +0000 Subject: [PATCH 08/10] initial passing e2e tests --- packages/pglite-sync/package.json | 6 +- .../pglite-sync/test-e2e/docker_compose.yaml | 3 +- .../pglite-sync/test-e2e/sync-e2e.test.ts | 406 ++++++++++++++---- 3 files changed, 327 insertions(+), 88 deletions(-) diff --git a/packages/pglite-sync/package.json b/packages/pglite-sync/package.json index fda6b2bb..bc49d929 100644 --- a/packages/pglite-sync/package.json +++ b/packages/pglite-sync/package.json @@ -28,7 +28,11 @@ "scripts": { "build": "tsup", "test": "vitest", - "test:e2e": "docker compose -f test-e2e/docker_compose.yaml up -d && pnpm vitest --config vitest-e2e.config.ts && docker compose -f test-e2e/docker_compose.yaml down --volumes", + "test:e2e:up": "docker compose -f test-e2e/docker_compose.yaml up -d", + "test:e2e:down": "docker compose -f test-e2e/docker_compose.yaml down --volumes", + "test:e2e:reset": "pnpm test:e2e:down && pnpm test:e2e:up", + "test:e2e:run": "pnpm vitest --config vitest-e2e.config.ts", + "test:e2e": "pnpm test:e2e:up && pnpm test:e2e:run && pnpm test:e2e:down", "lint": "eslint ./src ./test --report-unused-disable-directives --max-warnings 0", "format": "prettier --write ./src ./test", "typecheck": "tsc", diff --git a/packages/pglite-sync/test-e2e/docker_compose.yaml b/packages/pglite-sync/test-e2e/docker_compose.yaml index 078cf176..b2b29431 100644 --- a/packages/pglite-sync/test-e2e/docker_compose.yaml +++ b/packages/pglite-sync/test-e2e/docker_compose.yaml @@ -20,9 +20,10 @@ services: - wal_level=logical electric: - image: electricsql/electric + image: electricsql/electric:latest environment: DATABASE_URL: postgresql://postgres:password@postgres:5432/electric?sslmode=disable + ELECTRIC_ENABLE_INTEGRATION_TESTING: true ports: - "3000:3000" depends_on: diff --git 
a/packages/pglite-sync/test-e2e/sync-e2e.test.ts b/packages/pglite-sync/test-e2e/sync-e2e.test.ts index b66e3604..c89e305e 100644 --- a/packages/pglite-sync/test-e2e/sync-e2e.test.ts +++ b/packages/pglite-sync/test-e2e/sync-e2e.test.ts @@ -1,57 +1,111 @@ /// -import { describe, it, expect, beforeAll, afterAll, beforeEach, afterEach } from 'vitest' +import { + describe, + it, + expect, + beforeAll, + afterAll, + beforeEach, + afterEach, + vi, +} from 'vitest' import { Client } from 'pg' import { PGlite, PGliteInterfaceExtensions } from '@electric-sql/pglite' import { electricSync } from '../src/index.js' -import { MultiShapeMessages } from '@electric-sql/experimental' -const DATABASE_URL = process.env.DATABASE_URL || 'postgresql://postgres:password@localhost:54321/electric?sslmode=disable' -const ELECTRIC_URL = process.env.ELECTRIC_URL || 'http://localhost:3000/v1/shape' +const DATABASE_URL = + process.env.DATABASE_URL || + 'postgresql://postgres:password@localhost:54321/electric?sslmode=disable' +const ELECTRIC_URL = + process.env.ELECTRIC_URL || 'http://localhost:3000/v1/shape' -type MultiShapeMessage = MultiShapeMessages +const shapeHandles: Map = new Map() -// Define types for our database records -interface TodoRecord { - id: number - task: string - done: boolean +const fetchClient: typeof fetch = async ( + url: string | Request | URL, + options: RequestInit = {}, +) => { + let table: string | null = null + if (typeof url === 'string') { + table = new URL(url).searchParams.get('table') + } else if (url instanceof Request) { + table = new URL(url.url).searchParams.get('table') + } else if (url instanceof URL) { + table = url.searchParams.get('table') + } + const res = await fetch(url, { + ...options, + headers: { + 'Content-Type': 'application/json', + }, + }) + if (table) { + shapeHandles.set(res.headers.get('electric-handle')!, table) + } + return res } -interface ProjectRecord { - id: number - name: string - active: boolean +const deleteShape = async (table: string, handle: string) => { + const deleteUrl = new URL(ELECTRIC_URL) + deleteUrl.searchParams.set('table', table) + deleteUrl.searchParams.set('handle', handle) + const res = await fetch(deleteUrl, { + method: 'DELETE', + }) + if (res.status === 404) { + // Nothing to delete + return + } + if (!res.ok) { + throw new Error(`Failed to delete shape: ${res.statusText}`) + } +} + +const deleteAllShapes = async () => { + for (const [handle, table] of shapeHandles.entries()) { + await deleteShape(table, handle) + } + shapeHandles.clear() } -interface CountResult { - count: number +const deleteAllShapesForTable = async (table: string) => { + for (const [handle, table] of shapeHandles.entries()) { + if (table === table) { + await deleteShape(table, handle) + } + } } describe('sync-e2e', () => { let pgClient: typeof Client.prototype - let pg: PGlite & PGliteInterfaceExtensions<{ electric: ReturnType }> + let pg: PGlite & + PGliteInterfaceExtensions<{ electric: ReturnType }> // Setup PostgreSQL client and tables beforeAll(async () => { // Connect to PostgreSQL pgClient = new Client({ - connectionString: DATABASE_URL + connectionString: DATABASE_URL, }) await pgClient.connect() // Create test tables in PostgreSQL - const res = await pgClient.query(` + await pgClient.query(` CREATE TABLE IF NOT EXISTS todo ( id SERIAL PRIMARY KEY, task TEXT, done BOOLEAN ); `) - console.log(res) + }) + + afterAll(async () => { await pgClient.query('TRUNCATE todo;') - const res2 = await pgClient.query('SELECT * FROM todo;') - console.log(res2) + await 
pgClient.end() + await deleteAllShapes() + }) + beforeEach(async () => { // Create PGlite instance with electric sync extension pg = await PGlite.create({ extensions: { @@ -61,7 +115,7 @@ describe('sync-e2e', () => { // Create the same tables in PGlite await pg.exec(` - CREATE TABLE IF NOT EXISTS todo ( + CREATE TABLE todo ( id SERIAL PRIMARY KEY, task TEXT, done BOOLEAN @@ -69,43 +123,18 @@ describe('sync-e2e', () => { `) }) - afterAll(async () => { - // Clean up - await pgClient.query('DROP TABLE IF EXISTS todo;') - await pgClient.end() - }) - - beforeEach(async () => { - // Clear tables before each test + afterEach(async () => { + await pg.close() + await deleteAllShapes() await pgClient.query('TRUNCATE todo;') - await pg.exec('TRUNCATE todo;') }) - // Helper function to wait for an expectation to pass - const waitForExpect = async ( - expectFn: () => Promise | void, - { timeout = 5000, interval = 50 } = {} - ): Promise => { - const startTime = Date.now() - - while (true) { - try { - await expectFn() - return // Success! Expectation passed - } catch (error) { - if (Date.now() - startTime > timeout) { - throw new Error(`Expectation not met within ${timeout}ms: ${error}`) - } - await new Promise(resolve => setTimeout(resolve, interval)) - } - } - } - it('handles inserts/updates/deletes', async () => { const shape = await pg.electric.syncShapeToTable({ shape: { url: ELECTRIC_URL, params: { table: 'todo' }, + fetchClient, }, table: 'todo', primaryKey: ['id'], @@ -118,55 +147,260 @@ describe('sync-e2e', () => { VALUES (1, 'task1', false); `) - await waitForExpect( - async () => { - const result = await pg.sql`SELECT * FROM todo;` - expect(result.rows.length).toBe(1) - } - ) - - expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([ - { - id: 1, - task: 'task1', - done: false, - }, - ]) + // Wait for sync to complete + await vi.waitFor(async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task1', + done: false, + }, + ]) + }) // Update data in PostgreSQL await pgClient.query(` UPDATE todo SET task = 'task2', done = true WHERE id = 1; `) - await waitForExpect( - async () => { - const result = await pg.sql`SELECT * FROM todo WHERE task = 'task2' AND done = true;` - expect(result.rows.length).toBe(1) - } - ) - - expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([ - { - id: 1, - task: 'task2', - done: true, - }, - ]) + // Wait for sync to complete + await vi.waitFor(async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task2', + done: true, + }, + ]) + }) // Delete data in PostgreSQL await pgClient.query(` DELETE FROM todo WHERE id = 1; `) - await waitForExpect( + // Wait for sync to complete + await vi.waitFor(async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([]) + }) + + shape.unsubscribe() + }) + + it('performs operations within a transaction', async () => { + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: null, + }) + + // Insert a large batch of records to test transaction behavior + const numInserts = 1000 // Reduced from 10000 in the mock test for practical e2e testing + const numBatches = 5 + const batchSize = Math.floor(numInserts / numBatches) + + for (let i = 0; i < numBatches; i++) { + const values = Array.from({ length: batchSize }, (_, idx) => { + const itemIdx = i * batchSize + 
idx + return `(${itemIdx}, 'task${itemIdx}', false)` + }).join(', ') + + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES ${values}; + `) + } + + // Wait for all inserts to be synced + await vi.waitFor( async () => { - const result = await pg.sql`SELECT * FROM todo;` - expect(result.rows.length).toBe(0) - } + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(result.rows[0].count).toBe(numInserts) + }, + { timeout: 10000 }, // Increase timeout for larger batch ) - expect((await pg.sql`SELECT * FROM todo;`).rows).toEqual([]) + // Verify some sample data + const firstItem = await pg.sql`SELECT * FROM todo WHERE id = 0;` + expect(firstItem.rows[0]).toEqual({ + id: 0, + task: 'task0', + done: false, + }) + + const lastItem = + await pg.sql`SELECT * FROM todo WHERE id = ${numInserts - 1};` + expect(lastItem.rows[0]).toEqual({ + id: numInserts - 1, + task: `task${numInserts - 1}`, + done: false, + }) + + shape.unsubscribe() + }) + + it('persists shape stream state and automatically resumes', async () => { + // First sync session with a persistent key + let shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: 'persistent_sync_test', + }) + + // Insert initial batch of data + const numInserts = 3 + for (let i = 0; i < numInserts; i++) { + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i}, 'task${i}', false); + `) + } + + // Wait for initial sync to complete + await vi.waitFor(async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(result.rows[0].count).toBe(numInserts) + }) + // Unsubscribe from first sync session shape.unsubscribe() + + // Clear local data to simulate a fresh start + await pg.exec('TRUNCATE todo;') + + // Start a new sync session with the same key + shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: 'persistent_sync_test', + }) + + // Wait for sync to resume and restore data + await vi.waitFor(async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(result.rows[0].count).toBe(numInserts) + }) + + // Verify the data was restored + const result = await pg.sql`SELECT * FROM todo ORDER BY id;` + expect(result.rows.length).toBe(numInserts) + expect(result.rows[0]).toEqual({ + id: 0, + task: 'task0', + done: false, + }) + + // Clean up + shape.unsubscribe() + await pg.electric.deleteSubscription('persistent_sync_test') + }) + + it('syncs multiple shapes to multiple tables simultaneously', async () => { + // Create a second table for testing multi-shape sync + await pgClient.query(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + await pgClient.query('TRUNCATE project;') + + await pg.exec(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + await pg.exec('TRUNCATE project;') + + // Set up sync for both tables + const todoShape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: null, + }) + + const projectShape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'project' }, + }, + table: 'project', + primaryKey: ['id'], + 
shapeKey: null, + }) + + // Insert data into both tables in PostgreSQL + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (1, 'task1', false), (2, 'task2', true); + `) + + await pgClient.query(` + INSERT INTO project (id, name, active) + VALUES (1, 'Project 1', true), (2, 'Project 2', false); + `) + + // Wait for todo table sync to complete + await vi.waitFor(async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(result.rows[0].count).toBe(2) + }) + + // Wait for project table sync to complete + await vi.waitFor(async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM project;` + expect(result.rows[0].count).toBe(2) + }) + + // Verify data was inserted into both tables + const todoResult = await pg.sql`SELECT * FROM todo ORDER BY id;` + expect(todoResult.rows).toEqual([ + { id: 1, task: 'task1', done: false }, + { id: 2, task: 'task2', done: true }, + ]) + + const projectResult = await pg.sql`SELECT * FROM project ORDER BY id;` + expect(projectResult.rows).toEqual([ + { id: 1, name: 'Project 1', active: true }, + { id: 2, name: 'Project 2', active: false }, + ]) + + // Clean up + todoShape.unsubscribe() + projectShape.unsubscribe() + await pgClient.query('DROP TABLE IF EXISTS project;') + await pg.exec('DROP TABLE IF EXISTS project;') }) -}) \ No newline at end of file +}) From d7f4a8ad4d8ece37dbb5630c7935eb2d48eeb2b8 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 12 Mar 2025 17:55:01 +0000 Subject: [PATCH 09/10] mostly passing e2e tests --- packages/pglite-sync/package.json | 2 +- packages/pglite-sync/src/index.ts | 66 +- packages/pglite-sync/src/subscriptionState.ts | 14 +- .../pglite-sync/test-e2e/sync-e2e.test.ts | 1263 +++++++++++++++-- 4 files changed, 1216 insertions(+), 129 deletions(-) diff --git a/packages/pglite-sync/package.json b/packages/pglite-sync/package.json index bc49d929..af83eba2 100644 --- a/packages/pglite-sync/package.json +++ b/packages/pglite-sync/package.json @@ -32,7 +32,7 @@ "test:e2e:down": "docker compose -f test-e2e/docker_compose.yaml down --volumes", "test:e2e:reset": "pnpm test:e2e:down && pnpm test:e2e:up", "test:e2e:run": "pnpm vitest --config vitest-e2e.config.ts", - "test:e2e": "pnpm test:e2e:up && pnpm test:e2e:run && pnpm test:e2e:down", + "test:e2e": "pnpm test:e2e:reset && pnpm test:e2e:run && pnpm test:e2e:down", "lint": "eslint ./src ./test --report-unused-disable-directives --max-warnings 0", "format": "prettier --write ./src ./test", "typecheck": "tsc", diff --git a/packages/pglite-sync/src/index.ts b/packages/pglite-sync/src/index.ts index 0526866f..12642d85 100644 --- a/packages/pglite-sync/src/index.ts +++ b/packages/pglite-sync/src/index.ts @@ -3,6 +3,7 @@ import { ChangeMessage, isChangeMessage, isControlMessage, + ShapeStreamOptions, } from '@electric-sql/client' import { MultiShapeStream } from '@electric-sql/experimental' import type { Extension, PGliteInterface } from '@electric-sql/pglite' @@ -57,6 +58,7 @@ async function createPlugin( useCopy, onInitialSync, }: SyncShapesToTablesOptions): Promise => { + let unsubscribed = false await initMetadataTables() Object.values(shapes).forEach((shape) => { @@ -110,7 +112,7 @@ async function createPlugin( // We also have to track the last lsn that we have committed // This is across all shapes - const lastCommittedLsn: number = -Infinity + const lastCommittedLsn: number = subState?.last_lsn ?? 
-Infinity // We need our own aborter to be able to abort the streams but still accept the // signals from the user for each shape, and so we monitor the user provided signal @@ -131,10 +133,20 @@ async function createPlugin( const multiShapeStream = new MultiShapeStream>>( { shapes: Object.fromEntries( - Object.entries(shapes).map(([key, shapeOptions]) => [ - key, - shapeOptions.shape, - ]), + Object.entries(shapes).map(([key, shapeOptions]) => { + const shapeMetadata = subState?.shape_metadata[key] + const offset = shapeMetadata?.offset ?? undefined + const handle = shapeMetadata?.handle ?? undefined + return [ + key, + { + ...shapeOptions.shape, + offset, + handle, + signal: aborter.signal, + } satisfies ShapeStreamOptions, + ] + }), ), }, ) @@ -180,6 +192,9 @@ async function createPlugin( // If we need to truncate the table, do so if (truncateNeeded.has(shapeName)) { + if (debug) { + console.log('truncating table', shape.table) + } await tx.exec(`DELETE FROM ${shape.table};`) truncateNeeded.delete(shapeName) } @@ -251,8 +266,12 @@ async function createPlugin( ]), ), lastLsn: targetLsn, + debug, }) } + if (unsubscribed) { + tx.rollback() + } }) if (debug) console.timeEnd('commit') if ( @@ -266,10 +285,23 @@ async function createPlugin( } multiShapeStream.subscribe(async (messages) => { + if (unsubscribed) { + return + } + if (debug) { + console.log('received messages', messages.length) + } messages.forEach((message) => { + const lastCommittedLsnForShape = + completeLsns.get(message.shape) ?? -Infinity if (isChangeMessage(message)) { const shapeChanges = changes.get(message.shape)! const lsn = (message.headers.lsn as number | undefined) ?? 0 + if (lsn <= lastCommittedLsnForShape) { + // We are replaying changes / have already seen this lsn + // skip and move on to the next message + return + } const isLastOfLsn = (message.headers.last as boolean | undefined) ?? false if (!shapeChanges.has(lsn)) { @@ -283,15 +315,26 @@ async function createPlugin( switch (message.headers.control) { case 'up-to-date': { // Update the complete lsn for this shape + if (debug) { + console.log('received up-to-date', message) + } if (typeof message.headers.global_last_seen_lsn !== `number`) { throw new Error(`global_last_seen_lsn is not a number`) } const globalLastSeenLsn = message.headers.global_last_seen_lsn + if (globalLastSeenLsn <= lastCommittedLsnForShape) { + // We are replaying changes / have already seen this lsn + // skip and move on to the next message + return + } completeLsns.set(message.shape, globalLastSeenLsn) break } case 'must-refetch': { // Reset the changes for this shape + if (debug) { + console.log('received must-refetch', message) + } const shapeChanges = changes.get(message.shape)! 
shapeChanges.clear() completeLsns.set(message.shape, -Infinity) @@ -303,7 +346,14 @@ async function createPlugin( } }) const lowestCommittedLsn = Math.min(...Array.from(completeLsns.values())) - if (lowestCommittedLsn > lastCommittedLsn) { + + // Normal commit needed + const isCommitNeeded = lowestCommittedLsn > lastCommittedLsn + // We've had a must-refetch and are catching up on one of the shape + const isMustRefetchAndCatchingUp = + lowestCommittedLsn >= lastCommittedLsn && truncateNeeded.size > 0 + + if (isCommitNeeded || isMustRefetchAndCatchingUp) { // We have new changes to commit commitUpToLsn(lowestCommittedLsn) // Await a timeout to start a new task and allow other connections to do work @@ -316,6 +366,10 @@ async function createPlugin( aborter, }) const unsubscribe = () => { + if (debug) { + console.log('unsubscribing') + } + unsubscribed = true multiShapeStream.unsubscribeAll() aborter.abort() for (const shape of Object.values(shapes)) { diff --git a/packages/pglite-sync/src/subscriptionState.ts b/packages/pglite-sync/src/subscriptionState.ts index f0fbe472..38863fd3 100644 --- a/packages/pglite-sync/src/subscriptionState.ts +++ b/packages/pglite-sync/src/subscriptionState.ts @@ -4,8 +4,8 @@ import { SubscriptionKey } from './types' export interface SubscriptionState { key: SubscriptionKey - shapeMetadata: ShapeSubscriptionState[] - lastLsn: number + shape_metadata: Record + last_lsn: number } export interface ShapeSubscriptionState { @@ -53,6 +53,7 @@ export interface UpdateSubscriptionStateOptions { subscriptionKey: SubscriptionKey shapeMetadata: Record lastLsn: number + debug?: boolean } /** @@ -65,7 +66,16 @@ export async function updateSubscriptionState({ subscriptionKey, shapeMetadata, lastLsn, + debug, }: UpdateSubscriptionStateOptions) { + if (debug) { + console.log( + 'updating subscription state', + subscriptionKey, + shapeMetadata, + lastLsn, + ) + } await pg.query( ` INSERT INTO ${subscriptionMetadataTableName(metadataSchema)} diff --git a/packages/pglite-sync/test-e2e/sync-e2e.test.ts b/packages/pglite-sync/test-e2e/sync-e2e.test.ts index c89e305e..fa17d45a 100644 --- a/packages/pglite-sync/test-e2e/sync-e2e.test.ts +++ b/packages/pglite-sync/test-e2e/sync-e2e.test.ts @@ -21,10 +21,17 @@ const ELECTRIC_URL = const shapeHandles: Map = new Map() +const LOG_FETCH = false +let fetchCount = 0 + const fetchClient: typeof fetch = async ( url: string | Request | URL, options: RequestInit = {}, ) => { + fetchCount++ + if (LOG_FETCH) { + console.log('>> fetch', fetchCount, url, options) + } let table: string | null = null if (typeof url === 'string') { table = new URL(url).searchParams.get('table') @@ -33,15 +40,27 @@ const fetchClient: typeof fetch = async ( } else if (url instanceof URL) { table = url.searchParams.get('table') } - const res = await fetch(url, { - ...options, - headers: { - 'Content-Type': 'application/json', - }, - }) + let res: Response + try { + res = await fetch(url, options) + } catch (e) { + if (LOG_FETCH) { + console.log('>> fetch error', fetchCount, e) + } + throw e + } if (table) { shapeHandles.set(res.headers.get('electric-handle')!, table) } + if (LOG_FETCH) { + console.log( + '>> fetch res', + fetchCount, + res.status, + res.statusText, + res.headers, + ) + } return res } @@ -57,7 +76,7 @@ const deleteShape = async (table: string, handle: string) => { return } if (!res.ok) { - throw new Error(`Failed to delete shape: ${res.statusText}`) + throw new Error(`Error deleting shape: ${res.statusText}`) } } @@ -89,7 +108,7 @@ describe('sync-e2e', () => 
{ }) await pgClient.connect() - // Create test tables in PostgreSQL + // Create test tables in PostgreSQL if they don't exist await pgClient.query(` CREATE TABLE IF NOT EXISTS todo ( id SERIAL PRIMARY KEY, @@ -97,15 +116,90 @@ describe('sync-e2e', () => { done BOOLEAN ); `) + + // Create additional tables needed for tests + await pgClient.query(` + CREATE TABLE IF NOT EXISTS project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + + await pgClient.query(` + CREATE TABLE IF NOT EXISTS alt_todo ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + + await pgClient.query(` + CREATE TABLE IF NOT EXISTS test_syncing ( + id TEXT PRIMARY KEY, + value TEXT, + is_syncing BOOLEAN + ); + `) + + // Create or replace the trigger function + await pgClient.query(` + CREATE OR REPLACE FUNCTION check_syncing() + RETURNS TRIGGER AS $$ + DECLARE + is_syncing BOOLEAN; + BEGIN + is_syncing := COALESCE(current_setting('electric.syncing', true)::boolean, false); + IF is_syncing THEN + NEW.is_syncing := TRUE; + ELSE + NEW.is_syncing := FALSE; + END IF; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + `) + + // Drop and recreate the trigger + await pgClient.query( + `DROP TRIGGER IF EXISTS test_syncing_trigger ON test_syncing;`, + ) + await pgClient.query(` + CREATE TRIGGER test_syncing_trigger + BEFORE INSERT ON test_syncing + FOR EACH ROW EXECUTE FUNCTION check_syncing(); + `) + + // Create a todo_alt table for the multiple subscriptions test + await pgClient.query(` + CREATE TABLE IF NOT EXISTS todo_alt ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + + // Clean up any existing data + await pgClient.query( + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + ) }) afterAll(async () => { - await pgClient.query('TRUNCATE todo;') + // Truncate all tables + await pgClient.query( + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + ) + await pgClient.end() await deleteAllShapes() }) beforeEach(async () => { + await pgClient.query( + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + ) + // Create PGlite instance with electric sync extension pg = await PGlite.create({ extensions: { @@ -121,12 +215,69 @@ describe('sync-e2e', () => { done BOOLEAN ); `) + + // Create additional tables needed for tests + await pg.exec(` + CREATE TABLE project ( + id SERIAL PRIMARY KEY, + name TEXT, + active BOOLEAN + ); + `) + + await pg.exec(` + CREATE TABLE alt_todo ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + + await pg.exec(` + CREATE TABLE test_syncing ( + id TEXT PRIMARY KEY, + value TEXT, + is_syncing BOOLEAN + ); + + CREATE OR REPLACE FUNCTION check_syncing() + RETURNS TRIGGER AS $$ + DECLARE + is_syncing BOOLEAN; + BEGIN + is_syncing := COALESCE(current_setting('electric.syncing', true)::boolean, false); + IF is_syncing THEN + NEW.is_syncing := TRUE; + ELSE + NEW.is_syncing := FALSE; + END IF; + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + + CREATE TRIGGER test_syncing_trigger + BEFORE INSERT ON test_syncing + FOR EACH ROW EXECUTE FUNCTION check_syncing(); + `) + + // Create a todo_alt table for the multiple subscriptions test + await pg.exec(` + CREATE TABLE todo_alt ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) }) afterEach(async () => { await pg.close() await deleteAllShapes() - await pgClient.query('TRUNCATE todo;') + + // Truncate all tables + await pgClient.query( + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + ) }) it('handles inserts/updates/deletes', async () => { @@ -148,16 +299,19 
@@ describe('sync-e2e', () => { `) // Wait for sync to complete - await vi.waitFor(async () => { - const result = await pg.sql`SELECT * FROM todo;` - expect(result.rows).toEqual([ - { - id: 1, - task: 'task1', - done: false, - }, - ]) - }) + await vi.waitFor( + async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task1', + done: false, + }, + ]) + }, + { timeout: 5000 }, + ) // Update data in PostgreSQL await pgClient.query(` @@ -165,16 +319,19 @@ describe('sync-e2e', () => { `) // Wait for sync to complete - await vi.waitFor(async () => { - const result = await pg.sql`SELECT * FROM todo;` - expect(result.rows).toEqual([ - { - id: 1, - task: 'task2', - done: true, - }, - ]) - }) + await vi.waitFor( + async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task2', + done: true, + }, + ]) + }, + { timeout: 5000 }, + ) // Delete data in PostgreSQL await pgClient.query(` @@ -182,10 +339,13 @@ describe('sync-e2e', () => { `) // Wait for sync to complete - await vi.waitFor(async () => { - const result = await pg.sql`SELECT * FROM todo;` - expect(result.rows).toEqual([]) - }) + await vi.waitFor( + async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([]) + }, + { timeout: 5000 }, + ) shape.unsubscribe() }) @@ -248,92 +408,9 @@ describe('sync-e2e', () => { shape.unsubscribe() }) - it('persists shape stream state and automatically resumes', async () => { - // First sync session with a persistent key - let shape = await pg.electric.syncShapeToTable({ - shape: { - url: ELECTRIC_URL, - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - shapeKey: 'persistent_sync_test', - }) - - // Insert initial batch of data - const numInserts = 3 - for (let i = 0; i < numInserts; i++) { - await pgClient.query(` - INSERT INTO todo (id, task, done) - VALUES (${i}, 'task${i}', false); - `) - } - - // Wait for initial sync to complete - await vi.waitFor(async () => { - const result = await pg.sql<{ - count: number - }>`SELECT COUNT(*) as count FROM todo;` - expect(result.rows[0].count).toBe(numInserts) - }) - - // Unsubscribe from first sync session - shape.unsubscribe() - - // Clear local data to simulate a fresh start - await pg.exec('TRUNCATE todo;') - - // Start a new sync session with the same key - shape = await pg.electric.syncShapeToTable({ - shape: { - url: ELECTRIC_URL, - params: { table: 'todo' }, - }, - table: 'todo', - primaryKey: ['id'], - shapeKey: 'persistent_sync_test', - }) - - // Wait for sync to resume and restore data - await vi.waitFor(async () => { - const result = await pg.sql<{ - count: number - }>`SELECT COUNT(*) as count FROM todo;` - expect(result.rows[0].count).toBe(numInserts) - }) - - // Verify the data was restored - const result = await pg.sql`SELECT * FROM todo ORDER BY id;` - expect(result.rows.length).toBe(numInserts) - expect(result.rows[0]).toEqual({ - id: 0, - task: 'task0', - done: false, - }) - - // Clean up - shape.unsubscribe() - await pg.electric.deleteSubscription('persistent_sync_test') - }) - it('syncs multiple shapes to multiple tables simultaneously', async () => { - // Create a second table for testing multi-shape sync - await pgClient.query(` - CREATE TABLE IF NOT EXISTS project ( - id SERIAL PRIMARY KEY, - name TEXT, - active BOOLEAN - ); - `) + // Clean up any existing data in the project table await pgClient.query('TRUNCATE project;') - - await pg.exec(` - CREATE TABLE IF NOT 
EXISTS project ( - id SERIAL PRIMARY KEY, - name TEXT, - active BOOLEAN - ); - `) await pg.exec('TRUNCATE project;') // Set up sync for both tables @@ -400,7 +477,953 @@ describe('sync-e2e', () => { // Clean up todoShape.unsubscribe() projectShape.unsubscribe() - await pgClient.query('DROP TABLE IF EXISTS project;') - await pg.exec('DROP TABLE IF EXISTS project;') + }) + + it('handles an update message with no columns to update', async () => { + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: null, + }) + + // Insert data into PostgreSQL + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (1, 'task1', false); + `) + + // Wait for sync to complete + await vi.waitFor(async () => { + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task1', + done: false, + }, + ]) + }) + + // Update data in PostgreSQL with only the primary key (no other columns) + await pgClient.query(` + UPDATE todo SET id = 1 WHERE id = 1; + `) + + // Wait a moment to ensure sync has time to process + await new Promise((resolve) => setTimeout(resolve, 1000)) + + // Verify data remains unchanged + const result = await pg.sql`SELECT * FROM todo;` + expect(result.rows).toEqual([ + { + id: 1, + task: 'task1', + done: false, + }, + ]) + + shape.unsubscribe() + }) + + it('sets the syncing flag to true when syncing begins', async () => { + // Check the flag is not set outside of a sync + const result0 = + await pg.sql`SELECT current_setting('electric.syncing', true)` + expect(result0.rows[0]).toEqual({ current_setting: null }) // not set yet as syncShapeToTable hasn't been called + + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'test_syncing' }, + fetchClient, + }, + table: 'test_syncing', + primaryKey: ['id'], + shapeKey: null, + }) + + // Insert data into PostgreSQL + await pgClient.query(` + INSERT INTO test_syncing (id, value) + VALUES ('id1', 'test value'); + `) + + // Wait for sync to complete + await vi.waitFor(async () => { + const result = await pg.sql`SELECT * FROM test_syncing WHERE id = 'id1'` + expect(result.rows).toHaveLength(1) + }) + + // Check the syncing flag was set during sync + const result = await pg.sql`SELECT * FROM test_syncing WHERE id = 'id1'` + expect(result.rows[0]).toEqual({ + id: 'id1', + value: 'test value', + is_syncing: true, + }) + + // Check the flag is not set outside of a sync + const result2 = + await pg.sql`SELECT current_setting('electric.syncing', true)` + expect(result2.rows[0]).toEqual({ current_setting: 'false' }) + + // Clean up + shape.unsubscribe() + }) + + it('forbids multiple subscriptions to the same table', async () => { + const table = 'todo' + const altTable = 'alt_todo' + + // First subscription + const shape1 = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table }, + fetchClient, + }, + table, + primaryKey: ['id'], + shapeKey: null, + }) + + // Should throw if syncing more shapes into same table + await expect( + async () => + await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo_alt' }, + fetchClient, + }, + table, + primaryKey: ['id'], + shapeKey: null, + }), + ).rejects.toThrowError(`Already syncing shape for table ${table}`) + + // Should be able to sync shape into other table + const altShape = await pg.electric.syncShapeToTable({ + 
shape: { + url: ELECTRIC_URL, + params: { table: altTable }, + fetchClient, + }, + table: altTable, + primaryKey: ['id'], + shapeKey: null, + }) + + // Clean up first subscription + shape1.unsubscribe() + + // Should be able to sync different shape if previous is unsubscribed + const shape2 = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo_alt' }, + fetchClient, + }, + table, + primaryKey: ['id'], + shapeKey: null, + }) + + // Clean up + altShape.unsubscribe() + shape2.unsubscribe() + }) + + it('uses COPY FROM for initial batch of inserts', async () => { + // Insert a large batch of records to test COPY FROM behavior + const numInserts = 1000 + const values = Array.from( + { length: numInserts }, + (_, idx) => `(${idx}, 'task${idx}', ${idx % 2 === 0})`, + ).join(', ') + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES ${values}; + `) + + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + useCopy: true, + shapeKey: null, + }) + + // Wait for all inserts to be synced + await vi.waitFor( + async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + expect(result.rows[0].count).toBe(numInserts) + }, + { timeout: 20000 }, // Increase timeout for larger batch + ) + + // Verify some sample data + const sampleResult = await pg.sql` + SELECT * FROM todo ORDER BY id LIMIT 5; + ` + expect(sampleResult.rows).toEqual([ + { id: 0, task: 'task0', done: true }, + { id: 1, task: 'task1', done: false }, + { id: 2, task: 'task2', done: true }, + { id: 3, task: 'task3', done: false }, + { id: 4, task: 'task4', done: true }, + ]) + + // Update one record to verify updates still work after COPY + await pgClient.query(` + UPDATE todo SET task = 'updated task' WHERE id = 0; + `) + + // Wait for update to sync + await vi.waitFor( + async () => { + const result = await pg.sql`SELECT * FROM todo WHERE id = 0;` + expect(result.rows[0]).toEqual({ + id: 0, + task: 'updated task', + done: true, + }) + }, + { timeout: 5000 }, + ) + + shape.unsubscribe() + }) + + it('handles special characters in COPY FROM data', async () => { + // Insert records with special characters + await pgClient.query(` + INSERT INTO todo (id, task, done) VALUES + (1, 'task with, comma', false), + (2, 'task with "quotes"', true), + (3, 'task with +newline', false); + `) + + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + useCopy: true, + shapeKey: null, + }) + + // Wait for inserts to complete + await vi.waitFor( + async () => { + const result = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM todo; + ` + expect(result.rows[0].count).toBe(3) + }, + { timeout: 5000 }, + ) + + // Verify the data was inserted correctly with special characters preserved + const result = await pg.sql` + SELECT * FROM todo ORDER BY id; + ` + expect(result.rows).toEqual([ + { id: 1, task: 'task with, comma', done: false }, + { id: 2, task: 'task with "quotes"', done: true }, + { id: 3, task: 'task with\nnewline', done: false }, + ]) + + shape.unsubscribe() + }) + + it('calls onInitialSync callback after initial sync', async () => { + let callbackCalled = false + const onInitialSync = () => { + callbackCalled = true + } + + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, 
+ params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + onInitialSync, + shapeKey: null, + }) + + // Insert some initial data + await pgClient.query(` + INSERT INTO todo (id, task, done) VALUES + (1, 'task1', false), + (2, 'task2', true); + `) + + // Wait for initial sync to complete + await vi.waitFor( + async () => { + const result = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM todo; + ` + return result.rows[0].count === 2 + }, + { timeout: 5000 }, + ) + + // Verify callback was called + await vi.waitFor( + () => { + expect(callbackCalled).toBe(true) + return callbackCalled === true + }, + { timeout: 5000 }, + ) + + // Insert more data - callback should not be called again + callbackCalled = false + await pgClient.query(` + INSERT INTO todo (id, task, done) VALUES + (3, 'task3', false); + `) + + // Wait for sync to complete + await vi.waitFor( + async () => { + const result = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM todo; + ` + return result.rows[0].count === 3 + }, + { timeout: 5000 }, + ) + + // Verify callback was not called again + expect(callbackCalled).toBe(false) + + shape.unsubscribe() + }) + + it('uses the specified metadata schema for subscription metadata', async () => { + // Close the default PGlite instance + await pg.close() + + // Create a new PGlite instance with a custom metadata schema + const metadataSchema = 'custom_metadata' + pg = await PGlite.create({ + extensions: { + electric: electricSync({ + metadataSchema, + }), + }, + }) + + // Initialize metadata tables + await pg.electric.initMetadataTables() + + // Create the todo table + await pg.exec(` + CREATE TABLE todo ( + id SERIAL PRIMARY KEY, + task TEXT, + done BOOLEAN + ); + `) + + // Verify the custom schema was created + const schemaResult = await pg.query( + `SELECT schema_name FROM information_schema.schemata WHERE schema_name = $1`, + [metadataSchema], + ) + expect(schemaResult.rows).toHaveLength(1) + expect(schemaResult.rows[0]).toEqual({ schema_name: metadataSchema }) + + // Verify the subscription table exists in the custom schema + const tableResult = await pg.query( + `SELECT table_name FROM information_schema.tables + WHERE table_schema = $1 AND table_name = 'subscriptions_metadata'`, + [metadataSchema], + ) + expect(tableResult.rows).toHaveLength(1) + expect(tableResult.rows[0]).toEqual({ + table_name: 'subscriptions_metadata', + }) + + // Test that we can create a subscription with the custom schema + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: 'custom_schema_test', + }) + + // We don't persist any metadata untill some data has been synced + await pgClient.query(` + INSERT INTO todo (id, task, done) VALUES + (1, 'task1', false); + `) + await vi.waitFor( + async () => { + const result = await pg.sql<{ count: number }>` + SELECT COUNT(*) as count FROM todo; + ` + expect(result.rows[0].count).toBe(1) + }, + { timeout: 5000 }, + ) + + // Check the data was inserted into the todo table + const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;` + expect(todoResult.rows[0]).toEqual({ + id: 1, + task: 'task1', + done: false, + }) + + // Verify the subscription was stored in the custom schema + const subscriptionResult = await pg.query( + `SELECT * FROM ${metadataSchema}.subscriptions_metadata WHERE key = $1`, + ['custom_schema_test'], + ) + 
expect(subscriptionResult.rows).toHaveLength(1) + + // Clean up + shape.unsubscribe() + await pg.electric.deleteSubscription('custom_schema_test') + }) + + it('handles transactions across multiple tables with syncShapesToTables', async () => { + // Clean up any existing data in the project table + await pgClient.query('TRUNCATE project;') + await pg.exec('TRUNCATE project;') + + // Set up sync for both tables using syncShapesToTables + const syncResult = await pg.electric.syncShapesToTables({ + key: 'transaction_test', + shapes: { + todo_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'project' }, + fetchClient, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + }) + + // Insert initial data in a transaction + await pgClient.query('BEGIN;') + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (1, 'Initial task', false); + `) + await pgClient.query(` + INSERT INTO project (id, name, active) + VALUES (1, 'Initial project', true); + `) + await pgClient.query('COMMIT;') + + // Wait for both inserts to be synced + await vi.waitFor( + async () => { + const todoCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + const projectCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM project;` + expect(todoCount.rows[0].count).toBe(1) + expect(projectCount.rows[0].count).toBe(1) + }, + { timeout: 5000 }, + ) + + // Verify initial data was inserted + const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;` + expect(todoResult.rows[0]).toEqual({ + id: 1, + task: 'Initial task', + done: false, + }) + + const projectResult = await pg.sql`SELECT * FROM project WHERE id = 1;` + expect(projectResult.rows[0]).toEqual({ + id: 1, + name: 'Initial project', + active: true, + }) + + // Update both tables in a transaction + await pgClient.query('BEGIN;') + await pgClient.query(` + UPDATE todo SET task = 'Updated in transaction', done = true WHERE id = 1; + `) + await pgClient.query(` + UPDATE project SET name = 'Updated in transaction', active = false WHERE id = 1; + `) + await pgClient.query('COMMIT;') + + // Wait for both updates to be synced + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number + task: string + done: boolean + }>`SELECT * FROM todo WHERE id = 1;` + const projectResult = await pg.sql<{ + id: number + name: string + active: boolean + }>`SELECT * FROM project WHERE id = 1;` + expect(todoResult.rows[0].task).toBe('Updated in transaction') + expect(projectResult.rows[0].name).toBe('Updated in transaction') + }, + { timeout: 5000 }, + ) + + // Verify both updates were applied + const updatedTodoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;` + expect(updatedTodoResult.rows[0]).toEqual({ + id: 1, + task: 'Updated in transaction', + done: true, + }) + + const updatedProjectResult = + await pg.sql`SELECT * FROM project WHERE id = 1;` + expect(updatedProjectResult.rows[0]).toEqual({ + id: 1, + name: 'Updated in transaction', + active: false, + }) + + // Clean up + syncResult.unsubscribe() + }) + + it('stops sync after unsubscribe', async () => { + // First sync session with a persistent key + let shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + shapeKey: 'refetch_test', + }) + + // Insert initial batch of data + 
await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (1, 'Initial task', false);
+    `)
+
+    // Wait 3 seconds to make sure the data is synced
+    await new Promise((resolve) => setTimeout(resolve, 3000))
+
+    // Wait for initial sync to complete
+    await vi.waitFor(async () => {
+      const result = await pg.sql<{
+        count: number
+      }>`SELECT COUNT(*) as count FROM todo;`
+      expect(result.rows[0].count).toBe(1)
+    })
+
+    // Check the data was inserted into the todo table
+    const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;`
+    expect(todoResult.rows[0]).toEqual({
+      id: 1,
+      task: 'Initial task',
+      done: false,
+    })
+
+    // Unsubscribe from first sync session
+    shape.unsubscribe()
+
+    // Insert new data after unsubscribing - it should not be synced
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (2, 'New task after refetch', true);
+    `)
+
+    // Verify the new row is not synced (count stays at 1)
+    await vi.waitFor(
+      async () => {
+        const result = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM todo;`
+        expect(result.rows[0].count).not.toBe(2)
+      },
+      { timeout: 5000 },
+    )
+
+    // Verify only the original row is present (the new row was not synced)
+    const result = await pg.sql`SELECT * FROM todo ORDER BY id;`
+    expect(result.rows).toEqual([
+      {
+        id: 1,
+        task: 'Initial task',
+        done: false,
+      },
+    ])
+
+    // Clean up
+    shape.unsubscribe()
+    await pg.electric.deleteSubscription('refetch_test')
+  })
+
+  it('resumes sync after unsubscribe', async () => {
+    // First sync session with a persistent key
+    let shape = await pg.electric.syncShapeToTable({
+      shape: {
+        url: ELECTRIC_URL,
+        params: { table: 'todo' },
+        fetchClient,
+      },
+      table: 'todo',
+      primaryKey: ['id'],
+      shapeKey: 'refetch_test',
+    })
+
+    // Insert initial batch of data
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (1, 'Initial task', false);
+    `)
+
+    // Wait for initial sync to complete
+    await vi.waitFor(async () => {
+      const result = await pg.sql<{
+        count: number
+      }>`SELECT COUNT(*) as count FROM todo;`
+      expect(result.rows[0].count).toBe(1)
+    })
+
+    // Check the data was inserted into the todo table
+    const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;`
+    expect(todoResult.rows[0]).toEqual({
+      id: 1,
+      task: 'Initial task',
+      done: false,
+    })
+
+    // Unsubscribe from first sync session
+    shape.unsubscribe()
+
+    // Insert new data before we resume the sync
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (2, 'New task after refetch', true);
+    `)
+
+    // Start a new sync session with the same key
+    shape = await pg.electric.syncShapeToTable({
+      shape: {
+        url: ELECTRIC_URL,
+        params: { table: 'todo' },
+        fetchClient,
+      },
+      table: 'todo',
+      primaryKey: ['id'],
+      shapeKey: 'refetch_test',
+    })
+
+    // Wait for sync to complete
+    await vi.waitFor(
+      async () => {
+        const result = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM todo;`
+        expect(result.rows[0].count).toBe(2)
+      },
+      { timeout: 5000 },
+    )
+
+    // Verify both the original and the new rows are present
+    const result = await pg.sql`SELECT * FROM todo ORDER BY id;`
+    expect(result.rows).toEqual([
+      {
+        id: 1,
+        task: 'Initial task',
+        done: false,
+      },
+      {
+        id: 2,
+        task: 'New task after refetch',
+        done: true,
+      },
+    ])
+
+    // Clean up
+    shape.unsubscribe()
+    await pg.electric.deleteSubscription('refetch_test')
+  })
+
+  it('clears and restarts persisted shape stream state on refetch', async () => {
+    // First sync session with a persistent key
+    let shape = await 
pg.electric.syncShapeToTable({
+      shape: {
+        url: ELECTRIC_URL,
+        params: { table: 'todo' },
+        fetchClient,
+      },
+      table: 'todo',
+      primaryKey: ['id'],
+      shapeKey: 'refetch_test',
+    })
+
+    // Insert initial batch of data
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (1, 'Initial task', false);
+    `)
+
+    // Wait for initial sync to complete
+    await vi.waitFor(async () => {
+      const result = await pg.sql<{
+        count: number
+      }>`SELECT COUNT(*) as count FROM todo;`
+      expect(result.rows[0].count).toBe(1)
+    })
+
+    // Check the data was inserted into the todo table
+    const todoResult = await pg.sql`SELECT * FROM todo WHERE id = 1;`
+    expect(todoResult.rows[0]).toEqual({
+      id: 1,
+      task: 'Initial task',
+      done: false,
+    })
+
+    // Unsubscribe from first sync session
+    shape.unsubscribe()
+
+    // Delete the shape on the server to force a refetch
+    await deleteAllShapes()
+
+    // Insert new data before we resume the sync
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (2, 'New task after refetch', true);
+    `)
+
+    // Start a new sync session with the same key
+    shape = await pg.electric.syncShapeToTable({
+      shape: {
+        url: ELECTRIC_URL,
+        params: { table: 'todo' },
+        fetchClient,
+      },
+      table: 'todo',
+      primaryKey: ['id'],
+      shapeKey: 'refetch_test',
+    })
+
+    // Wait for sync to complete
+    await vi.waitFor(
+      async () => {
+        const result = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM todo;`
+        expect(result.rows[0].count).toBe(2)
+      },
+      { timeout: 5000 },
+    )
+
+    // Verify the local table was cleared and re-synced with both rows
+    const result = await pg.sql`SELECT * FROM todo ORDER BY id;`
+    expect(result.rows).toEqual([
+      {
+        id: 1,
+        task: 'Initial task',
+        done: false,
+      },
+      {
+        id: 2,
+        task: 'New task after refetch',
+        done: true,
+      },
+    ])
+
+    // Clean up
+    shape.unsubscribe()
+    await pg.electric.deleteSubscription('refetch_test')
+  })
+
+  it('handles must-refetch control message across multiple tables', async () => {
+    // Set up sync for both tables using syncShapesToTables
+    const syncResult = await pg.electric.syncShapesToTables({
+      key: 'refetch_multi_test',
+      shapes: {
+        todo_shape: {
+          shape: {
+            url: ELECTRIC_URL,
+            params: { table: 'todo' },
+            fetchClient,
+          },
+          table: 'todo',
+          primaryKey: ['id'],
+        },
+        project_shape: {
+          shape: {
+            url: ELECTRIC_URL,
+            params: { table: 'project' },
+            fetchClient,
+          },
+          table: 'project',
+          primaryKey: ['id'],
+        },
+      },
+    })
+
+    // Insert initial data
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (1, 'Initial todo', false);
+    `)
+    await pgClient.query(`
+      INSERT INTO project (id, name, active)
+      VALUES (1, 'Initial project', true);
+    `)
+
+    // Wait for initial sync to complete
+    await vi.waitFor(
+      async () => {
+        const todoCount = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM todo;`
+        const projectCount = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM project;`
+        return todoCount.rows[0].count === 1 && projectCount.rows[0].count === 1
+      },
+      { timeout: 5000 },
+    )
+
+    // Unsubscribe from sync
+    syncResult.unsubscribe()
+
+    // Delete the shapes on the server to force a refetch
+    await deleteAllShapesForTable('todo')
+    // We leave the project shape in place so we can test a must-refetch on
+    // just one of the tables
+
+    // Insert new data before restarting the sync
+    await pgClient.query(`
+      INSERT INTO todo (id, task, done)
+      VALUES (2, 'New todo after refetch', true);
+    `)
+    await pgClient.query(`
+      INSERT INTO project (id, name, active)
+ VALUES (2, 'New project after refetch', false); + `) + + // Start a new sync session with the same key + const newSyncResult = await pg.electric.syncShapesToTables({ + key: 'refetch_multi_test', + shapes: { + todo_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'project' }, + fetchClient, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + }) + + // Wait for sync to complete + await vi.waitFor( + async () => { + const todoCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM todo;` + const projectCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM project;` + expect(todoCount.rows[0].count).toBe(2) + expect(projectCount.rows[0].count).toBe(2) + }, + { timeout: 5000 }, + ) + + // Verify only the new data is present (old data was cleared) + const todoResult = await pg.sql`SELECT * FROM todo ORDER BY id;` + expect(todoResult.rows).toEqual([ + { + id: 1, + task: 'Initial todo', + done: false, + }, + { + id: 2, + task: 'New todo after refetch', + done: true, + }, + ]) + + const projectResult = await pg.sql`SELECT * FROM project ORDER BY id;` + expect(projectResult.rows).toEqual([ + { + id: 1, + name: 'Initial project', + active: true, + }, + { + id: 2, + name: 'New project after refetch', + active: false, + }, + ]) + + // Clean up + newSyncResult.unsubscribe() + await pg.electric.deleteSubscription('refetch_multi_test') }) }) From 9455066a95e6f3aa35f561529aac4ea419fceb54 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 12 Mar 2025 22:07:09 +0000 Subject: [PATCH 10/10] More tests --- .../pglite-sync/test-e2e/sync-e2e.test.ts | 504 +++++++++++++++++- 1 file changed, 498 insertions(+), 6 deletions(-) diff --git a/packages/pglite-sync/test-e2e/sync-e2e.test.ts b/packages/pglite-sync/test-e2e/sync-e2e.test.ts index fa17d45a..002a5eed 100644 --- a/packages/pglite-sync/test-e2e/sync-e2e.test.ts +++ b/packages/pglite-sync/test-e2e/sync-e2e.test.ts @@ -87,9 +87,9 @@ const deleteAllShapes = async () => { shapeHandles.clear() } -const deleteAllShapesForTable = async (table: string) => { +const deleteAllShapesForTable = async (targetTable: string) => { for (const [handle, table] of shapeHandles.entries()) { - if (table === table) { + if (table === targetTable) { await deleteShape(table, handle) } } @@ -179,16 +179,42 @@ describe('sync-e2e', () => { ); `) + // Create a large table with 10 columns in PostgreSQL + await pgClient.query(` + CREATE TABLE IF NOT EXISTS large_table ( + id SERIAL PRIMARY KEY, + col1 TEXT, + col2 INTEGER, + col3 BOOLEAN, + col4 TIMESTAMP, + col5 NUMERIC(10,2), + col6 TEXT, + col7 INTEGER, + col8 BOOLEAN, + col9 TEXT + ); + `) + + // Create a table for large operations + await pgClient.query(` + CREATE TABLE IF NOT EXISTS large_ops_table ( + id SERIAL PRIMARY KEY, + value TEXT, + number INTEGER, + flag BOOLEAN + ); + `) + // Clean up any existing data await pgClient.query( - 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt, large_table, large_ops_table;', ) }) afterAll(async () => { // Truncate all tables await pgClient.query( - 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt, large_table, large_ops_table;', ) await pgClient.end() @@ -197,7 +223,7 @@ describe('sync-e2e', () => { beforeEach(async () => { await pgClient.query( - 
'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt, large_table, large_ops_table;', ) // Create PGlite instance with electric sync extension @@ -268,6 +294,31 @@ describe('sync-e2e', () => { done BOOLEAN ); `) + + // Create the same table in PGlite + await pg.exec(` + CREATE TABLE large_table ( + id SERIAL PRIMARY KEY, + col1 TEXT, + col2 INTEGER, + col3 BOOLEAN, + col4 TIMESTAMP, + col5 NUMERIC(10,2), + col6 TEXT, + col7 INTEGER, + col8 BOOLEAN, + col9 TEXT + ); + `) + + await pg.exec(` + CREATE TABLE large_ops_table ( + id SERIAL PRIMARY KEY, + value TEXT, + number INTEGER, + flag BOOLEAN + ); + `) }) afterEach(async () => { @@ -276,7 +327,7 @@ describe('sync-e2e', () => { // Truncate all tables await pgClient.query( - 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt;', + 'TRUNCATE todo, project, alt_todo, test_syncing, todo_alt, large_table, large_ops_table;', ) }) @@ -1426,4 +1477,445 @@ newline', false); newSyncResult.unsubscribe() await pg.electric.deleteSubscription('refetch_multi_test') }) + + it('handles large initial load with multiple columns', async () => { + // Generate data in batches + const numRows = 5000; // Reduced from 10k to 5k for faster test execution + const batchSize = 500; + const batches = Math.ceil(numRows / batchSize); + + for (let batch = 0; batch < batches; batch++) { + const start = batch * batchSize; + const end = Math.min(start + batchSize, numRows); + + // Build a batch of INSERT statements + for (let i = start; i < end; i++) { + await pgClient.query(` + INSERT INTO large_table ( + id, col1, col2, col3, col4, col5, col6, col7, col8, col9 + ) VALUES ( + $1, $2, $3, $4, $5, $6, $7, $8, $9, $10 + ); + `, [ + i, + `text-${i}`, + i * 10, + i % 2 === 0, + new Date(2023, 0, 1, 12 + i), // 2023-01-01 12:00:00 + i hours + i * 1.5, + `long-text-value-${i}-with-some-additional-content`, + i * 5, + i % 3 === 0, + `another-text-value-${i}` + ]); + } + } + + // Set up sync with COPY enabled for efficiency + const shape = await pg.electric.syncShapeToTable({ + shape: { + url: ELECTRIC_URL, + params: { table: 'large_table' }, + fetchClient, + }, + table: 'large_table', + primaryKey: ['id'], + useCopy: true, + shapeKey: null, + }) + + // Wait for all data to be synced - increase timeout for large dataset + await vi.waitFor( + async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM large_table;` + expect(result.rows[0].count).toBe(numRows) + }, + { timeout: 60000 }, // 60 second timeout for large dataset + ) + + // Verify some sample data points + const firstRow = await pg.sql`SELECT * FROM large_table WHERE id = 0;` + expect(firstRow.rows[0]).toMatchObject({ + id: 0, + col1: 'text-0', + col2: 0, + col3: true, + // Skip timestamp comparison as it might have timezone differences + col5: "0.00", + col6: 'long-text-value-0-with-some-additional-content', + col7: 0, + col8: true, + col9: 'another-text-value-0' + }) + + const middleRow = await pg.sql`SELECT * FROM large_table WHERE id = 2500;` + expect(middleRow.rows[0]).toMatchObject({ + id: 2500, + col1: 'text-2500', + col2: 25000, + col3: true, + // Skip timestamp comparison + col5: "3750.00", + col6: 'long-text-value-2500-with-some-additional-content', + col7: 12500, + col8: false, + col9: 'another-text-value-2500' + }) + + const lastRow = await pg.sql`SELECT * FROM large_table WHERE id = ${numRows - 1};` + expect(lastRow.rows[0]).toMatchObject({ + id: numRows - 1, + col1: `text-${numRows - 1}`, + col2: 
(numRows - 1) * 10,
+      col3: (numRows - 1) % 2 === 0,
+      // Skip timestamp comparison
+      col5: ((numRows - 1) * 1.5).toFixed(2),
+      col6: `long-text-value-${numRows - 1}-with-some-additional-content`,
+      col7: (numRows - 1) * 5,
+      col8: (numRows - 1) % 3 === 0,
+      col9: `another-text-value-${numRows - 1}`
+    })
+
+    // Clean up
+    shape.unsubscribe()
+  })
+
+  it('handles large update with inserts, deletes, and updates', async () => {
+    // Insert initial rows (some will be updated, some deleted, some unchanged)
+    const totalRows = 3000;
+    const batchSize = 500;
+    const batches = Math.ceil(totalRows / batchSize);
+
+    for (let batch = 0; batch < batches; batch++) {
+      const start = batch * batchSize;
+      const end = Math.min(start + batchSize, totalRows);
+
+      for (let i = start; i < end; i++) {
+        await pgClient.query(`
+          INSERT INTO large_ops_table (id, value, number, flag)
+          VALUES ($1, $2, $3, $4);
+        `, [i, `initial-value-${i}`, i, i % 2 === 0]);
+      }
+    }
+
+    // Set up sync
+    const shape = await pg.electric.syncShapeToTable({
+      shape: {
+        url: ELECTRIC_URL,
+        params: { table: 'large_ops_table' },
+        fetchClient,
+      },
+      table: 'large_ops_table',
+      primaryKey: ['id'],
+      useCopy: true,
+      shapeKey: null,
+    })
+
+    const initialCount = await pg.sql<{
+      count: number
+    }>`SELECT COUNT(*) as count FROM large_ops_table;`
+    console.log('initialCount', initialCount.rows[0].count)
+
+    // Wait for initial sync to complete
+    await vi.waitFor(
+      async () => {
+        const result = await pg.sql<{
+          count: number
+        }>`SELECT COUNT(*) as count FROM large_ops_table;`
+        expect(result.rows[0].count).toBe(totalRows)
+      },
+      { timeout: 30000 },
+    )
+
+    // Begin transaction for large update
+    await pgClient.query('BEGIN;')
+
+    // 1. Delete rows (ids 1-999) - leave id=0 in the table
+    await pgClient.query(`
+      DELETE FROM large_ops_table WHERE id BETWEEN 1 AND 999;
+    `)
+
+    // 2. Update rows (ids 1000-1999)
+    await pgClient.query(`
+      UPDATE large_ops_table
+      SET value = 'updated-value', number = number * 10, flag = NOT flag
+      WHERE id BETWEEN 1000 AND 1999;
+    `)
+
+    // 3. 
Insert new rows + for (let i = totalRows; i < totalRows + 1000; i++) { + await pgClient.query(` + INSERT INTO large_ops_table (id, value, number, flag) + VALUES ($1, $2, $3, $4); + `, [i, `new-value-${i}`, i * 2, i % 3 === 0]); + } + + // Commit the transaction + await pgClient.query('COMMIT;') + + // Wait for all changes to sync + await vi.waitFor( + async () => { + const result = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM large_ops_table;` + expect(result.rows[0].count).toBe(3001) // 3000 original - 999 deleted + 1000 new = 3001 + }, + { timeout: 30000 }, + ) + + // Verify deleted rows are gone + const deletedCount = await pg.sql<{ + count: number + }>`SELECT COUNT(*) as count FROM large_ops_table WHERE id BETWEEN 1 AND 999;` + expect(deletedCount.rows[0].count).toBe(0) + + // Verify updated rows have new values + const updatedRow = await pg.sql`SELECT * FROM large_ops_table WHERE id = 1500;` + expect(updatedRow.rows[0]).toEqual({ + id: 1500, + value: 'updated-value', + number: 15000, // 1500 * 10 + flag: 1500 % 2 !== 0, // NOT the original flag + }) + + // Verify new rows were inserted + const newRow = await pg.sql`SELECT * FROM large_ops_table WHERE id = 3500;` + expect(newRow.rows[0]).toEqual({ + id: 3500, + value: 'new-value-3500', + number: 7000, // 3500 * 2 + flag: 3500 % 3 === 0, + }) + + // Verify unchanged rows remain the same + const unchangedRow = await pg.sql`SELECT * FROM large_ops_table WHERE id = 2500;` + expect(unchangedRow.rows[0]).toEqual({ + id: 2500, + value: 'initial-value-2500', + number: 2500, + flag: 2500 % 2 === 0, + }) + + // Clean up + shape.unsubscribe() + }) + + it.skip('cycles through operations with todo and project tables', async () => { + // Set up sync for both tables using syncShapesToTables + const syncResult = await pg.electric.syncShapesToTables({ + key: 'cycle_test', + shapes: { + todo_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'todo' }, + fetchClient, + }, + table: 'todo', + primaryKey: ['id'], + }, + project_shape: { + shape: { + url: ELECTRIC_URL, + params: { table: 'project' }, + fetchClient, + }, + table: 'project', + primaryKey: ['id'], + }, + }, + }) + + // Run 100 iterations of the cycle + for (let i = 1; i <= 100; i++) { + console.log(`Iteration ${i}/100`); + + // 1. Insert into todo, check + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i * 6 - 5}, 'Todo ${i}.1', false); + `) + + const originCount = await pgClient.query(`SELECT COUNT(*) FROM todo;`) + console.log('originCount', originCount.rows[0].count) + + await new Promise((resolve) => setTimeout(resolve, 10000)) + + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo;` + console.log('todoResult', todoResult.rows) + + // Wait for todo insert to sync + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo WHERE id = ${i * 6 - 5};` + expect(todoResult.rows.length).toBe(1) + expect(todoResult.rows[0]).toEqual({ + id: i * 6 - 5, + task: `Todo ${i}.1`, + done: false, + }) + }, + { timeout: 5000 }, + ) + + // 2. 
Insert into todo and project in transaction, check + await pgClient.query('BEGIN;') + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i * 6 - 4}, 'Todo ${i}.2', true); + `) + await pgClient.query(` + INSERT INTO project (id, name, active) + VALUES (${i}, 'Project ${i}', true); + `) + await pgClient.query('COMMIT;') + + // Wait for transaction to sync + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo WHERE id = ${i * 6 - 4};` + const projectResult = await pg.sql<{ + id: number; + name: string; + active: boolean; + }>`SELECT * FROM project WHERE id = ${i};` + expect(todoResult.rows).toHaveLength(1) + expect(projectResult.rows).toHaveLength(1) + }, + { timeout: 5000 }, + ) + + // 3. Update todo, check + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i * 6 - 3}, 'Todo ${i}.3', false); + `) + await pgClient.query(` + UPDATE todo SET task = 'Updated Todo ${i}.1', done = true WHERE id = ${i * 6 - 5}; + `) + + // Wait for update to sync + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo WHERE id = ${i * 6 - 5};` + expect(todoResult.rows[0]).toEqual({ + id: i * 6 - 5, + task: `Updated Todo ${i}.1`, + done: true, + }) + }, + { timeout: 5000 }, + ) + + // 4. Update project and todo, check + await pgClient.query('BEGIN;') + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i * 6 - 2}, 'Todo ${i}.4', true); + `) + await pgClient.query(` + UPDATE todo SET task = 'Updated Todo ${i}.2', done = false WHERE id = ${i * 6 - 4}; + `) + await pgClient.query(` + UPDATE project SET name = 'Updated Project ${i}', active = false WHERE id = ${i}; + `) + await pgClient.query('COMMIT;') + + // Wait for updates to sync + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo WHERE id = ${i * 6 - 4};` + const projectResult = await pg.sql<{ + id: number; + name: string; + active: boolean; + }>`SELECT * FROM project WHERE id = ${i};` + expect(todoResult.rows[0].task).toBe(`Updated Todo ${i}.2`) + expect(projectResult.rows[0].name).toBe(`Updated Project ${i}`) + }, + { timeout: 5000 }, + ) + + // 5. Delete a todo, check + await pgClient.query(` + INSERT INTO todo (id, task, done) + VALUES (${i * 6 - 1}, 'Todo ${i}.5', false); + `) + await pgClient.query(` + DELETE FROM todo WHERE id = ${i * 6 - 3}; + `) + + // Wait for delete to sync + await vi.waitFor( + async () => { + const todoResult = await pg.sql<{ + id: number; + task: string; + done: boolean; + }>`SELECT * FROM todo WHERE id = ${i * 6 - 3};` + expect(todoResult.rows).toHaveLength(0) + }, + { timeout: 5000 }, + ) + + // 6. 
Delete the project, check
+      await pgClient.query(`
+        INSERT INTO todo (id, task, done)
+        VALUES (${i * 6}, 'Todo ${i}.6', true);
+      `)
+      await pgClient.query(`
+        DELETE FROM project WHERE id = ${i};
+      `)
+
+      // Wait for delete to sync
+      await vi.waitFor(
+        async () => {
+          const projectResult = await pg.sql<{
+            id: number;
+            name: string;
+            active: boolean;
+          }>`SELECT * FROM project WHERE id = ${i};`
+          expect(projectResult.rows).toHaveLength(0)
+        },
+        { timeout: 5000 },
+      )
+
+      // Verify that after each iteration:
+      // - project count is 0
+      // - todo count increases by 5 (we add 6 todos and delete 1 per iteration)
+      const projectCount = await pg.sql<{ count: number }>`SELECT COUNT(*) as count FROM project;`
+      const todoCount = await pg.sql<{ count: number }>`SELECT COUNT(*) as count FROM todo;`
+
+      expect(projectCount.rows[0].count).toBe(0)
+      expect(todoCount.rows[0].count).toBe(i * 5) // 6 inserts - 1 delete per iteration
+    }
+
+    // Clean up
+    syncResult.unsubscribe()
+    await pg.electric.deleteSubscription('cycle_test')
+  })
 })