Skip to content

Commit

Permalink
Expose batchSize parameter on various indexers (#23)
Browse files (browse the repository at this point in the history)
* Add optional `batchSize` parameter for `JSONIndexer` opts

* Add optional `batchSize` parameter for `DirectoryIndexer` opts

* Add optional `batchSize` parameter for `TSVIndexer` opts 

* Add optional `batchSize` parameter for `ShopifyIndexer` opts
  • Loading branch information
chaosrealm authored Jul 3, 2024
1 parent d7089a8 commit 5396765
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 12 deletions.
10 changes: 6 additions & 4 deletions indexers/directory-indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { FileDocument } from "../document.js";
export type DirectoryIndexerOpts = {
rootDir: string;
urlBase?: string;
batchSize?: number;
getUrl?: (docPathList: string[], sitePathList: string[]) => string;
getId?: (docPathList: string[], sitePathList: string[]) => string;
getImageUrl?: (docPathList: string[], sitePathList: string[]) => string;
Expand All @@ -13,16 +14,17 @@ export type DirectoryIndexerOpts = {
};

export class DirectoryIndexer {
private parallelism = 25;
private rootDir: string;
private urlBase?: string;
private readonly batchSize: number;
private readonly rootDir: string;
private readonly urlBase?: string;
private files: FileDocument[] = [];
constructor(
public catalog: Catalog,
opts: DirectoryIndexerOpts,
) {
this.rootDir = opts.rootDir;
this.urlBase = opts.urlBase;
this.batchSize = opts.batchSize ?? 25;

if (opts.includeFile) {
this.includeFile = opts.includeFile;
Expand Down Expand Up @@ -86,7 +88,7 @@ export class DirectoryIndexer {
url,
});

if (this.files.length >= this.parallelism) {
if (this.files.length >= this.batchSize) {
await this.catalog.upsertDocuments(this.files);
this.files = [];
}
Expand Down
6 changes: 4 additions & 2 deletions indexers/json-indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ import { JSONDocument } from "../document.js";
/* eslint-disable @typescript-eslint/no-explicit-any */

/**
 * Optional settings accepted by the `JSONIndexer` constructor.
 * The options object itself and every field in it may be omitted; the
 * constructor substitutes a default for each missing field.
 */
export type JSONIndexerOpts = {
// Batch-size threshold; the constructor falls back to 50 when it is omitted.
// `indexItems` compares the pending batch length against this value before
// calling `catalog.upsertDocuments`.
// NOTE(review): that comparison is `>`, whereas `DirectoryIndexer` uses `>=`;
// confirm whether a flushed batch can end up holding batchSize + 1 documents.
batchSize?: number;
// Maps a document to a string (its id, going by the name);
// defaults to `JSONIndexer.defaultGetId`.
getId?: (document: any) => string;
// Maps a document to a URL string or `undefined`;
// defaults to `JSONIndexer.defaultGetUrl`.
getUrl?: (document: any) => string | undefined;
// Maps a document to an image URL string or `undefined`;
// defaults to `JSONIndexer.defaultGetImageUrl`.
getImageUrl?: (document: any) => string | undefined;
};

export class JSONIndexer {
private readonly parallelism = 50;
private readonly batchSize: number;
private readonly getId: (document: any) => string;
private readonly getImageUrl: (document: any) => string | undefined;
private readonly getUrl: (document: any) => string | undefined;
Expand All @@ -22,6 +23,7 @@ export class JSONIndexer {
private documents: any[],
opts?: JSONIndexerOpts,
) {
this.batchSize = opts?.batchSize ?? 50;
this.getId = opts?.getId ?? JSONIndexer.defaultGetId;
this.getUrl = opts?.getUrl ?? JSONIndexer.defaultGetUrl;
this.getImageUrl = opts?.getImageUrl ?? JSONIndexer.defaultGetImageUrl;
Expand Down Expand Up @@ -74,7 +76,7 @@ export class JSONIndexer {

private async indexItems(): Promise<void> {
for (const document of this.documents) {
if (this.batch.length > this.parallelism) {
if (this.batch.length > this.batchSize) {
await this.catalog.upsertDocuments(this.batch);
this.batch = [];
}
Expand Down
13 changes: 9 additions & 4 deletions indexers/shopify-indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@ type Product = {
/**
 * Settings accepted by the `ShopifyIndexer` constructor.
 * Only `shopifyBaseUrl` is required.
 */
export type ShopifyIndexerOpts = {
// Required. Presumably the base URL of the Shopify store to read products
// from; its usage is not visible in this view, so verify against the fetch code.
shopifyBaseUrl: string;
// Optional. Presumably caps how many products are indexed; its usage is not
// visible in this view (TODO confirm).
maxItems?: number;
// Optional; the constructor falls back to 25 when it is omitted. The value is
// used in two places: it is forwarded as `batchSize` to `catalog.jsonIndexer`
// in `indexProducts`, and in `deleteProducts` it is the number of pending
// delete promises that triggers an `await Promise.all(...)` before continuing.
batchSize?: number;
};

export class ShopifyIndexer {
private parallelism = 25;
private readonly batchSize: number;
private documents: Product[] = [];
private deletes: Promise<void>[] = [];
private idsToDelete: string[] = [];
private page = 1;
constructor(
private catalog: Catalog,
private opts: ShopifyIndexerOpts,
) {}
) {
this.batchSize = opts.batchSize ?? 25;
}

private stripHTML(input: string) {
return input.replace(/<\/?[^>]+(>|$)/g, "");
Expand Down Expand Up @@ -96,13 +99,15 @@ export class ShopifyIndexer {
}

private async indexProducts(): Promise<void> {
const indexer = this.catalog.jsonIndexer(this.documents);
const indexer = this.catalog.jsonIndexer(this.documents, {
batchSize: this.batchSize,
});
await indexer.index();
}

private async deleteProducts(): Promise<void> {
for (const id of this.idsToDelete) {
if (this.deletes.length >= this.parallelism) {
if (this.deletes.length >= this.batchSize) {
await Promise.all(this.deletes);
this.deletes = [];
}
Expand Down
9 changes: 7 additions & 2 deletions indexers/tsv-indexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { JSONIndexer } from "./json-indexer.js";
/* eslint-disable @typescript-eslint/no-explicit-any */

export type TSVIndexerOpts = {
batchSize?: number;
getId?: (item: any) => string;
getUrl?: (item: any) => string;
getImageUrl?: (item: any) => string;
Expand All @@ -16,8 +17,9 @@ export class TSVIndexer {
private readonly getId: (document: any) => string;
private readonly getImageUrl: (document: any) => string | undefined;
private readonly getUrl: (document: any) => string | undefined;
private readonly fieldMapping: undefined | { [key: string]: string };
private readonly batchSize?: number;
private documents: any[] = [];
private fieldMapping: undefined | { [key: string]: string };

constructor(
public catalog: Catalog,
Expand All @@ -27,6 +29,7 @@ export class TSVIndexer {
this.getId = opts?.getId ?? JSONIndexer.defaultGetId;
this.getUrl = opts?.getUrl ?? JSONIndexer.defaultGetUrl;
this.getImageUrl = opts?.getImageUrl ?? JSONIndexer.defaultGetImageUrl;
this.batchSize = opts?.batchSize;

if (opts?.fieldMapping) {
this.fieldMapping = opts?.fieldMapping;
Expand Down Expand Up @@ -64,7 +67,9 @@ export class TSVIndexer {
});
}

const jsonIndexer = this.catalog.jsonIndexer(this.documents);
const jsonIndexer = this.catalog.jsonIndexer(this.documents, {
batchSize: this.batchSize,
});

await jsonIndexer.index();
}
Expand Down

0 comments on commit 5396765

Please sign in to comment.