Skip to content

Commit

Permalink
enhance github loader
Browse files Browse the repository at this point in the history
  • Loading branch information
n4ze3m committed Jul 11, 2024
1 parent 4b65f4a commit fe8ebcc
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 131 deletions.
2 changes: 1 addition & 1 deletion app/ui/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "app",
"private": true,
"version": "1.8.6",
"version": "1.8.7",
"type": "module",
"scripts": {
"dev": "vite",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dialoqbase",
"version": "1.8.6",
"version": "1.8.7",
"description": "Create chatbots with ease",
"scripts": {
"ui:dev": "pnpm run --filter ui dev",
Expand Down
89 changes: 44 additions & 45 deletions server/src/handlers/api/v1/bot/bot/delete.handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,19 @@ export const deleteSourceByIdHandler = async (
}
}

await prisma.botDocument.deleteMany({
where: {
botId: bot.id,
sourceId: source_id,
},
});

await prisma.botSource.delete({
where: {
id: source_id,
},
});
await prisma.$transaction([
prisma.botDocument.deleteMany({
where: {
botId: bot.id,
sourceId: source_id,
},
}),
prisma.botSource.delete({
where: {
id: source_id,
},
}),
]);

return {
id: bot.id,
Expand Down Expand Up @@ -100,51 +101,49 @@ export const deleteBotByIdHandler = async (
}
});

await prisma.botIntegration.deleteMany({
where: {
bot_id: bot.id,
},
});

await prisma.botDocument.deleteMany({
where: {
botId: bot.id,
},
});

await prisma.botSource.deleteMany({
where: {
botId: bot.id,
},
});
await prisma.$transaction([
prisma.botIntegration.deleteMany({
where: {
bot_id: bot.id,
},
}),
prisma.botDocument.deleteMany({
where: {
botId: bot.id,
},
}),
prisma.botSource.deleteMany({
where: {
botId: bot.id,
},
}),
]);

const botPlayground = await prisma.botPlayground.findMany({
where: {
botId: bot.id,
},
});

if (botPlayground.length > 0) {
await prisma.botPlaygroundMessage.deleteMany({
await prisma.$transaction([
prisma.botPlaygroundMessage.deleteMany({
where: {
botPlaygroundId: {
in: botPlayground.map((bp) => bp.id),
},
},
});
}

await prisma.botPlayground.deleteMany({
where: {
botId: bot.id,
},
});

await prisma.bot.delete({
where: {
id: bot.id,
},
});
}),
prisma.botPlayground.deleteMany({
where: {
botId: bot.id,
},
}),
prisma.bot.delete({
where: {
id: bot.id,
},
}),
]);

return {
message: "Bot deleted",
Expand Down
166 changes: 83 additions & 83 deletions server/src/loader/github.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
import { BaseDocumentLoader } from "langchain/document_loaders/base";
import { Document } from "langchain/document";
import { execSync } from "node:child_process";
import { exec } from "node:child_process";
import * as fs from "fs/promises";
import * as path from "path";

export interface GithubRepoLoaderParams {
branch: string;
url: string;
is_private: boolean;
isPrivate: boolean;
}

export class DialoqbaseGithub extends BaseDocumentLoader
implements GithubRepoLoaderParams {
export class DialoqbaseGithub
extends BaseDocumentLoader
implements GithubRepoLoaderParams
{
branch: string;
url: string;
is_private: boolean;
output_folder = "./uploads/";
ignore_folders = ["node_modules", ".git", ".github"];
ignore_files = [
isPrivate: boolean;
private readonly outputFolder = "./uploads/";
private readonly ignoreFolders = new Set(["node_modules", ".git", ".github"]);
private readonly ignoreFiles = new Set([
".gitignore",
".gitattributes",
"package-lock.json",
Expand All @@ -30,108 +33,105 @@ export class DialoqbaseGithub extends BaseDocumentLoader
".env",
".env.local",
".eslintignore",
];

constructor(
{
branch,
url,
is_private,
}: GithubRepoLoaderParams,
) {
]);

constructor({ branch, url, isPrivate }: GithubRepoLoaderParams) {
super();
this.branch = branch;
this.url = url;
this.is_private = is_private;
this.isPrivate = isPrivate;
}

async load(): Promise<Document<Record<string, any>>[]> {
const path = await this._cloneRepo();
const data = await this._repoFilesData(
path,
const repoPath = await this.cloneRepo();
const filesData = await this.getRepoFilesData(repoPath);

return filesData.map(
({ path, content }) =>
new Document({
pageContent: content,
metadata: { path },
})
);
}

const docs = data.map((file) => {
const doc = new Document<Record<string, any>>({
pageContent: file.content,
metadata: {
path: file.path,
},
});
private async cloneRepo(): Promise<string> {
const sanitizedUrl = this.url.replace(/^https?:\/\//, "");
const repoUrl = this.isPrivate
? `https://${process.env.GITHUB_ACCESS_TOKEN}@${sanitizedUrl}`
: `https://${sanitizedUrl}`;
const outputPath = path.join(
this.outputFolder,
`${sanitizedUrl.replace("/", "-")}-${this.branch}`
);

return doc;
});
await this.deleteFolder(outputPath);

const command = `git clone --single-branch --branch ${this.branch} ${repoUrl} ${outputPath}`;
await this.execCommand(command);

return docs;
return outputPath;
}

private async is_folder(path: string) {
private async deleteFolder(folderPath: string): Promise<void> {
try {
await fs.access(path);
return true;
await fs.access(folderPath);
await fs.rm(folderPath, { recursive: true });
} catch (error) {
return false;
console.error(`Error: ${error.message}`);
}
}

private async deleteFolder(path: string) {
const is_folder = await this.is_folder(path);
if (!is_folder) {
return;
}
await fs.rm(path, { recursive: true });
/**
 * Runs a shell command through `child_process.exec` and settles once the
 * process has finished.
 *
 * The command line may contain a URL with embedded credentials
 * (`https://<token>@host/...`). Node's exec error normally echoes the
 * command, so credentials are masked before anything is logged and on the
 * error object that is handed back to the caller.
 *
 * @param command - Full command line to execute.
 * @returns A promise that resolves when the command completes without an
 *   exec error.
 * @throws Rejects with the exec error (credentials masked in `message` and
 *   `cmd`) when the command fails.
 */
private async execCommand(command: string): Promise<void> {
  // Mask the userinfo part of any http(s) URL: "https://secret@host" -> "https://***@host".
  const redact = (text: string): string =>
    text.replace(/(https?:\/\/)[^\s\/@]+@/g, "$1***@");

  return new Promise<void>((resolve, reject) => {
    exec(command, (error, _stdout, stderr) => {
      if (error) {
        // Scrub the error in place so upstream handlers cannot leak the token either.
        error.message = redact(error.message);
        if (error.cmd) {
          error.cmd = redact(error.cmd);
        }
        console.error(`Error: ${error.message}`);
        reject(error);
        return;
      }
      // Output on stderr alone is not treated as a failure; it is only logged.
      if (stderr) {
        console.error(`stderr: ${redact(stderr)}`);
      }
      resolve();
    });
  });
}

private async _cloneRepo() {
const url = this.url.replace("https://", "").replace("http://", "");
const repo_url = this.is_private
? `https://${process.env.GITHUB_ACCESS_TOKEN}@${url}`
: `https://${url}`;
const output = `${this.output_folder}${url.split("/")[1]}-${
url.split("/")[2]
}-${this.branch}`;
await this.deleteFolder(output);
const command =
`git clone --single-branch --branch ${this.branch} ${repo_url} ${output}`;
await Promise.resolve(execSync(command, { stdio: "inherit" }));
return output;
/**
 * Collects the files found under `dir` and reads each one as UTF-8 text.
 *
 * @param dir - Directory to scan; the file list comes from `readFiles`.
 * @returns One `{ path, content }` record per file, in the same order as
 *   the list returned by `readFiles`.
 */
private async getRepoFilesData(
  dir: string
): Promise<{ path: string; content: string }[]> {
  const filePaths = await this.readFiles(dir);

  // Start every read up front, then wait for all of them together.
  const pendingReads = filePaths.map(async (filePath) => {
    const content = await fs.readFile(filePath, "utf-8");
    return { path: filePath, content };
  });

  return Promise.all(pendingReads);
}

private async _readFiles(
private async readFiles(
dir: string,
filelist: string[] = [],
fileList: string[] = []
): Promise<string[]> {
const files = await fs.readdir(dir);
for (const file of files) {
const filepath = `${dir}/${file}`;
const stat = await fs.stat(filepath);
if (this.ignore_folders.includes(file) || this.ignore_files.includes(file)) {
const entries = await fs.readdir(dir, { withFileTypes: true });

for (const entry of entries) {
const fullPath = path.join(dir, entry.name);

if (
this.ignoreFolders.has(entry.name) ||
this.ignoreFiles.has(entry.name)
) {
continue;
}
if (stat.isDirectory()) {
filelist = await this._readFiles(filepath, filelist);

if (entry.isDirectory()) {
await this.readFiles(fullPath, fileList);
} else {
filelist.push(filepath);
fileList.push(fullPath);
}
}
return filelist;
}

private async _readFile(path: string) {
const content = await fs.readFile(path, "utf-8");
return content;
}

private async _repoFilesData(dir: string) {
const files = await this._readFiles(dir);
const data = await Promise.all(
files.map(async (file) => {
const content = await this._readFile(file);
return {
path: file,
content,
};
}),
);
return data;
return fileList;
}
}
4 changes: 3 additions & 1 deletion server/src/queue/controllers/github.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ export const githubQueueController = async (
const loader = new DialoqbaseGithub({
branch: options.branch,
url: source.content!,
is_private: options.is_private,
isPrivate: options.is_private,
});
const docs = await loader.load();

console.log("github", docs.length)

const textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: source.chunkSize,
chunkOverlap: source.chunkOverlap,
Expand Down

0 comments on commit fe8ebcc

Please sign in to comment.