Skip to content

Commit

Permalink
Merge pull request #272 from n4ze3m/next
Browse files Browse the repository at this point in the history
v1.8.5
  • Loading branch information
n4ze3m authored Jun 18, 2024
2 parents d7df246 + 83e634f commit 881e1c8
Show file tree
Hide file tree
Showing 20 changed files with 1,034 additions and 116 deletions.
15 changes: 14 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,28 @@ RUN pnpm install
RUN pnpm build

FROM node:18-slim

WORKDIR /app

ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true

RUN apt-get update && apt-get install gnupg wget -y && \
wget --quiet --output-document=- https://dl-ssl.google.com/linux/linux_signing_key.pub | gpg --dearmor > /etc/apt/trusted.gpg.d/google-archive.gpg && \
sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' && \
apt-get update && \
apt-get install google-chrome-stable -y --no-install-recommends && \
rm -rf /var/lib/apt/lists/*

RUN yarn config set registry https://registry.npmjs.org/
RUN yarn config set network-timeout 1200000

RUN apt update && apt -y install --no-install-recommends ca-certificates git git-lfs openssh-client curl jq cmake sqlite3 openssl psmisc python3


RUN apt -y install g++ make
# RUN npm install -g node-gyp

RUN apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/{apt,dpkg,cache,log}/

RUN npm --no-update-notifier --no-fund --global install pnpm
# Copy API
COPY --from=server /app/dist/ .
Expand Down
2 changes: 1 addition & 1 deletion app/ui/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "app",
"private": true,
"version": "1.8.4",
"version": "1.8.5",
"type": "module",
"scripts": {
"dev": "vite",
Expand Down
7 changes: 7 additions & 0 deletions app/ui/src/routes/settings/application.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ export default function SettingsApplicationRoot() {
>
<Switch />
</Form.Item>
<Form.Item
label="Enhanced Website loader"
name="usePuppeteerFetch"
valuePropName="checked"
>
<Switch />
</Form.Item>
</div>
<div className="bg-gray-50 border-x border-b rounded-b-md rounded-x-md px-4 py-3 text-right sm:px-6 dark:bg-[#141414] dark:border-gray-600">
<button
Expand Down
10 changes: 5 additions & 5 deletions app/widget/src/hooks/useMessage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ export type BotResponse = {
};

const parsesStreamingResponse = (text: string) => {
// event: chunk or result\ndata: been or object\n\n
// console.log(`text: ${text}`);
const REGEX = /event: (.+)\ndata: (.+)/g;
const matches = text.matchAll(REGEX);
const result = [];
Expand Down Expand Up @@ -149,18 +147,20 @@ export const useMessage = () => {
if (type === "chunk") {
const jsonMessage = JSON.parse(message);
if (count === 0) {
newMessage[appendingIndex].message = jsonMessage.message;
newMessage[appendingIndex].message = jsonMessage.message + "▋";
setMessages(newMessage);
localStorage.setItem("DS_MESSAGE", JSON.stringify(newMessage));
} else {
newMessage[appendingIndex].message += jsonMessage.message;
newMessage[appendingIndex].message =
newMessage[appendingIndex].message.slice(0, -1) +
jsonMessage.message +
"▋";
setMessages(newMessage);
localStorage.setItem("DS_MESSAGE", JSON.stringify(newMessage));
}
count++;
} else if (type === "result") {
const responseData = JSON.parse(message) as BotResponse;
console.log(responseData);
newMessage[appendingIndex].message = responseData.bot.text;
newMessage[appendingIndex].sources =
responseData.bot.sourceDocuments;
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dialoqbase",
"version": "1.8.4",
"version": "1.8.5",
"description": "Create chatbots with ease",
"scripts": {
"ui:dev": "pnpm run --filter ui dev",
Expand Down
6 changes: 6 additions & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"@langchain/community": "^0.0.35",
"@langchain/google-genai": "^0.0.16",
"@langchain/openai": "^0.0.18",
"@mozilla/readability": "^0.5.0",
"@prisma/client": "^5.9.1",
"@slack/bolt": "^3.13.2",
"@supabase/supabase-js": "^2.24.0",
Expand Down Expand Up @@ -77,6 +78,11 @@
"pdf-parse": "^1.1.1",
"pdfjs-dist": "^3.7.107",
"pubsub-js": "^1.9.4",
"puppeteer": "^22.11.0",
"puppeteer-extra": "^3.3.6",
"puppeteer-extra-plugin-block-resources": "^2.4.3",
"puppeteer-extra-plugin-page-proxy": "^2.0.0",
"puppeteer-extra-plugin-stealth": "^2.11.2",
"replicate": "^0.26.0",
"sitemapper": "^3.2.6",
"ts-node": "^10.9.1",
Expand Down
2 changes: 2 additions & 0 deletions server/prisma/migrations/q_12_4/migration.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- AlterTable
ALTER TABLE "DialoqbaseSettings" ADD COLUMN "usePuppeteerFetch" BOOLEAN DEFAULT false;
1 change: 1 addition & 0 deletions server/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ model DialoqbaseSettings {
defaultChatModel String @default("gpt-3.5-turbo-dbase")
defaultEmbeddingModel String @default("dialoqbase_eb_text-embedding-ada-002")
ollamaURL String? @default("http://host.docker.internal:11434")
usePuppeteerFetch Boolean? @default(false)
}

model BotIntegration {
Expand Down
118 changes: 59 additions & 59 deletions server/prisma/seed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -488,70 +488,70 @@ const removeTensorflowSupport = async () => {
});
};

const replaceOldEmbeddings = async () => {
await prisma.bot.updateMany({
where: {
embedding: "openai",
},
data: {
embedding: "dialoqbase_eb_text-embedding-ada-002",
},
});
// const replaceOldEmbeddings = async () => {
// await prisma.bot.updateMany({
// where: {
// embedding: "openai",
// },
// data: {
// embedding: "dialoqbase_eb_text-embedding-ada-002",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "cohere",
},
data: {
embedding: "dialoqbase_eb_small",
},
});
// await prisma.bot.updateMany({
// where: {
// embedding: "cohere",
// },
// data: {
// embedding: "dialoqbase_eb_small",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "transformer",
},
data: {
embedding: "dialoqbase_eb_Xenova/all-MiniLM-L6-v2",
},
});
// await prisma.bot.updateMany({
// where: {
// embedding: "transformer",
// },
// data: {
// embedding: "dialoqbase_eb_Xenova/all-MiniLM-L6-v2",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "google-gecko",
},
data: {
embedding: "dialoqbase_eb_models/embedding-gecko-001",
},
});
// await prisma.bot.updateMany({
// where: {
// embedding: "google-gecko",
// },
// data: {
// embedding: "dialoqbase_eb_models/embedding-gecko-001",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "jina-api",
},
data: {
embedding: "dialoqbase_eb_jina-embeddings-v2-base-en",
},
});
// await prisma.bot.updateMany({
// where: {
// embedding: "jina-api",
// },
// data: {
// embedding: "dialoqbase_eb_jina-embeddings-v2-base-en",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "jina",
},
data: {
embedding: "dialoqbase_eb_Xenova/jina-embeddings-v2-small-en",
},
});
// await prisma.bot.updateMany({
// where: {
// embedding: "jina",
// },
// data: {
// embedding: "dialoqbase_eb_Xenova/jina-embeddings-v2-small-en",
// },
// });

await prisma.bot.updateMany({
where: {
embedding: "google",
},
data: {
embedding: "dialoqbase_eb_embedding-001",
},
});
};
// await prisma.bot.updateMany({
// where: {
// embedding: "google",
// },
// data: {
// embedding: "dialoqbase_eb_embedding-001",
// },
// });
// };

const updateGeminiStreamingToTrue = async () => {
await prisma.dialoqbaseModels.update({
Expand All @@ -567,7 +567,7 @@ const updateGeminiStreamingToTrue = async () => {
const main = async () => {
await newModels();
await removeTensorflowSupport();
await replaceOldEmbeddings();
// await replaceOldEmbeddings();
await updateGeminiStreamingToTrue();
};

Expand Down
1 change: 1 addition & 0 deletions server/src/handlers/api/v1/admin/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ export type UpdateDialoqbaseSettingsRequest = {
noOfBotsPerUser: number;
allowUserToCreateBots: boolean;
allowUserToRegister: boolean;
usePuppeteerFetch: boolean;
};
};

Expand Down
20 changes: 17 additions & 3 deletions server/src/loader/web.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,37 @@
import { BaseDocumentLoader } from "langchain/document_loaders/base";
import { Document } from "langchain/document";
import { websiteParser } from "../utils/website-parser";
import puppeteerFetch, { closePuppeteer } from "../utils/puppeteer-fetch";

/**
 * Options accepted by the `DialoqbaseWebLoader` constructor.
 */
export interface WebLoaderParams {
/** Address of the page whose HTML the loader fetches. */
url: string;
/**
 * When truthy, the loader fetches the page through the `puppeteerFetch`
 * helper instead of the global `fetch`.
 */
usePuppeteerFetch?: boolean;
/**
 * When truthy, the loader does not call `closePuppeteer()` after a puppeteer
 * fetch. The crawl controller sets this for each crawled link and calls
 * `closePuppeteer()` itself once the crawl has finished.
 */
doNotClosePuppeteer?: boolean;
}

export class DialoqbaseWebLoader
extends BaseDocumentLoader
implements WebLoaderParams
{
implements WebLoaderParams {
url: string;
usePuppeteerFetch?: boolean;
doNotClosePuppeteer?: boolean;

constructor({ url }: WebLoaderParams) {
constructor({ url, usePuppeteerFetch, doNotClosePuppeteer }: WebLoaderParams) {
super();
this.url = url;
this.usePuppeteerFetch = usePuppeteerFetch;
this.doNotClosePuppeteer = doNotClosePuppeteer;
}

/**
 * Fetches the raw HTML for `this.url`.
 *
 * When `usePuppeteerFetch` is set, the page is loaded through the
 * `puppeteerFetch` helper; otherwise the global `fetch` is used and the
 * response body is returned as text.
 *
 * @returns The page content as a string.
 * @throws Whatever `puppeteerFetch`, `closePuppeteer` or `fetch` rejects with.
 */
async _fetchHTML(): Promise<string> {
  if (this.usePuppeteerFetch) {
    console.log(`[DialoqbaseWebLoader] Using puppeteer to fetch ${this.url}`)
    try {
      // NOTE(review): the meaning of the second argument (`true`) is defined
      // by the puppeteer-fetch helper, which is not visible here — confirm.
      return await puppeteerFetch(this.url, true);
    } finally {
      // Run the cleanup on the failure path too: previously a rejected
      // puppeteerFetch skipped closePuppeteer() entirely. Callers that manage
      // the puppeteer lifetime themselves opt out via doNotClosePuppeteer.
      if (!this.doNotClosePuppeteer) {
        await closePuppeteer();
      }
    }
  }
  const response = await fetch(this.url);
  return await response.text();
}
Expand Down
7 changes: 6 additions & 1 deletion server/src/queue/controllers/crawl.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ import { PrismaClient } from "@prisma/client";
import { QSource } from "../type";
import { crawl } from "../../utils/crawl";
import { websiteQueueController } from "./website.controller";
import { closePuppeteer } from "../../utils/puppeteer-fetch";
const prisma = new PrismaClient();

export const crawlQueueController = async (source: QSource) => {
let maxDepth = source.maxDepth || 1;
let maxLinks = source.maxLinks || 1;
const data = await crawl(source.content!, maxDepth, maxLinks);
const data = await crawl(source.content!, maxDepth, maxLinks, source.usePuppeteerFetch);
const links = Array.from(data?.links || []);

for (const link of links) {
Expand All @@ -27,6 +28,8 @@ export const crawlQueueController = async (source: QSource) => {
embedding: source.embedding,
chunkOverlap: source.chunkOverlap,
chunkSize: source.chunkSize,
usePuppeteerFetch: source.usePuppeteerFetch,
doNotClosePuppeteer: true,
},
prisma
);
Expand All @@ -41,4 +44,6 @@ export const crawlQueueController = async (source: QSource) => {
},
});
}

await closePuppeteer()
};
13 changes: 10 additions & 3 deletions server/src/queue/controllers/website.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@ export const websiteQueueController = async (
source: QSource,
prisma: PrismaClient
) => {
const response = await axios.get(source.content!);

const type = response.headers["content-type"];
let type = "text/html";

try {
const response = await axios.get(source.content!);
type = response.headers["content-type"];
} catch (error) {
console.error(`[websiteQueueController] Error fetching ${source.content}`);
}

console.log("website type is", type);

if (type.includes("application/pdf")) {
const response = await axios.get(source.content!, {
Expand Down Expand Up @@ -68,6 +73,8 @@ export const websiteQueueController = async (
} else {
const loader = new DialoqbaseWebLoader({
url: source.content!,
usePuppeteerFetch: source.usePuppeteerFetch,
doNotClosePuppeteer: source.doNotClosePuppeteer,
});
docs = await loader.load();
}
Expand Down
3 changes: 2 additions & 1 deletion server/src/queue/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ export default async function queueHandler(job: SandboxedJob) {
status: "PROCESSING",
},
});
const { chunkOverlap, chunkSize } = await getRagSettings(prisma);
const { chunkOverlap, chunkSize , usePuppeteerFetch} = await getRagSettings(prisma);
source.chunkOverlap = chunkOverlap;
source.chunkSize = chunkSize;
source.usePuppeteerFetch = usePuppeteerFetch;
switch (source.type.toLowerCase()) {
case "website":
await websiteQueueController(source, prisma);
Expand Down
2 changes: 2 additions & 0 deletions server/src/queue/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ export interface QSource extends BotSource {
maxLinks?: number;
chunkSize: number;
chunkOverlap: number;
usePuppeteerFetch?: boolean;
doNotClosePuppeteer?: boolean;
}
Loading

0 comments on commit 881e1c8

Please sign in to comment.