diff --git a/app/ui/package.json b/app/ui/package.json index d5091283..8cdabaf9 100644 --- a/app/ui/package.json +++ b/app/ui/package.json @@ -1,7 +1,7 @@ { "name": "app", "private": true, - "version": "1.8.3", + "version": "1.8.4", "type": "module", "scripts": { "dev": "vite", diff --git a/app/ui/src/App.tsx b/app/ui/src/App.tsx index 773240a3..5bf231f4 100644 --- a/app/ui/src/App.tsx +++ b/app/ui/src/App.tsx @@ -180,6 +180,9 @@ export default function App() { theme={{ algorithm: mode === "dark" ? theme.darkAlgorithm : theme.defaultAlgorithm, + token: { + fontFamily: "Inter", + }, }} > diff --git a/app/ui/src/assets/Inter-Medium.ttf b/app/ui/src/assets/Inter-Medium.ttf new file mode 100644 index 00000000..a01f3777 Binary files /dev/null and b/app/ui/src/assets/Inter-Medium.ttf differ diff --git a/app/ui/src/index.css b/app/ui/src/index.css index 72663d74..9b28db5a 100644 --- a/app/ui/src/index.css +++ b/app/ui/src/index.css @@ -1,4 +1,8 @@ -@import url("https://fonts.googleapis.com/css2?family=Inter:wght@500&display=swap"); + +@font-face { + font-family: "Inter"; + src: url("./assets/Inter-Medium.ttf") format("truetype"); +} * { font-family: "Inter", sans-serif !important; diff --git a/package.json b/package.json index 298daa3a..57967fac 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "dialoqbase", - "version": "1.8.3", + "version": "1.8.4", "description": "Create chatbots with ease", "scripts": { "ui:dev": "pnpm run --filter ui dev", diff --git a/server/package.json b/server/package.json index 1e5bb1df..44207efe 100644 --- a/server/package.json +++ b/server/package.json @@ -41,10 +41,10 @@ "@google-ai/generativelanguage": "^2.0.0", "@grammyjs/files": "^1.0.4", "@huggingface/inference": "1", - "@langchain/anthropic": "^0.1.4", + "@langchain/anthropic": "0.1.4", "@langchain/cohere": "^0.0.6", "@langchain/community": "^0.0.35", - "@langchain/google-genai": "^0.0.10", + "@langchain/google-genai": "^0.0.16", "@langchain/openai": "^0.0.18", "@prisma/client": "^5.9.1", "@slack/bolt": "^3.13.2", diff --git a/server/prisma/seed.ts b/server/prisma/seed.ts index 5f1505e2..1c2968a6 100644 --- a/server/prisma/seed.ts +++ b/server/prisma/seed.ts @@ -354,6 +354,22 @@ const LLMS: { stream_available: true, model_provider: "OpenAI", config: "{}", + }, + { + model_id: "gemini-1.5-flash-dbase", + name: "Gemini 1.5 Flash (Google)", + model_type: "chat", + stream_available: true, + model_provider: "Google", + config: "{}", + }, + { + model_id: "gemini-1.5-pro-dbase", + name: "Gemini 1.5 Pro (Google)", + model_type: "chat", + stream_available: true, + model_provider: "Google", + config: "{}", } ]; diff --git a/server/src/chain/index.ts b/server/src/chain/index.ts index 2ab5c696..cf63f0c1 100644 --- a/server/src/chain/index.ts +++ b/server/src/chain/index.ts @@ -1,4 +1,5 @@ import { BaseLanguageModel } from "@langchain/core/language_models/base"; +import { BaseChatModel } from "@langchain/core/language_models/chat_models"; import { Document } from "@langchain/core/documents"; import { ChatPromptTemplate, @@ -14,6 +15,7 @@ import { RunnableMap, RunnableSequence, } from "@langchain/core/runnables"; + type RetrievalChainInput = { chat_history: string; question: string; @@ -107,8 +109,8 @@ export const createChain = ({ retriever, response_template, }: { - llm: BaseLanguageModel; - question_llm: BaseLanguageModel; + llm: BaseLanguageModel | BaseChatModel ; + question_llm: BaseLanguageModel | BaseChatModel; retriever: Runnable; question_template: string; response_template: string; diff --git a/server/src/queue/controllers/crawl.controller.ts b/server/src/queue/controllers/crawl.controller.ts index bfb9e16e..494d0058 100644 --- a/server/src/queue/controllers/crawl.controller.ts +++ b/server/src/queue/controllers/crawl.controller.ts @@ -7,7 +7,8 @@ const prisma = new PrismaClient(); export const crawlQueueController = async (source: QSource) => { let maxDepth = source.maxDepth || 1; let maxLinks = source.maxLinks || 1; - const links = Array.from(await crawl(source.content!, maxDepth, 0, maxLinks)); + const data = await crawl(source.content!, maxDepth, maxLinks); + const links = Array.from(data?.links || []); for (const link of links) { const newSource = await prisma.botSource.create({ diff --git a/server/src/utils/crawl.ts b/server/src/utils/crawl.ts index 791e1f28..ee131263 100644 --- a/server/src/utils/crawl.ts +++ b/server/src/utils/crawl.ts @@ -1,85 +1,82 @@ import axios from "axios"; import { load } from "cheerio"; +type CrawlResult = { + links: Set; + errors: Set; +}; + const visitedLinks: Set = new Set(); +const errorLinks: Set = new Set(); +const queuedLinks: Set = new Set(); export const crawl = async ( - link: string, + startUrl: string, maxDepth = 2, - currentDepth = 0, - maxLinks = 20, -): Promise> => { - const parentUrl = new URL(link); - - if (currentDepth > maxDepth || visitedLinks.size >= maxLinks) { - return new Set(); - } - - if (visitedLinks.has(link)) { - return new Set(); - } + maxLinks = 20 +): Promise => { + const queue: { url: string; depth: number }[] = [{ url: startUrl, depth: 0 }]; + const fetchedLinks: Set = new Set(); - visitedLinks.add(link); + while (queue.length > 0 && visitedLinks.size < maxLinks) { + const batch = queue.splice(0, Math.min(queue.length, maxLinks - visitedLinks.size)); + + await Promise.all( + batch.map(async ({ url, depth }) => { + if (visitedLinks.has(url) || depth > maxDepth) { + return; + } - try { - const response = await axios.get(link, { - headers: { - Accept: "text/html", - }, - }); - - const contentType = response.headers["content-type"]; + try { + const response = await axios.get(url, { + headers: { Accept: "text/html" }, + }); - if (!contentType.includes("text/html")) { - console.log(`Skipping ${link} (content type: ${contentType})`); - return new Set(); - } + const contentType = response.headers['content-type']; + if (!contentType || !contentType.includes("text/html")) { + return; + } - const $ = load(response.data); - const links = $("a"); - const fetchedLinks: Set = new Set(); + const $ = load(response.data); - for (let i = 0; i < links.length; i++) { - const href = $(links[i]).attr("href"); + visitedLinks.add(url); + fetchedLinks.add(url); - if (!href) { - continue; - } + $("a").each((_, element) => { + const href = $(element).attr("href"); + if (!href) { + return; + } - let absolute: string; - if (href.startsWith("/")) { - absolute = new URL(href, parentUrl.origin).href; - } else if (!isWebUrl(href)) { - absolute = new URL(href, parentUrl.origin).href; - } else { - absolute = href; - } + const absoluteUrl = normalizeUrl(new URL(href, url).href); + if (isSameDomain(absoluteUrl, startUrl) && !visitedLinks.has(absoluteUrl) && !queuedLinks.has(absoluteUrl)) { + queue.push({ url: absoluteUrl, depth: depth + 1 }); + queuedLinks.add(absoluteUrl); + } + }); + } catch (error: any) { + console.error(`Failed to fetch ${url}:`, error?.message || error); + errorLinks.add(url); + } + }) + ); + } - if (new URL(absolute).host !== parentUrl.host) { - continue; - } + return { links: fetchedLinks, errors: errorLinks }; +}; - const childLinks = await crawl( - absolute, - maxDepth, - currentDepth + 1, - maxLinks, - ); - childLinks.forEach((childLink) => fetchedLinks.add(childLink)); - } - fetchedLinks.add(link); - return fetchedLinks; - } catch (error: any) { - console.log(`Error crawling ${link}: ${error?.message}`); - return new Set(); - } +const isSameDomain = (url1: string, url2: string): boolean => { + const { hostname: host1 } = new URL(url1); + const { hostname: host2 } = new URL(url2); + return host1 === host2; }; -function isWebUrl(url: string): boolean { +const normalizeUrl = (url: string): string => { try { - new URL(url); - return true; + const urlObj = new URL(url); + urlObj.hash = ''; + return urlObj.href; } catch (error) { - return false; + return url; } -} +}; diff --git a/server/src/utils/models.ts b/server/src/utils/models.ts index ee1fb64e..899d1425 100644 --- a/server/src/utils/models.ts +++ b/server/src/utils/models.ts @@ -33,7 +33,7 @@ export const chatModelProvider = ( modelName: modelName, temperature: temperature, ...otherFields, - }); + }) as any; case "google-bison": return new ChatGooglePaLM({ temperature: temperature, @@ -84,7 +84,7 @@ export const chatModelProvider = ( maxOutputTokens: 2048, apiKey: process.env.GOOGLE_API_KEY, ...otherFields, - }); + }) as any case "ollama": return new ChatOllama({ baseUrl: otherFields.baseURL, diff --git a/server/yarn.lock b/server/yarn.lock index 24dcfdb2..d6cfb974 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -559,10 +559,10 @@ dependencies: google-gax "^4.0.3" -"@google/generative-ai@^0.1.3": - version "0.1.3" - resolved "https://registry.yarnpkg.com/@google/generative-ai/-/generative-ai-0.1.3.tgz#8e529d4d86c85b64d297b4abf1a653d613a09a9f" - integrity sha512-Cm4uJX1sKarpm1mje/MiOIinM7zdUUrQp/5/qGPAgznbdd/B9zup5ehT6c1qGqycFcSopTA1J1HpqHS5kJR8hQ== +"@google/generative-ai@^0.7.0": + version "0.7.1" + resolved "https://registry.yarnpkg.com/@google/generative-ai/-/generative-ai-0.7.1.tgz#eb187c75080c0706245699dbc06816c830d8c6a7" + integrity sha512-WTjMLLYL/xfA5BW6xAycRPiAX7FNHKAxrid/ayqC1QMam0KAK0NbMeS9Lubw80gVg5xFMLE+H7pw4wdNzTOlxw== "@grammyjs/files@^1.0.4": version "1.0.4" @@ -707,7 +707,7 @@ "@jridgewell/resolve-uri" "3.1.0" "@jridgewell/sourcemap-codec" "1.4.14" -"@langchain/anthropic@^0.1.4": +"@langchain/anthropic@0.1.4": version "0.1.4" resolved "https://registry.yarnpkg.com/@langchain/anthropic/-/anthropic-0.1.4.tgz#49c2e4625860baea0b9b5035c4c7e93a81bed704" integrity sha512-4i25R0dHx+8N7ofI0NGE02LKG9UkhRiAjFS5iNbRcByCSIoovAuTBvdEqpwbDnqn+NkORnP/Wyw3tqFeMtMgYA== @@ -738,7 +738,7 @@ uuid "^9.0.0" zod "^3.22.3" -"@langchain/core@0.1.43", "@langchain/core@~0.1", "@langchain/core@~0.1.36", "@langchain/core@~0.1.41", "@langchain/core@~0.1.5": +"@langchain/core@0.1.43", "@langchain/core@>0.1.5 <0.3.0", "@langchain/core@~0.1", "@langchain/core@~0.1.36", "@langchain/core@~0.1.41": version "0.1.43" resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.1.43.tgz#2d0af42817f8d431bba5252b2ff667a9cb3a25e5" integrity sha512-owE+UU38e4TsUq5yoaKCF+ag6u0ppwgdaqEt2Q57pdcr9nEcy8/PgTunxB10Vksq4fTJgnwWEYf/wMGZnFlRow== @@ -755,13 +755,14 @@ zod "^3.22.4" zod-to-json-schema "^3.22.3" -"@langchain/google-genai@^0.0.10": - version "0.0.10" - resolved "https://registry.yarnpkg.com/@langchain/google-genai/-/google-genai-0.0.10.tgz#05459e668cd018f2e4b0fb639083014151b0ef08" - integrity sha512-neFuCoMew9t8IYM5srh6RVUFQsZxqPtAFVJ0mWtZqHXtb627MECs5FYr+xw1ptPKSbhIAN5H8sgdObqes4bN3A== +"@langchain/google-genai@^0.0.16": + version "0.0.16" + resolved "https://registry.yarnpkg.com/@langchain/google-genai/-/google-genai-0.0.16.tgz#aa1c580b27110f03ce9c5f896a3957419ba95489" + integrity sha512-aUHEeY7sTwxNqj7L5scvnOhNLOKPVSvf7HR6p1Y3M7BPyU63fXP7faB+qyuHmibtKU8pj+ApoXPpjRflYKSv4w== dependencies: - "@google/generative-ai" "^0.1.3" - "@langchain/core" "~0.1.5" + "@google/generative-ai" "^0.7.0" + "@langchain/core" ">0.1.5 <0.3.0" + zod-to-json-schema "^3.22.4" "@langchain/openai@^0.0.18", "@langchain/openai@~0.0.14": version "0.0.18" @@ -2996,9 +2997,9 @@ fast-uri@^2.0.0, fast-uri@^2.1.0: integrity sha512-eel5UKGn369gGEWOqBShmFJWfq/xSJvsgDzgLYC845GneayWvXBf0lJCBn5qTABfewy1ZDPoaR5OZCP+kssfuw== fast-xml-parser@^4.3.5: - version "4.3.5" - resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.3.5.tgz#e2f2a2ae8377e9c3dc321b151e58f420ca7e5ccc" - integrity sha512-sWvP1Pl8H03B8oFJpFR3HE31HUfwtX7Rlf9BNsvdpujD4n7WMhfmu8h9wOV2u+c1k0ZilTADhPqypzx2J690ZQ== + version "4.4.0" + resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.0.tgz#341cc98de71e9ba9e651a67f41f1752d1441a501" + integrity sha512-kLY3jFlwIYwBNDojclKsNAC12sfD6NwW74QB2CoNGPvtVxjliYehVunB3HYyNi+n4Tt1dAcgwYvmKF/Z18flqg== dependencies: strnum "^1.0.5"