Skip to content

Commit

Permalink
Merge pull request #41 from inaridiy/fix/cli
Browse files Browse the repository at this point in the history
Fix/cli
  • Loading branch information
inaridiy authored Oct 2, 2024
2 parents cb9953f + 5e82341 commit f3f493c
Show file tree
Hide file tree
Showing 9 changed files with 185 additions and 122 deletions.
5 changes: 5 additions & 0 deletions .changeset/lazy-actors-carry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"webforai": patch
---

Fix the `with { type: "json" }` import error
2 changes: 1 addition & 1 deletion packages/webforai/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ const args = arg({
const isWatch = args["--watch"];

const entryPoints = glob.sync("./src/**/*.ts", {
ignore: ["./src/**/*.test.ts"],
ignore: ["./src/**/*.test.ts", "./src/cli/**/*.ts"],
});

const addExtension = (extension = ".js", fileExtension = ".ts"): Plugin => ({
Expand Down
5 changes: 0 additions & 5 deletions packages/webforai/example.md

This file was deleted.

38 changes: 9 additions & 29 deletions packages/webforai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,7 @@
"description": "A library that provides a web interface for AI",
"author": "inaridiy",
"license": "MIT",
"keywords": [
"web",
"ai",
"html",
"html2md",
"markdown",
"mdast",
"hast"
],
"keywords": ["web", "ai", "html", "html2md", "markdown", "mdast", "hast"],
"repository": {
"type": "git",
"url": "https://github.com/inaridiy/webforai.git"
Expand All @@ -26,10 +18,7 @@
"prerelease": "pnpm build",
"release": "np"
},
"files": [
"dist",
"!dist/types/**/*.js"
],
"files": ["dist", "!dist/types/**/*.js"],
"main": "dist/cjs/index.js",
"type": "module",
"module": "dist/index.js",
Expand Down Expand Up @@ -69,21 +58,11 @@
},
"typesVersions": {
"*": {
"types": [
"./dist/types/index.d.ts"
],
"loaders/playwright": [
"./dist/types/loaders/playwright.d.ts"
],
"loaders/cf-puppeteer": [
"./dist/types/loaders/cf-puppeteer.d.ts"
],
"loaders/fetch": [
"./dist/types/loaders/fetch.d.ts"
],
"loaders/puppeteer": [
"./dist/types/loaders/puppeteer.d.ts"
]
"types": ["./dist/types/index.d.ts"],
"loaders/playwright": ["./dist/types/loaders/playwright.d.ts"],
"loaders/cf-puppeteer": ["./dist/types/loaders/cf-puppeteer.d.ts"],
"loaders/fetch": ["./dist/types/loaders/fetch.d.ts"],
"loaders/puppeteer": ["./dist/types/loaders/puppeteer.d.ts"]
}
},
"peerDependencies": {
Expand All @@ -104,6 +83,7 @@
},
"dependencies": {
"@clack/prompts": "^0.7.0",
"boxen": "^8.0.1",
"commander": "^12.1.0",
"hast-util-from-html": "^2.0.3",
"hast-util-select": "^6.0.2",
Expand All @@ -123,12 +103,12 @@
"devDependencies": {
"@cloudflare/puppeteer": "^0.0.6",
"@tsconfig/recommended": "^1.0.3",
"fastest-levenshtein": "^1.0.16",
"@types/hast": "^3.0.2",
"@types/mdast": "^4.0.2",
"@types/node": "^20.14.10",
"arg": "^5.0.2",
"esbuild": "^0.19.11",
"fastest-levenshtein": "^1.0.16",
"glob": "^10.3.10",
"ncp": "^2.0.0",
"np": "^9.2.0",
Expand Down
4 changes: 0 additions & 4 deletions packages/webforai/src/cli/bin.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import { intro } from "@clack/prompts";
import { program } from "commander";
import pc from "picocolors";
import packageInfo from "../../package.json";
import { webforaiCommand } from "./commands/webforai";
import { LOADERS, MODES } from "./constants";
Expand All @@ -18,6 +16,4 @@ program
.option("-d, --debug", "output extra debugging information")
.action(webforaiCommand);

intro(pc.inverse(`webforai CLI version ${packageInfo.version}`));

program.parse();
14 changes: 7 additions & 7 deletions packages/webforai/src/cli/commands/webforai/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import fs from "node:fs/promises";
import path from "node:path";
import { intro, outro, spinner } from "@clack/prompts";
import { intro, log, outro, spinner } from "@clack/prompts";
import pc from "picocolors";
import packageInfo from "../../../../package.json";
import { htmlToMarkdown } from "../../../html-to-markdown";
Expand All @@ -21,16 +21,16 @@ export const webforaiCommand = async (
intro(pc.bold(pc.green(`webforai CLI version ${packageInfo.version}`)));

const sourcePath = initialPath ?? (await inputSourcePath());
options.debug && console.debug(`sourcePath: ${sourcePath}`);
options.debug && log.info(`sourcePath: ${sourcePath}`);

const loader = isUrl(sourcePath) ? options.loader ?? (await selectLoader()) : "local";
options.debug && console.debug(`loader: ${loader}`);
options.debug && log.info(`loader: ${loader}`);

const outputPath = options.output ?? (await inputOutputPath(sourcePath));
options.debug && console.debug(`outputPath: ${outputPath}`);
options.debug && log.info(`outputPath: ${outputPath}`);

const mode = options.mode ?? (await selectExtractMode());
options.debug && console.debug(`mode: ${mode}`);
options.debug && log.info(`mode: ${mode}`);

let html: string;
const s = spinner();
Expand All @@ -43,13 +43,13 @@ export const webforaiCommand = async (
console.error(error);
process.exit(1);
}
options.debug && console.debug(`html: ${html}`);
options.debug && log.info(`html: ${html}`);

const markdown = htmlToMarkdown(html, {
baseUrl: isUrl(sourcePath) ? sourcePath : undefined,
...(mode === "ai" ? aiModeOptions : readabilityModeOptions),
});
options.debug && console.debug(`markdown: ${markdown}`);
options.debug && log.info(`markdown: ${markdown}`);

const directory = path.dirname(outputPath);
const isDirectoryExists = await fs.stat(directory).then((stat) => stat.isDirectory());
Expand Down
53 changes: 33 additions & 20 deletions packages/webforai/src/cli/commands/webforai/loadHtml.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import fs from "node:fs/promises";
import { log } from "@clack/prompts";
import boxen from "boxen";
import pc from "picocolors";
import { chromium } from "playwright-core";
import playwrightPackageJson from "playwright-core/package.json";
import { loadHtml as loadHtmlByFetch } from "../../../loaders/fetch";
import { loadHtml as loadHtmlByPlaywright } from "../../../loaders/playwright";

Expand All @@ -11,49 +12,61 @@ const checkPlaywrightAvailable = async () => {
return !!isAvailable;
};

/**
 * Reads the version of the resolvable `playwright-core` package.
 *
 * The package manifest is located with `import.meta.resolve` and read from
 * disk instead of being imported as a JSON module.
 *
 * @returns The `version` field of `playwright-core/package.json`, or
 *   `undefined` when the manifest cannot be resolved, read, or parsed.
 */
const getPlaywrightVersion = async () => {
	try {
		// Imported locally so this helper does not need a new top-level import.
		const { fileURLToPath } = await import("node:url");

		// `await` is kept from the original; it is harmless when the call
		// returns a plain string.
		const manifestUrl = await import.meta.resolve("playwright-core/package.json");

		// fileURLToPath decodes percent-escapes (e.g. spaces in the path) and
		// handles Windows drive letters; stripping "file://" by hand does neither.
		const rawManifest = await fs.readFile(fileURLToPath(manifestUrl), "utf-8");

		const manifest: unknown = JSON.parse(rawManifest);
		return (manifest as { version?: string } | null)?.version;
	} catch {
		// Best effort: any resolve, read, or parse failure yields no version.
		return undefined;
	}
};

export const loadHtml = async (sourcePath: string, loader: string, options: { debug?: boolean }) => {
if (loader === "local") {
options.debug && console.debug(`Loading HTML from local file: ${sourcePath}`);
options.debug && log.info(`Loading HTML from local file: ${sourcePath}`);
const content = await fs.readFile(sourcePath, "utf-8");
options.debug && console.debug(`HTML loaded: ${content.slice(0, 100)}`);
options.debug && log.info(`HTML loaded: ${content.slice(0, 100)}`);
return content;
}

if (loader === "fetch") {
options.debug && console.debug(`Loading HTML from URL: ${sourcePath}`);
options.debug && log.info(`Loading HTML from URL: ${sourcePath}`);
const content = await loadHtmlByFetch(sourcePath);
options.debug && console.debug(`HTML loaded: ${content.slice(0, 100)}`);
options.debug && log.info(`HTML loaded: ${content.slice(0, 100)}`);
return content;
}

if (loader === "playwright") {
options.debug && console.debug(`Loading HTML from playwright: ${sourcePath}`);
options.debug && log.info(`Loading HTML from playwright: ${sourcePath}`);
const isPlaywrightAvailable = await checkPlaywrightAvailable();
options.debug && console.debug(`Playwright available: ${isPlaywrightAvailable}`);
options.debug && log.info(`Playwright available: ${isPlaywrightAvailable}`);

const pwVersion = await getPlaywrightVersion();

if (!isPlaywrightAvailable) {
const message = [
"To use playwright loader, you need to install playwright.",
"You can install it by running:",
pc.bold("Error: Playwright is not available"),
"",
` ${pc.bold(`npx playwright@${playwrightPackageJson.version} install chromium`)} `,
"To use the Playwright loader, please install Playwright by running:",
"",
"Hint1: If you get warning like this:",
"WARNING: It looks like you are running 'npx playwright install' without first installing your project's dependencies. ",
`${pc.bold("Ignore this warning.")}`,
` npx playwright@${pwVersion} install chromium`,
"",
"Hint2: If you get message like this:",
"Host system is missing dependencies to run browsers.",
"You should install dependencies by running:",
"Hint 1: If you receive a warning like this:",
` "WARNING: It looks like you are running 'npx playwright install' without first installing your project's dependencies."`,
"You can safely ignore this warning.",
"",
` ${pc.bold(`sudo npx playwright@${playwrightPackageJson.version} install-deps`)} `,
"Hint 2: If you encounter the following message:",
` "Host system is missing dependencies to run browsers."`,
"You should install the necessary dependencies by executing:",
"",
pc.gray("Note: Good luck with that."),
` sudo npx playwright@${pwVersion} install-deps`,
];
console.info(message.join("\n"));

log.error(boxen(message.join("\n"), { padding: 1, borderStyle: "round" }));
throw new Error("Playwright is not available");
}
const content = await loadHtmlByPlaywright(sourcePath);
options.debug && console.debug(`HTML loaded: ${content.slice(0, 100)}`);
options.debug && log.info(`HTML loaded: ${content.slice(0, 100)}`);
return content;
}

Expand Down
Loading

0 comments on commit f3f493c

Please sign in to comment.