From 98da946e271e4f05311452d596861fc2cf3be0ae Mon Sep 17 00:00:00 2001 From: dipu-bd Date: Tue, 10 Dec 2024 11:47:23 +0400 Subject: [PATCH] Remove duplicate --- sources/multi/wtrlab.py | 7 +++- sources/zh/wtrlab.py | 87 ----------------------------------------- 2 files changed, 6 insertions(+), 88 deletions(-) delete mode 100644 sources/zh/wtrlab.py diff --git a/sources/multi/wtrlab.py b/sources/multi/wtrlab.py index 8a3885ebe..e7d84b2f3 100644 --- a/sources/multi/wtrlab.py +++ b/sources/multi/wtrlab.py @@ -21,7 +21,12 @@ class WtrLab(Crawler): Essentially the same framework as webfic though with some other keys, urls, etc. """ - base_url = ["https://wtr-lab.com/"] + base_url = [ + "https://wtr-lab.com/", + "http://wtr-lab.com", + "https://www.wtr-lab.com", + "http://www.wtr-lab.com", + ] has_manga = False has_mtl = True host = "" diff --git a/sources/zh/wtrlab.py b/sources/zh/wtrlab.py deleted file mode 100644 index 90f49f3d5..000000000 --- a/sources/zh/wtrlab.py +++ /dev/null @@ -1,87 +0,0 @@ -import logging -from typing import Generator, Union - -from bs4 import BeautifulSoup, Tag - -from lncrawl.models import Chapter, Volume -from lncrawl.templates.soup.general import GeneralSoupTemplate - -logger = logging.getLogger(__name__) - - -class WtrLabCrawler(GeneralSoupTemplate): - base_url = ["https://wtr-lab.com/en", - "http://wtr-lab.com/en", - "https://www.wtr-lab.com/en", - "http://www.wtr-lab.com/en", - "https://wtr-lab.com", - "http://wtr-lab.com",] - - def initialize(self) -> None: - logger.info("Initializing WtrLabCrawler") - - def get_novel_soup(self) -> BeautifulSoup: - return self.get_soup(self.novel_url) - - def parse_title(self, soup: BeautifulSoup) -> str: - title_tag = soup.find("a", class_="title") - if not title_tag: - raise ValueError("Title not found on the page.") - return title_tag.text.strip() - - def parse_cover(self, soup: BeautifulSoup) -> str: - cover_tag = soup.find("a", class_="image-wrap").find("img") - if not cover_tag or not cover_tag.get("src"): - raise ValueError("Cover image not found on the page.") - return cover_tag["src"].strip() - - def parse_authors(self, soup: BeautifulSoup) -> Generator[str, None, None]: - author_tag = soup.find("div", class_="author-wrap") - if not author_tag: - raise ValueError("Author information not found on the page.") - author_link = author_tag.find("a") - if author_link: - yield author_link.text.strip() - else: - yield "Unknown Author" - - def parse_chapter_list( - self, soup: BeautifulSoup -) -> Generator[Union[Chapter, Volume], None, None]: - # Locate the chapter list container - chapters_section = soup.find("div", class_="chapter-list") - if not chapters_section: - logger.error("Chapter list not found on the page.") - raise ValueError("Chapter list not found on the page.") - - # Initialize volume - volume_id = 1 - volume = Volume(id=volume_id, title=f"Volume {volume_id}") - yield volume - - # Parse chapters - for chapter_tag in chapters_section.find_all("a", class_="chapter-item"): - chapter_title = chapter_tag.find("span").text.strip() - chapter_url = self.absolute_url(chapter_tag["href"]) - - # Generate Chapter object - chapter = Chapter( - id=len(self.chapters) + 1, - title=chapter_title, - url=chapter_url, - volume=volume_id, - ) - yield chapter - - - def select_chapter_body(self, soup: BeautifulSoup) -> Tag: - chapter_body = soup.find("div", class_="chapter-content") - if not chapter_body: - raise ValueError("Chapter content not found on the page.") - return chapter_body - - def login(self, username_or_email: str, password_or_token: str) -> None: - logger.info("Login method not implemented for WtrLabCrawler") - - def logout(self): - logger.info("Logout method not implemented for WtrLabCrawler")