Skip to content

Commit

Permalink
add method to use package as a cli command
Browse files Browse the repository at this point in the history
  • Loading branch information
RuslanUC committed Sep 22, 2023
1 parent 3a690cb commit ea36767
Show file tree
Hide file tree
Showing 10 changed files with 773 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.idea
__pycache__
__pycache__
dist
54 changes: 53 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,53 @@
## Telegram export tool.
## Telegram export tool.

### Installation
```shell
$ pip install t-export
```

### Usage
```shell
Usage: python -m texport [OPTIONS]

Options:
--api-id INTEGER Telegram api id. Saved in
~/.texport/config.json file.
--api-hash TEXT Telegram api hash. Saved in
~/.texport/config.json file.
-s, --session-name TEXT Pyrogram session name or path to existing file.
Saved in ~/.texport/<session_name>.session file.
-c, --chat-id TEXT Chat id or username or phone number. "me" or
"self" to export saved messages.
-o, --output TEXT Output directory.
-l, --size-limit INTEGER Media size limit in megabytes.
-f, --from-date TEXT Date from which messages will be saved.
-t, --to-date TEXT Date to which messages will be saved.
--photos / --no-photos Download photos or not.
--videos / --no-videos Download videos or not.
--voice / --no-voice Download voice messages or not.
--video-notes / --no-video-notes
Download video messages or not.
--stickers / --no-stickers Download stickers or not.
--gifs / --no-gifs Download gifs or not.
--documents / --no-documents Download documents or not.
--quiet BOOLEAN Do not print progress to console.
```
On first run you will need to specify the api id and api hash and log in to your Telegram account.
Alternatively, you can pass the path of an existing pyrogram session file to the "--session-name" argument (then there is no need to log in or to specify the api id or api hash).

### Examples

#### Export all messages from private chat with user @example to directory example_export
```shell
$ t-export -c example -o example_export
```

#### Export all messages from private chat with user @example to directory example_export without videos and with size limit of 100 megabytes
```shell
$ t-export -c example -o example_export --no-videos --size-limit 100
```

#### Export all messages from start of 2023 from private chat with user @example to directory example_export
```shell
$ t-export -c example -o example_export --size-limit 100 --from-date 01.01.2023
```
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 29 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,43 @@
[tool.poetry]
name = "t-export"
version = "0.1.0"
description = ""
description = "Telegram chats export tool."
authors = ["RuslanUC <dev_ruslan_uc@protonmail.com>"]
readme = "README.md"
license = "MIT"
classifiers = [
"Environment :: Console",
"Framework :: AsyncIO",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Typing :: Typed",
"Topic :: Internet",
]
packages = [
{ include = "texport" }
]

[tool.poetry.urls]
Homepage = "https://github.com/RuslanUC/t-export"
Repository = "https://github.com/RuslanUC/t-export"

[tool.poetry.scripts]
texport = "texport.main:main"
t_export = "texport.main:main"

[tool.poetry.dependencies]
python = "^3.9"
pyrogram = "^2.0.106"
tgcrypto = "^1.2.5"
click = "^8.1.7"
colorama = "^0.4.6"


[build-system]
Expand Down
4 changes: 3 additions & 1 deletion texport/export_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Union

from pyrogram.enums import MessageMediaType
Expand All @@ -18,7 +19,7 @@
@dataclass
class ExportConfig:
chat_id: Union[str, int] = "me"
output_dir: str = "./telegram_export"
output_dir: Path = Path("./telegram_export")
export_photos: bool = True
export_videos: bool = True
export_voice: bool = True
Expand All @@ -29,6 +30,7 @@ class ExportConfig:
size_limit: int = 32 # In megabytes
from_date: datetime = datetime(1970, 1, 1)
to_date: datetime = datetime.now()
print: bool = False

def excluded_media(self) -> set[MessageMediaType]:
result = set()
Expand Down
36 changes: 26 additions & 10 deletions texport/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@

from pyrogram import Client
from pyrogram.types import Message as PyroMessage
from pyrogram.utils import zero_datetime

from texport.export_config import ExportConfig
from texport.media import MEDIA_TYPES
from texport.messages_saver import MessagesSaver
from texport.progress_print import ProgressPrint


class ExportStatus:
def __init__(self):
self.biggest_message_id = None
self.approx_messages_count = None
self.last_message_id = None
self.last_date = None

Expand All @@ -24,6 +26,7 @@ def __init__(self, client: Client, export_config: ExportConfig=None):
self._client = client
self._task = None
self.status: Optional[ExportStatus] = None
self._progress: ProgressPrint = ProgressPrint(disabled=not self._config.print)
self._messages: list[PyroMessage] = []
self._media: dict[Union[int, str], str] = {}
self._saver = MessagesSaver(self._messages, self._media, export_config)
Expand All @@ -37,40 +40,53 @@ async def _export_media(self, message: PyroMessage) -> None:
if media.file_size > self._config.size_limit * 1024 * 1024:
return

path = await message.download(file_name=f"{self._config.output_dir}/{m.dir_name}/")
path = relpath(path, self._config.output_dir)
path = await message.download(file_name=f"{self._config.output_dir.absolute()}/{m.dir_name}/")
path = relpath(path, self._config.output_dir.absolute())
self._media[message.id] = path

if hasattr(media, "thumbs") and media.thumbs:
path = await self._client.download_media(media.thumbs[0].file_id,
file_name=f"{self._config.output_dir}/thumbs/")
path = relpath(path, self._config.output_dir)
file_name=f"{self._config.output_dir.absolute()}/thumbs/")
path = relpath(path, self._config.output_dir.absolute())
self._media[f"{message.id}_thumb"] = path

async def _export(self, chat_id: Union[int, str]):
offset_date = None if self._config.to_date.date() >= date.today() else self._config.to_date
# TODO: fix offset_date
async for message in self._client.get_chat_history(chat_id):
offset_date = zero_datetime() if self._config.to_date.date() >= date.today() else self._config.to_date
loaded = 0
self._progress.approx_messages_count = await self._client.get_chat_history_count(chat_id)
async for message in self._client.get_chat_history(chat_id, offset_date=offset_date):
if message.date < self._config.from_date:
break
if self.status.biggest_message_id is None:
self.status.biggest_message_id = message.id

loaded += 1
with self._progress.update():
self._progress.status = "Exporting messages..."
self._progress.messages_exported = loaded

if self.status.approx_messages_count is None:
self.status.approx_messages_count = message.id
self.status.last_message_id = message.id
self.status.last_date = message.date

if message.media:
self._progress.status = "Downloading media..."
await self._export_media(message)

if not message.text and not message.caption and message.id not in self._media:
continue

self._messages.append(message)
if len(self._messages) > 5000:
self._progress.status = "Writing messages to file..."
await self._saver.save()

if self._messages:
self._progress.status = "Writing messages to file..."
await self._saver.save()
self.status = self._task = None

self._progress.status = "Done!"

async def export(self, block: bool=True) -> None:
if self._task is not None or self.status is not None:
return
Expand Down
18 changes: 11 additions & 7 deletions texport/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,18 +44,20 @@ async def _main(session_name: str, api_id: int, api_hash: str, config: ExportCon
@click.option("--stickers/--no-stickers", default=True, help="Download stickers or not.")
@click.option("--gifs/--no-gifs", default=True, help="Download gifs or not.")
@click.option("--documents/--no-documents", default=True, help="Download documents or not.")
@click.option("--quiet", default=False, help="Do not print progress to console.")
def main(
session_name: str, api_id: int, api_hash: str, chat_id: str, output: str, size_limit: int, from_date: str,
to_date: str, photos: bool, videos: bool, voice: bool, video_notes: bool, stickers: bool, gifs: bool,
documents: bool,
documents: bool, quiet: bool,
) -> None:
texport_dir = Path.home() / ".texport"
home = Path.home()
texport_dir = home / ".texport"
makedirs(texport_dir, exist_ok=True)
makedirs(output, exist_ok=True)

config = ExportConfig(
chat_id=chat_id,
output_dir=output,
output_dir=Path(output),
size_limit=size_limit,
from_date=datetime.strptime(from_date, "%d.%m.%Y"),
to_date=datetime.strptime(to_date, "%d.%m.%Y"),
Expand All @@ -66,21 +68,23 @@ def main(
export_stickers=stickers,
export_gifs=gifs,
export_files=documents,
print=not quiet,
)

if session_name.endswith(".session"):
name = Path(session_name).name
copy(session_name, f"~/.texport/{name}")
copy(session_name, home / ".texport" / name)
session_name = name[:8]

if api_id is None or api_hash is None:
if (api_id is None or api_hash is None) and not exists(home / ".texport" / f"{session_name}.session"):
if not exists(texport_dir / "config.json"):
print("You should specify --api-id and --api-hash parameters!")
print("You should specify \"--api-id\" and \"--api-hash\" arguments or import existing pyrogram session "
"file by passing it's path to \"--session\" argument!")
return
with open(texport_dir / "config.json", "r", encoding="utf8") as f:
conf = json.load(f)
api_id, api_hash = conf["api_id"], conf["api_hash"]
else:
elif api_id is not None and api_hash is not None:
with open(texport_dir / "config.json", "w", encoding="utf8") as f:
json.dump({"api_id": api_id, "api_hash": api_hash}, f)

Expand Down
8 changes: 7 additions & 1 deletion texport/messages_saver.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from asyncio import get_running_loop
from os.path import exists
from typing import Union, Optional

from pyrogram.types import Message as PyroMessage

from .export_config import ExportConfig
from .html.base import Export
from .html.message import DateMessage, Message
from .resources import unpack_to


class MessagesSaver:
Expand All @@ -16,6 +18,10 @@ def __init__(self, messages: list[PyroMessage], media: dict[Union[int, str], str
self.config = config

def _save(self) -> None:
out_dir = self.config.output_dir
if not exists(out_dir / "js") or exists(out_dir / "images") or exists(out_dir / "css"):
unpack_to(out_dir)

output = ""
prev: Optional[PyroMessage] = None
dates = 0
Expand All @@ -33,7 +39,7 @@ def _save(self) -> None:
prev = message

output = Export(prev.chat.first_name, output).to_html()
with open(f"{self.config.output_dir}/messages{self.part}.html", "w", encoding="utf8") as f:
with open(f"{out_dir}/messages{self.part}.html", "w", encoding="utf8") as f:
f.write(output)

async def save(self) -> None:
Expand Down
Loading

0 comments on commit ea36767

Please sign in to comment.