From be06e6da1fbaedc2b3995d7e3d38178e5a658994 Mon Sep 17 00:00:00 2001 From: Maximilian Pult <17569567+MPult@users.noreply.github.com> Date: Sun, 30 Apr 2023 18:59:09 +0200 Subject: [PATCH] refactor(#22) - Created internal ReAPI Library * started work on ReAPI * more work * added users to ReAPI * login works * donkey * Fixed ReAPI impl in sb * moved login in base of ReAPI * added debug to login * something * something * Beginnings of SB logic * fix images * remove login from root * More progress * got images to work * Adding ReAPI rooms function * everything * everything * fix merge conflict * fix test * it works * this doesn't work * Batching Messages * reimpl exporting * Adding other export formats; fixing timestamp; image saving only impl. on .txt * Now with 100% more export choises (that actually work) * Fixing shit * Prepare for v1.0.0 release * Fix edition typo * Remove unessesary crates * Removing OneElectrons implicit panic from the test... --------- Co-authored-by: Electron --- Cargo.toml | 7 +- README.md | 10 +- justfile | 5 +- src/ReAPI/images.rs | 129 ++++++++++++ src/ReAPI/login.rs | 162 +++++++++++++++ src/ReAPI/messages.rs | 134 ++++++++++++ src/ReAPI/mod.rs | 48 +++++ src/ReAPI/rooms.rs | 77 +++++++ src/ReAPI/users.rs | 46 +++++ src/export.rs | 168 +++++++-------- src/id_translation.rs | 44 ---- src/images.rs | 59 ------ src/lib.rs | 39 ---- src/login.rs | 142 ------------- src/macros.rs | 6 + src/main.rs | 46 +++-- src/messages.rs | 192 ------------------ .../ReAPI/images/get_images/.gitignore | 4 + tests/tests.rs | 14 -- 19 files changed, 737 insertions(+), 595 deletions(-) create mode 100644 src/ReAPI/images.rs create mode 100644 src/ReAPI/login.rs create mode 100644 src/ReAPI/messages.rs create mode 100644 src/ReAPI/mod.rs create mode 100644 src/ReAPI/rooms.rs create mode 100644 src/ReAPI/users.rs delete mode 100644 src/id_translation.rs delete mode 100644 src/images.rs delete mode 100644 src/lib.rs delete mode 100644 src/login.rs delete mode 100644 src/messages.rs create mode 100644 test_resources/test_cases/ReAPI/images/get_images/.gitignore delete mode 100644 tests/tests.rs diff --git a/Cargo.toml b/Cargo.toml index 8456911..0e66ce7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rexit" -version = "0.1.1" +version = "1.0.0" edition = "2021" description = "Export your Reddit Chats" readme = "README.md" @@ -9,7 +9,7 @@ license = "GPL-3.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -chrono = "0.4.24" +chrono = { version = "0.4.24", features = ["serde"] } reqwest = {version = "0.11.16", features = ["blocking", "multipart", "cookies", "gzip"]} serde = { version = "1.0.160", features = ["derive"] } serde_json = "1.0" @@ -20,5 +20,4 @@ log = "0.4.0" pretty_env_logger = "0.4.0" inquire = "0.6.1" cached = "0.43.0" -console = { version = "0.15.5", features = ["windows-console-colors"] } -url = { version = "2.3.1", features = ["serde"] } +console = { version = "0.15.5", features = ["windows-console-colors"] } \ No newline at end of file diff --git a/README.md b/README.md index cfe02dd..4b38295 100644 --- a/README.md +++ b/README.md @@ -30,15 +30,21 @@ Options: Currently, you need to specify the formats, and it will ask for the username and password (or bearer token with that auth flow). ```bash -$ rexit --formats csv,json,txt +$ rexit --formats csv,json,txt --images > Your Reddit Username: > Your Reddit Password: ``` -It will save the files to the current directory. For CSV and TXT it is split by room; for JSON it's combined into one file. If an image (.jpg, .gif, .png, etc.) was sent the matrix URL (`mxc:///`) will be displayed as the message content. +It will save the files to the current directory. For CSV and TXT it is split by room. If an image (.jpg, .gif, .png, etc.) was sent the filename will be displayed as the message content, along with the prefix `FILE`. ## Installation You can use the files provided in the releases' page of this repository, or install via cargo. +### Manual Install + +1. Download the build for your system (Windows or M1 MacOS) +2. Use the terminal run Rexit with the arguments you want. (See Usage for details) + +### Cargo Install ```BASH $ cargo install rexit ``` diff --git a/justfile b/justfile index 8cc05c9..782eccc 100644 --- a/justfile +++ b/justfile @@ -7,4 +7,7 @@ test: cargo test test-creds: - cargo test --include-ignored \ No newline at end of file + cargo test -- --include-ignored + +doc: + cargo doc --no-deps --open \ No newline at end of file diff --git a/src/ReAPI/images.rs b/src/ReAPI/images.rs new file mode 100644 index 0000000..2c3c8fb --- /dev/null +++ b/src/ReAPI/images.rs @@ -0,0 +1,129 @@ +use super::Client; +use crate::exit; +use cached::SizedCache; +use console::style; +use serde::Serialize; +use std::path::PathBuf; + +#[derive(std::hash::Hash, Clone, Debug, Serialize)] +pub struct Image { + pub extension: String, + pub id: String, + pub data: Vec, +} + +impl Image { + pub fn export_to(&self, path: PathBuf) { + let mut path = path; + path.push(self.id.clone()); + + std::fs::write( + path.with_extension(self.extension.clone()), + self.data.clone(), + ) + .unwrap(); + } + + pub fn from(id: String, extension: String, data: Vec) -> Image { + Image { + extension, + id, + data, + } + } +} + +/// Gets images from a mxc:// URL as per [SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixmediav3downloadservernamemediaid) +#[cached::proc_macro::cached( + type = "SizedCache", + create = "{ SizedCache::with_size(10_000) }", + convert = r#"{ format!("{}", url) }"# +)] +pub fn get_image(client: &Client, url: String) -> Image { + info!(target: "get_image", "Getting image: {}", url); + let (url, id) = parse_matrix_image_url(url.as_str()); + + let data = client.reqwest_client.get(url).send().unwrap(); + + Image { + extension: get_image_extension(&data.headers()), + id, + data: data.bytes().unwrap().to_vec(), + } +} + +fn parse_matrix_image_url(url: &str) -> (String, String) { + let url = reqwest::Url::parse(url).unwrap(); // I assume that all urls given to this function are valid + + let output_url = + reqwest::Url::parse("https://matrix.redditspace.com/_matrix/media/r0/download/reddit.com/") + .unwrap(); + + let id = url.path_segments().unwrap().next().unwrap(); + + let output_url = output_url.join(id).unwrap(); + + (output_url.to_string(), id.to_string()) +} + +fn get_image_extension(headers: &reqwest::header::HeaderMap) -> String { + let mut extension: Option = None; + + // Iterate over headers to find content-type + for (header_name, header_value) in headers { + if header_name.as_str() != "content-type" { + continue; + } + let file_type = header_value.to_str().unwrap().to_string(); + + let mut file_type = file_type.split("/"); + + extension = match file_type.nth(1).unwrap() { + "jpeg" => Some("jpeg".to_string()), + "png" => Some("png".to_string()), + "gif" => Some("gif".to_string()), + _ => { + println!("{}", style("Failed to read image type").red().bold()); + exit!(0); + } + }; + } + + if extension.is_none() { + println!( + "{}", + style("Error: Something failed reading the image type") + .red() + .bold() + ); + error!("Something failed reading the image type"); + exit!(0); + } + + return extension.unwrap(); +} + +#[cfg(test)] +mod tests { + #[test] + fn get_image() { + let image = super::get_image( + &super::super::new_client(true), + "mxc://reddit.com/dwdprq7pxbva1/".to_string(), + ); + + image.export_to(std::path::PathBuf::from( + "./test_resources/test_cases/ReAPI/images/get_images/", + )); + + assert!(std::path::PathBuf::from( + "./test_resources/test_cases/ReAPI/images/get_images/dwdprq7pxbva1.gif" + ) + .exists()); + + std::fs::remove_file( + "./test_resources/test_cases/ReAPI/images/get_images/dwdprq7pxbva1.gif", + ) + .expect("Could not remove downloaded file"); + } +} diff --git a/src/ReAPI/login.rs b/src/ReAPI/login.rs new file mode 100644 index 0000000..275d734 --- /dev/null +++ b/src/ReAPI/login.rs @@ -0,0 +1,162 @@ +use console::style; +use regex::Regex; + +impl super::Client { + pub fn logged_in(&self) -> bool { + self.bearer.is_some() + } + + pub fn bearer_token(&self) -> String { + if let Some(token) = self.bearer.clone() { + return token.clone(); + } + + println!("{}", style("You are not logged in").red().bold()); + crate::exit!(0); + } + + pub fn login_with_token(&mut self, bearer: String) { + self.bearer = Some(bearer); + } + + /// Log into Reddit returning the Bearer + pub fn login(&mut self, username: String, password: String) { + // URL encode the password & username + let encoded_password: String; + let username = urlencoding::encode(&username); + + // Reddit is doing a weird thing where * is not urlencoded. Sorry for everyone that has * and %2A in their password + if password.contains("*") { + debug!("Password has *; URL-encode was rewritten"); + encoded_password = password.replace("%2A", "*"); + } else { + encoded_password = urlencoding::encode(&password).into_owned(); + } + + // Send an HTTP GET request to get the CSRF token + let resp = self + .reqwest_client + .get("https://www.reddit.com/login/") + .send() + .expect("Failed to send HTTP request; to obtain CSRF token"); + + debug!("CSRF Request Response: {:?}", resp); + let body = resp.text(); + let body = body.expect("Failed to read response body"); + + // Regex to find the CSRF token in the body of the HTML + let csrf = + Regex::new(r#" (String, String) { + let username = std::env::var("REXIT_USERNAME").expect("Could not find username in env"); + let password = std::env::var("REXIT_PASSWORD").expect("Could not find password in env"); + + (username, password) + } +} diff --git a/src/ReAPI/messages.rs b/src/ReAPI/messages.rs new file mode 100644 index 0000000..3bbb8aa --- /dev/null +++ b/src/ReAPI/messages.rs @@ -0,0 +1,134 @@ +use super::Client; +use chrono::{TimeZone, Utc}; +use serde::{Deserialize, Serialize}; + +/// Struct for a singular message. +#[derive(Debug, Clone, Serialize)] +pub struct Message { + pub author: String, + pub timestamp: chrono::DateTime, + pub content: Content, +} + +#[derive(Debug, Clone, Serialize)] +pub enum Content { + Image(super::Image), + Message(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct InternalMessages { + start: String, + end: String, + chunk: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct InternalMessage { + #[serde(rename = "type")] + messages_type: String, + sender: String, + room_id: String, + content: InternalContent, + + #[serde(rename = "origin_server_ts")] + timestamp: i64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct InternalContent { + body: Option, + url: Option, + info: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct InternalImageInfo { + mimetype: String, +} + +pub fn list_messages(client: &Client, id: String) -> Vec { + let mut output: Vec = vec![]; + let mut batch: String = String::new(); + // Loop over the batching + loop { + + let url = format!( + "https://matrix.redditspace.com/_matrix/client/r0/rooms/{id}/messages?limit=10000&dir=b&from={batch}"); + + // Send request to get messages + let response = client + .reqwest_client + .get(url) + .header("Authorization", format!("Bearer {}", client.bearer_token())) + .send() + .expect("Failed to send HTTP request; to obtain messages"); + + // Deserialize response + let messages: Result = + serde_json::from_str(response.text().unwrap().as_str()); + let messages = messages.unwrap(); + output.reserve(messages.chunk.len()); + + // Iterate over messages + for message in messages.chunk { + + // Detect if message is text or file + if message.content.url.is_some() { + // Is a file + output.push(Message { + author: super::get_user(client, message.sender).displayname, + timestamp: unix_millis_to_utc(message.timestamp), + content: Content::Image(super::images::get_image(&client, message.content.url.unwrap())), + }) + } else if message.content.body.is_some() { + // Text Message + output.push(Message { + author: super::get_user(client, message.sender).displayname, + timestamp: unix_millis_to_utc(message.timestamp), + content: Content::Message(message.content.body.unwrap()), + }) + } + } + + // Check for end condition + if messages.end == "t0_0" { + debug!("Found messages end"); + break; + } else { + // Update new batch variable + batch = messages.end; + } + } + return output; +} + +fn unix_millis_to_utc(unix_time: i64) -> chrono::DateTime { + Utc.timestamp_opt(unix_time / 1000, 0).unwrap() +} + +#[cfg(test)] +mod tests { + use super::super::new_client; + + #[test] + fn list_messages() { + let (username, password) = get_login(); + + let mut client = new_client(true); + + client.login(username, password); + + let rooms = super::super::download_rooms(&client); + + let messages =super::list_messages(&client, rooms[1].clone().id); + println!("{:#?}", messages); + } + + fn get_login() -> (String, String) { + let username = std::env::var("REXIT_USERNAME").expect("Could not find username in env"); + let password = std::env::var("REXIT_PASSWORD").expect("Could not find password in env"); + + (username, password) + } +} diff --git a/src/ReAPI/mod.rs b/src/ReAPI/mod.rs new file mode 100644 index 0000000..aaf70c3 --- /dev/null +++ b/src/ReAPI/mod.rs @@ -0,0 +1,48 @@ +//! Reddit matrix api +#![allow(non_snake_case, dead_code)] + +mod images; +mod login; +mod messages; +mod rooms; +mod users; + +pub use images::Image; + +pub use rooms::download_rooms; +pub use rooms::Room; + +pub use messages::Content; +pub use messages::Message; + +pub use users::get_user; +pub use users::User; + +pub struct Client { + reqwest_client: reqwest::blocking::Client, + bearer: Option, +} + +pub fn new_client(debug: bool) -> Client { + // Build the client + let client: reqwest::blocking::Client; + if debug { + client = reqwest::blocking::Client::builder() + .cookie_store(true) + .timeout(std::time::Duration::from_secs(60)) + .danger_accept_invalid_certs(true) // Used in development to trust a proxy + .build() + .expect("Error making Reqwest Client"); + } else { + client = reqwest::blocking::Client::builder() + .cookie_store(true) + .timeout(std::time::Duration::from_secs(60)) + .build() + .expect("Error making Reqwest Client"); + } + + Client { + reqwest_client: client, + bearer: None, + } +} diff --git a/src/ReAPI/rooms.rs b/src/ReAPI/rooms.rs new file mode 100644 index 0000000..007d3c4 --- /dev/null +++ b/src/ReAPI/rooms.rs @@ -0,0 +1,77 @@ +use super::Client; +use serde::Serialize; +use serde_json::Value; + +#[derive(Debug, Clone, Serialize)] +pub struct Room { + pub id: String, + messages: Option>, +} + +impl Room { + fn download(id: String, client: &Client) -> Room { + Room { id: id.clone(), messages: download_messages(&client, id.clone())} + } + + pub fn messages(&self) -> Vec { + return self.messages.clone().unwrap(); + } +} + + +fn download_messages(client: &Client, id: String) -> Option>{ + Some(super::messages::list_messages(client, id)) +} + +/// Returns list of all rooms that the user is joined to as per [SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixclientv3directorylistroomroomid) +pub fn download_rooms(client: &Client) -> Vec { + let resp = client + .reqwest_client + .get("https://matrix.redditspace.com/_matrix/client/v3/joined_rooms") + .header("Authorization", format!("Bearer {}", client.bearer_token())) + .send() + .expect("Failed to send HTTP request; to obtain rooms"); + + // Parse json + let json: Value = + serde_json::from_str(&resp.text().unwrap()).expect("Error parsing Rooms list JSON"); + + // Read rooms from json + let rooms = json["joined_rooms"] + .as_array() + .expect("Error parsing array") + .to_owned(); + + // Move rooms into a Vec + let rooms: Vec = rooms + .iter() + .map(|room| Room::download(room.to_string().replace("\"", ""), client)) + .collect(); + + info!("Found {} room(s) ", rooms.len()); + + return rooms; +} + +#[cfg(test)] +mod tests { + #[test] + #[ignore = "creds"] + fn list_rooms() { + let (username, password) = get_login(); + let mut client = super::super::new_client(true); + + client.login(username, password); + + let rooms = super::download_rooms(&client); + + println!("{:?}", rooms); + } + + fn get_login() -> (String, String) { + let username = std::env::var("REXIT_USERNAME").expect("Could not find username in env"); + let password = std::env::var("REXIT_PASSWORD").expect("Could not find password in env"); + + (username, password) + } +} diff --git a/src/ReAPI/users.rs b/src/ReAPI/users.rs new file mode 100644 index 0000000..7df5afe --- /dev/null +++ b/src/ReAPI/users.rs @@ -0,0 +1,46 @@ +use super::Client; +use cached::SizedCache; + +#[derive(Clone, Debug)] +pub struct User { + pub id: String, + pub displayname: String, +} + +#[cached::proc_macro::cached( + type = "SizedCache", + create = "{ SizedCache::with_size(10_000) }", + convert = r#"{ format!("{}", id) }"# +)] +pub fn get_user(client: &Client, id: String) -> User { + let url = format!("https://matrix.redditspace.com/_matrix/client/r0/profile/{id}/displayname",); + + let response = client + .reqwest_client + .get(url) + .send() + .expect("Failed to send HTTP request"); + + let value: serde_json::Value = serde_json::from_str(response.text().unwrap().as_str()).unwrap(); + + info!("Found user: {}", value["displayname"].clone()); + + User { + id: id, + displayname: value["displayname"].as_str().unwrap().to_string(), + } +} + +#[cfg(test)] +mod tests { + + #[test] + fn get_user() { + let client = super::super::new_client(true); + let id = "@t2_9b09u6gps:reddit.com".to_string(); + + let result = super::get_user(&client, id); + + assert_eq!(result.displayname, "rexitTest"); + } +} diff --git a/src/export.rs b/src/export.rs index 0ec3601..19419d1 100644 --- a/src/export.rs +++ b/src/export.rs @@ -1,106 +1,110 @@ -//! This module contains the functions that handle the exporting features of Rexit. - -use std::fs; -use std::fs::OpenOptions; +use std::fs::{self, OpenOptions}; use std::io::Write; -use crate::cli::Cli; -use crate::messages::AllChats; - -#[allow(dead_code)] -pub enum ExportFormat { - JSON, - CSV, - TXT -} +use crate::ReAPI; -/// Function to check what export format is desired and calls the appropriate export function. -#[allow(dead_code)] -pub fn decide_export(all_chats: AllChats, cli: Cli) { - // Split the comma separated format cli args into a array - let formats: Vec<&str> = cli.formats.split(",").collect(); - - // Run the appropriate function for each export format - for format in formats { - match format { - "json" => export_to_json(all_chats.clone()), - "csv" => export_to_csv(all_chats.clone()), - "txt" => export_to_txt(all_chats.clone()), - _ => println!("Not valid Format"), - } - } -} +/// Export the chats into a .txt file +pub fn export_room_chats_txt(room: ReAPI::Room) { + let mut output_buffer: String = String::new(); + let path = format!("./out/{}.txt", &room.id[1..10]); -/// Export the chats into .txt files. -pub fn export_to_txt(all_chats: AllChats) { - info!("Exporting to TXT"); - // Iterate over the individual chats / rooms (idk what to call it reddit uses the terms interchangeably) - for chat in all_chats.chats { - // Create the file for each chat / room - let filename = std::path::PathBuf::from("./out") - .join(std::path::PathBuf::from(&chat.id[1..10]).with_extension("txt")); + for message in room.messages() { + if let ReAPI::Content::Message(text) = message.content { + let line: String = format!( + "[{}] {}: {}\n", + message + .timestamp + .to_rfc3339_opts(chrono::SecondsFormat::Secs, true) + .to_string(), + message.author, + text + ); - std::fs::write(filename.clone(), "").unwrap(); + output_buffer.push_str(line.as_str()); + } else if let ReAPI::Content::Image(image) = message.content { + let image_text = format!("FILE: {}", image.id); - // Iterate over each message in the chat; append to the file - for message in chat.messages { - // Format for the line to be appended let line: String = format!( - "[{}] {}: {}", - message.timestamp, message.author, message.message + "[{}] {}: {}\n", + message + .timestamp + .to_rfc3339_opts(chrono::SecondsFormat::Secs, true) + .to_string(), + message.author, + image_text ); - let mut file = OpenOptions::new() - .write(true) - .append(true) - .open(filename.clone()) - .unwrap(); - - if let Err(e) = writeln!(file, "{}", line) { - eprintln!("Couldn't write to file: {}", e); - } + output_buffer.push_str(line.as_str()); } } + + std::fs::write(path, output_buffer).unwrap(); } /// Export the chats into .json files. -pub fn export_to_json(all_chats: AllChats) { - info!("Exporting to JSON"); +pub fn export_room_chats_json(room: ReAPI::Room) { + let path = format!("./out/{}.json", &room.id[1..10]); - let file_data = serde_json::to_string(&all_chats).unwrap(); + let file_data = serde_json::to_string(&room).unwrap(); - fs::write("./out/export.json", file_data).expect("Unable to write file"); + fs::write(path, file_data).expect("Unable to write file"); } -/// Export the chats into .csv files. -pub fn export_to_csv(all_chats: AllChats) { - info!("Exporting to CSV"); - - // Iterate over the individual chats / rooms (idk what to call it reddit uses the terms interchangeably) - for chat in all_chats.chats { - // Create the file for each chat / room - - let filename = std::path::PathBuf::from("./out") - .join(std::path::PathBuf::from(&chat.id[1..10]).with_extension("csv")); - std::fs::write(filename.clone(), "timestamp, author, message \n").unwrap(); - - // Iterate over each message in the chat; append to the file - for message in chat.messages { - // Format for the line to be appended - let line: String = format!( - "{}, {}, {}", - message.timestamp, message.author, message.message +pub fn export_room_chats_csv(room: ReAPI::Room) { + // Create the file for each chat / room + let path = format!("./out/{}.csv", &room.id[1..10]); + + std::fs::write(path.clone(), "timestamp, author, message \n").unwrap(); + + // Iterate over each message in the chat; append to the file + for message in room.messages() { + // Format for the line to be appended + let mut line: String = String::new(); + + if let ReAPI::Content::Message(text) = message.content { + line = format!( + "{}, {}, {},", + message + .timestamp + .to_rfc3339_opts(chrono::SecondsFormat::Secs, true) + .to_string(), + message.author, + text + ); + } else if let ReAPI::Content::Image(image) = message.content { + let image_text = format!("FILE: {}", image.id); + + line = format!( + "{}, {}, {},", + message + .timestamp + .to_rfc3339_opts(chrono::SecondsFormat::Secs, true) + .to_string(), + message.author, + image_text ); + } + + let mut file = OpenOptions::new() + .write(true) + .append(true) + .open(path.clone()) + .unwrap(); - let mut file = OpenOptions::new() - .write(true) - .append(true) - .open(filename.clone()) - .unwrap(); + if let Err(e) = writeln!(file, "{}", line) { + eprintln!("Couldn't write to file: {}", e); + } + } +} - if let Err(e) = writeln!(file, "{}", line) { - eprintln!("Couldn't write to file: {}", e); - } +pub fn export_room_images(room: ReAPI::Room) { + for message in room.messages() { + if let ReAPI::Content::Image(image) = message.content { + std::fs::write( + format!("./out/images/{}.{}", image.id, image.extension), + image.data, + ) + .unwrap(); } } } diff --git a/src/id_translation.rs b/src/id_translation.rs deleted file mode 100644 index a7e226d..0000000 --- a/src/id_translation.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Converts the given userID into a displayname using the API. ([SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixclientv3profileuseriddisplayname)) - -use cached::proc_macro::cached; -use serde_json::Value; - -/// Converts the userids into displaynames; obtains data through a API request, uses function cache -#[cached] -pub fn id_to_displayname(id: String, debug: bool) -> String { - // Create a Reqwest client - let client: reqwest::blocking::Client; - if debug { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .danger_accept_invalid_certs(true) // Used in development to trust a proxy - .build() - .expect("Error making Reqwest Client"); - } else { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .build() - .expect("Error making Reqwest Client"); - } - - let url = format!("https://matrix.redditspace.com/_matrix/client/r0/profile/{id}/displayname"); - // Request name from API - let response = client - .get(url) - .send() - .expect("Failed to send HTTP request"); - - // Parse the json - let displayname: Value = serde_json::from_str( - &response - .text() - .expect("Error getting Displayname - HTTP Request"), - ) - .expect("Error getting Displayname - JSON parsing"); - - let displayname = displayname["displayname"].as_str().unwrap(); - - debug!("Got User lookup: {}, with ID: {}", displayname, id); - - return displayname.to_owned(); -} diff --git a/src/images.rs b/src/images.rs deleted file mode 100644 index 79cb04a..0000000 --- a/src/images.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::exit; -use console::style; -use reqwest::blocking::Client; -use std::path::PathBuf; -use url::Url; - -/// Gets images from a mxc:// URL as per [SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixmediav3downloadservernamemediaid) -pub fn export_image(client: &Client, url: String) { - info!(target: "export_image", "Getting image: {}", url); - let (url, id) = parse_matrix_image_url(url.as_str()); - - let data = client.get(url).send().unwrap(); - - let mut extension: Option = None; - for (header_name, header_value) in data.headers() { - if header_name.as_str() == "content-type" { - let file_type = header_value.to_str().unwrap().to_string(); - - let mut file_type = file_type.split("/"); - - extension = match file_type.nth(1).unwrap() { - "jpeg" => Some("jpeg".to_string()), - "png" => Some("png".to_string()), - "gif" => Some("gif".to_string()), - _ => { - exit!(0); - } - }; - } - } - if extension.is_none() { - println!( - "{}", - style("Error: Something failed reading the image type").red() - ); - error!("Something failed reading the image type"); - exit!(0); - } - - let data = data.bytes().unwrap(); - - let mut output_path = PathBuf::from("./out/images/"); - output_path.push(id); - - std::fs::write(output_path.with_extension(extension.unwrap()), data).unwrap(); -} - -fn parse_matrix_image_url(url: &str) -> (String, String) { - let url = Url::parse(url).unwrap(); // I assume that all urls given to this function are valid - - let output_url = - Url::parse("https://matrix.redditspace.com/_matrix/media/r0/download/reddit.com/").unwrap(); - - let id = url.path_segments().unwrap().next().unwrap(); - - let output_url = output_url.join(id).unwrap(); - - (output_url.to_string(), id.to_string()) -} diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index 98bc398..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! This library is meant for testing only. Please do not use. - -#[macro_use] -extern crate log; -extern crate pretty_env_logger; - -mod export; -mod cli; -mod login; -mod id_translation; -mod images; -mod messages; -mod macros; - -pub type RexitToken = String; -pub type Client = reqwest::blocking::Client; - -pub use messages::AllChats; -pub use export::ExportFormat; - -pub fn login(username: String, password: String) -> RexitToken { - login::request_login(username, password, true) -} - -pub fn get_all_messages(bearer: RexitToken, export_images: bool) -> AllChats { - let rooms = messages::list_rooms(bearer.clone(), true); - - let all_chats = messages::iter_rooms(rooms, bearer, true, export_images); - - all_chats -} - -pub fn export(format: ExportFormat, chats: AllChats) { - match format { - ExportFormat::CSV => export::export_to_csv(chats), - ExportFormat::JSON => export::export_to_json(chats), - ExportFormat::TXT => export::export_to_txt(chats) - } -} \ No newline at end of file diff --git a/src/login.rs b/src/login.rs deleted file mode 100644 index 8573b92..0000000 --- a/src/login.rs +++ /dev/null @@ -1,142 +0,0 @@ -//! Module to perform the (insanely intricate) login process. -//! 1. GET `reddit.com/login` to obtain the CSRF token to give to the login. -//! 2. POST `reddit.com/login` to login providing username, CSRF token, Password. -//! 3. GET `reddit.com/` to obtain bearer token from the body of response. -//! 4. Perform matrix chat login à la [spec](https://spec.matrix.org/v1.6/client-server-api/#login) -use regex::Regex; -use urlencoding::encode; - -/// Performs the login ritual. -pub fn request_login(username: String, password: String, debug: bool) -> String { - // URL encode the password & username - let encoded_password: String; - let username = encode(&username); - - // Reddit is doing a weird thing where * is not urlencoded. Sorry for everyone that has * and %2A in their password - if password.contains("*") { - debug!("Password has *; URL-encode was rewritten"); - encoded_password = password.replace("%2A", "*"); - } else { - encoded_password = encode(&password).into_owned(); - } - - // Obtain the CSRF token - let client: reqwest::blocking::Client; - if debug { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .danger_accept_invalid_certs(true) // Used in development to trust a proxy - .build() - .expect("Error making Reqwest Client"); - } else { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .build() - .expect("Error making Reqwest Client"); - } - - // Send an HTTP GET request to get the CSRF token - let resp = client - .get("https://www.reddit.com/login/") - .send() - .expect("Failed to send HTTP request; to obtain CSRF token"); - - debug!("CSRF Request Response: {:?}", resp); - let body = resp.text(); - let body = body.expect("Failed to read response body"); - - // Regex to find the CSRF token in the body of the HTML - let csrf = - Regex::new(r#" { + #[cfg(test)] + panic!("{}", $y); + #[cfg(not(test))] + std::process::exit($x); + }; ($x: literal) => { #[cfg(test)] panic!(); diff --git a/src/main.rs b/src/main.rs index d04aa24..b5a2acb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,13 +11,9 @@ use std::env; use std::path::PathBuf; // import other files -mod export; -use export::decide_export; +mod ReAPI; mod cli; -mod login; -mod id_translation; -mod images; -mod messages; +mod export; mod macros; use cli::{Cli, Parser}; @@ -38,6 +34,9 @@ fn main() { // Parse the CLI args let args = Cli::parse(); + // Create an ReAPI client + let mut client = ReAPI::new_client(args.debug); + if args.debug { println!("{}\n{}", style("The --debug flag accepts untrusted HTTPS certificates which can be a potential security risk").red().bold(), @@ -45,14 +44,15 @@ fn main() { } // Decide what auth flow to use - let bearer_token: String; if args.token == true { // Use the bearer token flow trace!("Bearer token auth flow"); - bearer_token = Password::new("Your Bearer Token") - .prompt() - .expect("Error reading bearer token"); + client.login_with_token( + Password::new("Your Bearer Token") + .prompt() + .expect("Error reading bearer token"), + ); } else { // Use the username password auth flow trace!("Password auth flow"); @@ -67,12 +67,11 @@ fn main() { .prompt() .expect("Error reading password"); - bearer_token = login::request_login(username.to_owned(), password.to_owned(), args.debug); + client.login(username.to_owned(), password.to_owned()); } // Handle output folder stuff // Deletes ./out (we append the batches so this is necessary) - if PathBuf::from("./out").exists() { std::fs::remove_dir_all("./out").expect("Error deleting out folder"); } @@ -81,14 +80,29 @@ fn main() { std::fs::create_dir("./out").unwrap(); // Make sure there is an images folder to output to if images is true - if args.images && !PathBuf::from("./out/images").exists() { + if args.images { std::fs::create_dir("./out/images").unwrap(); } // Get list of rooms - let rooms = messages::list_rooms(bearer_token.clone(), args.debug); + let rooms = ReAPI::download_rooms(&client); + + // Exports messages to files. Add image if its set to args + let mut export_formats: Vec<&str> = args.formats.split(",").collect(); - let all_chats = messages::iter_rooms(rooms, bearer_token, args.debug, args.images); + if args.images == true { + export_formats.push("images") + } - decide_export(all_chats, args); + for room in rooms { + for format in export_formats.clone() { + match format { + "txt" => export::export_room_chats_txt(room.to_owned()), + "json" => export::export_room_chats_json(room.to_owned()), + "csv" => export::export_room_chats_csv(room.to_owned()), + "images" => export::export_room_images(room.to_owned()), + _ => println!("Not valid Format"), + } + } + } } diff --git a/src/messages.rs b/src/messages.rs deleted file mode 100644 index b17d570..0000000 --- a/src/messages.rs +++ /dev/null @@ -1,192 +0,0 @@ -use super::id_translation::id_to_displayname; -use super::images; -use chrono::SecondsFormat::Secs; -use chrono::{TimeZone, Utc}; -use serde::{Deserialize, Serialize}; -use serde_json::Value; -use std::time::Duration; - -/// Struct for a singular message. -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Message { - pub author: String, - pub message: String, - pub timestamp: String, -} - -/// Struct containing a chat/room. -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct Chat { - pub id: String, - pub messages: Vec, - pub next_batch: String, -} - -/// Contains all the chats/rooms. -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AllChats { - pub chats: Vec, -} - -/// Returns list of all rooms that the user is joined to as per [SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixclientv3directorylistroomroomid) -pub fn list_rooms(bearer_token: String, debug: bool) -> Vec { - // Create a Reqwest client - let client: reqwest::blocking::Client; - if debug { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .danger_accept_invalid_certs(true) // Used in development to trust a proxy - .timeout(Duration::from_secs(60)) - .build() - .expect("Error making Reqwest Client"); - } else { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .timeout(Duration::from_secs(60)) - .build() - .expect("Error making Reqwest Client"); - } - - let resp = client - .get("https://matrix.redditspace.com/_matrix/client/v3/joined_rooms") - .header("Authorization", format!("Bearer {}", bearer_token)) - .send() - .expect("Failed to send HTTP request; to obtain rooms"); - - let body = resp.text().expect("Error parsing body"); - let json: Value = serde_json::from_str(&body).expect("Error parsing Rooms list JSON"); - let rooms = json["joined_rooms"] - .as_array() - .expect("Error parsing array"); - - info!("Found {} room(s) ", rooms.len()); - return rooms.to_vec(); -} - -/// Returns a Chat struct for this room as per [SPEC](https://spec.matrix.org/v1.6/client-server-api/#get_matrixclientv3roomsroomidmessages) -pub fn get_messages(bearer_token: String, room_id: &str, since: String, debug: bool, export_images: bool) -> Chat { - info!("Getting messages for room: {room_id}"); - - // Create a Reqwest client - let client: reqwest::blocking::Client; - if debug { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .danger_accept_invalid_certs(true) // Used in development to trust a proxy - .timeout(Duration::from_secs(60)) - .build() - .expect("Error making Reqwest Client"); - } else { - client = reqwest::blocking::Client::builder() - .cookie_store(true) - .timeout(Duration::from_secs(60)) - .build() - .expect("Error making Reqwest Client"); - } - - let url; - - // If it is a next batch then add the since - if since == "REXIT-INITIAL".to_owned() { - url = format!("https://matrix.redditspace.com/_matrix/client/r0/rooms/{room_id}/messages?limit=10000&dir=b"); - } else { - url =format!("https://matrix.redditspace.com/_matrix/client/r0/rooms/{room_id}/messages?limit=10000&dir=b&from={since}"); - } - - let response = client - .get(url) - .header("Authorization", format!("Bearer {}", bearer_token)) - .send() - .expect("Failed to send HTTP request; to obtain messages"); - - let body = response.text().expect("Error parsing request body"); - let json: Value = serde_json::from_str(&body).expect("Error parsing JSON response"); - - // Contains all the messages for this chat - let mut chat = Chat { - id: room_id.to_owned(), - messages: Vec::new(), - next_batch: String::new(), - }; - - // Loop through the messages within the chunk - for message in json["chunk"].as_array().unwrap() { - // Check if it is a text/image - if message["type"] == "m.room.message" { - // Parse the unix timestamp and convert to ISO - let timestamp = message["origin_server_ts"] - .as_i64() - .expect("Failed to parse timestamp") - / 1000; - - let timestamp = Utc - .timestamp_opt(timestamp, 0) - .unwrap() - .to_rfc3339_opts(Secs, true) - .to_string(); - - // If its a image show the MXC url as content - let message_content: String; - if export_images && message["content"]["msgtype"] == "m.image" { - message_content = message["content"]["url"].as_str().unwrap().to_string(); - images::export_image(&client, message_content.clone()); - } else { - let tmp = message["content"]["body"].as_str(); - if tmp.is_none() { - warn!("Failed to get message - may have been deleted"); - continue; - } - message_content = tmp.unwrap().to_string(); - } - - let message_struct = Message { - author: id_to_displayname(message["sender"].as_str().unwrap().to_string(), debug), - message: message_content, - timestamp: timestamp, - }; - chat.messages.push(message_struct); - } - } - // Append next batch to chat - debug!("End token {}", json["end"].as_str().unwrap().to_string()); - chat.next_batch = json["end"].as_str().unwrap().to_string(); - return chat; -} - -/// Iterate over all rooms to return chats -pub fn iter_rooms(rooms: Vec, bearer: String, debug: bool, export_images: bool) -> AllChats { - let mut all_chats = AllChats { chats: Vec::new() }; - - // Iterate over rooms and request their messages - for room in rooms { - let mut next_batch: String = "REXIT-INITIAL".to_owned(); - - while next_batch != "t0_0" { - let mut found_chat = false; - let chat_struct = get_messages( - bearer.clone(), - room.as_str().unwrap(), - next_batch, - debug, - export_images - ); - next_batch = chat_struct.next_batch.clone(); - - // Check if a chat with that ID already exits; if yes then append the messages - for chat in all_chats.chats.iter_mut() { - if chat.id == chat_struct.id { - chat.messages.extend_from_slice(&chat_struct.messages); - found_chat = true; - break; - } - } - - // If the chat is not already present, add it to the list of all chats - if !found_chat { - all_chats.chats.push(chat_struct.clone()); - } - } - } - - all_chats -} diff --git a/test_resources/test_cases/ReAPI/images/get_images/.gitignore b/test_resources/test_cases/ReAPI/images/get_images/.gitignore new file mode 100644 index 0000000..86d0cb2 --- /dev/null +++ b/test_resources/test_cases/ReAPI/images/get_images/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore \ No newline at end of file diff --git a/tests/tests.rs b/tests/tests.rs deleted file mode 100644 index e832665..0000000 --- a/tests/tests.rs +++ /dev/null @@ -1,14 +0,0 @@ -#[test] -#[ignore = "creds"] -fn login() { - let (username, password) = get_login(); - - rexit::login(username, password); -} - -fn get_login() -> (String, String) { - let username = std::env::var("REXIT_USERNAME").expect("Could not find username in env"); - let password = std::env::var("REXIT_PASSWORD").expect("Could not find password in env"); - - (username, password) -} \ No newline at end of file