From caea4369c399e7fc4a98207b36c9bcfd79ea7e4f Mon Sep 17 00:00:00 2001 From: fan-tastic-z Date: Mon, 3 Jun 2024 14:58:08 +0800 Subject: [PATCH] feat: support crawling anti data --- src/app.rs | 11 ++ src/grab/anti.rs | 247 +++++++++++++++++++++++++++++++++++++++ src/grab/avd.rs | 5 +- src/grab/kev.rs | 4 +- src/grab/mod.rs | 3 + src/grab/oscs.rs | 5 +- src/grab/seebug.rs | 5 +- src/grab/threatbook.rs | 7 +- src/grab/ti.rs | 4 +- src/push/msg_template.rs | 17 ++- src/utils/http_client.rs | 1 - src/utils/mod.rs | 7 ++ 12 files changed, 299 insertions(+), 17 deletions(-) create mode 100644 src/grab/anti.rs diff --git a/src/app.rs b/src/app.rs index d2f1a8b..ec3dfdb 100644 --- a/src/app.rs +++ b/src/app.rs @@ -154,10 +154,12 @@ impl WatchVulnApp { } async fn push_init_msg(&self, local_count: u64) -> Result<()> { + let grabs = self.get_all_grabs(); let init_msg = render_init( VERSION.to_string(), local_count, self.app_context.config.task.cron_config.clone(), + grabs, )?; self.push_all("WatchVuln-rs init success".to_string(), init_msg) @@ -165,6 +167,15 @@ impl WatchVulnApp { Ok(()) } + fn get_all_grabs(&self) -> Vec { + let grabs = self.grabs.clone(); + let mut res = Vec::new(); + for v in grabs.values() { + res.push(v.get_name()) + } + res + } + pub async fn push_all(&self, title: String, msg: String) -> bool { let mut set = JoinSet::new(); for bot in &self.app_context.bot_manager.bots { diff --git a/src/grab/anti.rs b/src/grab/anti.rs new file mode 100644 index 0000000..38ea83e --- /dev/null +++ b/src/grab/anti.rs @@ -0,0 +1,247 @@ +use crate::{ + error::{Error, Result}, + utils::data_str_format, +}; +use async_trait::async_trait; +use regex::Regex; +use reqwest::header; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use tracing::{info, warn}; + +use crate::utils::http_client::Help; + +use super::{Grab, VulnInfo}; + +const ANTI_LIST_URL: &str = "https://www.antiycloud.com/api/safeNotice/list"; +const ANTI_CVEID_REGEXP: &str = r"CVE-\d+\-\d+"; + +#[derive(Default)] +pub struct AntiCrawler { + pub name: String, + pub display_name: String, + pub link: String, + pub help: Help, +} + +#[async_trait] +impl Grab for AntiCrawler { + async fn get_update(&self, _page_limit: i32) -> Result> { + let anti_list_response = self.get_anti_list_response().await?; + // anti is the data summarized on a daily basis. Only the latest day’s data is obtained here. 
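+        // The notice body appears to follow a fixed section layout: body[0] holds the
+        // vulnerability description, body[2] the affected scope (影响范围), and body[3] the
+        // remediation text plus reference links. The index-based parsing below
+        // (nth / get_references) assumes that layout.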
+        let first_data = &anti_list_response.data.list[0].body;
+        let mut res = Vec::with_capacity(first_data.len());
+        let disclosure_time = &anti_list_response.data.list[0].notice_time;
+        let disclosure = data_str_format(disclosure_time)?;
+        for data in first_data.iter() {
+            let description = self.get_description(data);
+            let title = data.title.clone().split_off(2);
+            let cve = self.get_cve(&title);
+
+            let unique_key = match cve {
+                Ok(unique_key) => unique_key,
+                Err(e) => {
+                    warn!("AntiCrawler get update not found cve error:{}", e);
+                    continue;
+                }
+            };
+            let solutions = self.nth(data, 3);
+            let from = format!(
+                "https://www.antiycloud.com/#/infodetail/{}",
+                disclosure_time
+            );
+            let references = self.get_references(data);
+            let vuln = VulnInfo {
+                unique_key: unique_key.clone(),
+                title,
+                description,
+                severity: super::Severity::High,
+                cve: unique_key,
+                disclosure: disclosure.clone(),
+                references,
+                solutions,
+                from,
+                tags: vec![],
+                reasons: vec![],
+                is_valuable: true,
+            };
+            res.push(vuln);
+        }
+        info!("{} crawling count {}", self.get_name(), res.len());
+        Ok(res)
+    }
+
+    fn get_name(&self) -> String {
+        self.display_name.to_owned()
+    }
+}
+
+impl AntiCrawler {
+    pub fn new() -> Self {
+        let mut headers = header::HeaderMap::new();
+        headers.insert(
+            "Origin",
+            header::HeaderValue::from_static("https://www.antiycloud.com"),
+        );
+        headers.insert(
+            "Referer",
+            header::HeaderValue::from_static("https://www.antiycloud.com"),
+        );
+        let help = Help::new(headers);
+        AntiCrawler {
+            name: "antiycloud".to_string(),
+            display_name: "安天威胁情报中心".to_string(),
+            link: "https://www.antiycloud.com/#/antiy/safenotice".to_string(),
+            help,
+        }
+    }
+
+    async fn get_anti_list_response(&self) -> Result<AntiResponse> {
+        let params = json!({
+            "search":{
+                "value":""
+            },
+            "type":"",
+            "pagination":{
+                "current":1,
+                "pageSize":10,
+                "total":0
+            },
+            "sorter":{
+                "field":"ar_time",
+                "order":"descend"
+            },
+            "dict":{
+                "time_range":[]
+            }
+        });
+        let anti_response: AntiResponse = self
+            .help
+            .post_json(ANTI_LIST_URL, &params)
+            .await?
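+            // Help::post_json presumably hands back the raw reqwest::Response; its JSON
+            // body is deserialized into AntiResponse below.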
+ .json() + .await?; + Ok(anti_response) + } + + fn nth(&self, data: &Body, n: usize) -> String { + if n == 0 { + data.body[n].content[1].data.clone() + } else { + let mut res = "".to_string(); + for c in &data.body[n].content { + res += &c.data; + } + res + } + } + + fn get_references(&self, data: &Body) -> Vec { + let mut references = Vec::new(); + for c in &data.body[3].content { + if c.r#type == "link" { + references.push(c.data.clone()); + } + } + references + } + + fn get_description(&self, data: &Body) -> String { + let description = self.nth(data, 0); + let influence = self.get_sphere_of_influence(data); + let description = format!("{}\n{}", description, influence); + description + } + + fn get_sphere_of_influence(&self, data: &Body) -> String { + let influence = self.nth(data, 2); + let res = format!("影响范围: \n {}", influence); + res + } + + fn get_cve(&self, title: &str) -> Result { + let res = Regex::new(ANTI_CVEID_REGEXP)?.captures(title); + if let Some(cve) = res { + Ok(cve[0].to_string()) + } else { + Err(Error::Message("cve regex match not found".to_owned())) + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AntiResponse { + pub status: String, + pub data: Data, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Data { + pub current: i32, + pub total: i32, + pub list: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct List { + pub id: i32, + pub title: String, + #[serde(rename = "typeId")] + pub type_id: i32, + pub m_type: String, + pub visitcount: i32, + pub r#abstract: String, + pub content: String, + pub department: String, + pub r#type: String, + pub time: String, + pub notice_time: String, + pub body: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Body { + pub title: String, + pub body: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BodyDetail { + pub subtitle: String, + pub content: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContentDetail { + pub data: String, + pub r#type: String, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test] + async fn test_get_anti_response() -> Result<()> { + let anti = AntiCrawler::new(); + let res = anti.get_anti_list_response().await?; + println!("{:?}", res); + Ok(()) + } + + #[tokio::test] + async fn test_get_update() -> Result<()> { + let anti = AntiCrawler::new(); + let res = anti.get_update(1).await?; + println!("{:?}", res); + Ok(()) + } + + #[test] + fn test_get_cve() -> Result<()> { + let anti = AntiCrawler::new(); + let res = anti.get_cve("1 Check Point安全网关 MyCRL 任意文件读取漏洞(CVE-2024-24919)")?; + assert_eq!(res, "CVE-2024-24919"); + Ok(()) + } +} diff --git a/src/grab/avd.rs b/src/grab/avd.rs index ec4fea0..007f514 100644 --- a/src/grab/avd.rs +++ b/src/grab/avd.rs @@ -3,7 +3,7 @@ use eyre::eyre; use regex::Regex; use reqwest::{header, Url}; use scraper::{Html, Selector}; -use tracing::{debug, warn}; +use tracing::{debug, info, warn}; use crate::{ error::{Error, Result}, @@ -36,11 +36,12 @@ impl Grab for AVDCrawler { let data = self.parse_page(i).await?; res.extend(data) } + info!("{} crawling count {}", self.get_name(), res.len()); Ok(res) } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } diff --git a/src/grab/kev.rs b/src/grab/kev.rs index f973d2d..18fe46a 100644 --- a/src/grab/kev.rs +++ b/src/grab/kev.rs @@ -4,6 +4,7 @@ use async_trait::async_trait; use chrono::{DateTime, FixedOffset}; use reqwest::header::{self}; use serde::{Deserialize, 
Serialize}; +use tracing::info; use super::{Grab, VulnInfo}; use crate::error::Result; @@ -61,11 +62,12 @@ impl Grab for KevCrawler { }; res.push(vuln_info) } + info!("{} crawling count {}", self.get_name(), res.len()); Ok(res) } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } diff --git a/src/grab/mod.rs b/src/grab/mod.rs index 935fced..c1e31e5 100644 --- a/src/grab/mod.rs +++ b/src/grab/mod.rs @@ -1,3 +1,4 @@ +pub mod anti; pub mod avd; pub mod kev; pub mod oscs; @@ -8,6 +9,7 @@ pub mod ti; use std::{collections::HashMap, fmt}; use crate::{error::Result, models::_entities::vuln_informations::Model}; +use anti::AntiCrawler; use async_trait::async_trait; pub use avd::AVDCrawler; use serde::{Deserialize, Serialize}; @@ -120,5 +122,6 @@ pub fn init() -> GrabManager { manager.register(Box::new(KevCrawler::new())); manager.register(Box::new(TiCrawler::new())); manager.register(Box::new(ThreadBookCrawler::new())); + manager.register(Box::new(AntiCrawler::new())); manager } diff --git a/src/grab/oscs.rs b/src/grab/oscs.rs index 2294d97..23dcddb 100644 --- a/src/grab/oscs.rs +++ b/src/grab/oscs.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use chrono::{DateTime, FixedOffset}; use reqwest::header::{self}; use serde::{Deserialize, Serialize}; -use tracing::error; +use tracing::{error, info}; use crate::{ error::{Error, Result}, @@ -38,11 +38,12 @@ impl Grab for OscCrawler { let data = self.parse_page(i).await?; res.extend(data) } + info!("{} crawling count {}", self.get_name(), res.len()); Ok(res) } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } diff --git a/src/grab/seebug.rs b/src/grab/seebug.rs index 02af6dd..e1eee92 100644 --- a/src/grab/seebug.rs +++ b/src/grab/seebug.rs @@ -2,7 +2,7 @@ use async_trait::async_trait; use eyre::eyre; use reqwest::header::{self}; use scraper::{ElementRef, Html, Selector}; -use tracing::warn; +use tracing::{info, warn}; use crate::error::{Error, Result}; use crate::grab::{Severity, VulnInfo}; @@ -32,11 +32,12 @@ impl Grab for SeeBugCrawler { let data = self.parse_page(i).await?; res.extend(data) } + info!("{} crawling count {}", self.get_name(), res.len()); Ok(res) } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } diff --git a/src/grab/threatbook.rs b/src/grab/threatbook.rs index dc91128..1e385a5 100644 --- a/src/grab/threatbook.rs +++ b/src/grab/threatbook.rs @@ -28,10 +28,6 @@ impl Grab for ThreadBookCrawler { let crawler = ThreadBookCrawler::new(); let home_page_resp: ThreadBookHomePage = crawler.help.get_json(HOME_PAGE_URL).await?.json().await?; - info!( - "thread book get {} vulns", - home_page_resp.data.high_risk.len() - ); let mut res = Vec::with_capacity(home_page_resp.data.high_risk.len()); for v in home_page_resp.data.high_risk { let mut is_valuable = false; @@ -72,11 +68,12 @@ impl Grab for ThreadBookCrawler { }; res.push(vuln); } + info!("{} crawling count {}", self.get_name(), res.len()); Ok(res) } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } diff --git a/src/grab/ti.rs b/src/grab/ti.rs index 9df86d5..97d742e 100644 --- a/src/grab/ti.rs +++ b/src/grab/ti.rs @@ -1,6 +1,7 @@ use async_trait::async_trait; use reqwest::header::{self}; use serde::{Deserialize, Serialize}; +use tracing::info; use super::{Grab, Severity, VulnInfo}; use crate::error::Result; @@ -23,7 +24,7 @@ impl Grab for TiCrawler { } fn get_name(&self) -> String { - self.name.to_owned() + self.display_name.to_owned() } } 
@@ -84,6 +85,7 @@ impl TiCrawler { } vuln_infos.push(vuln_info); } + info!("{} crawling count {}", self.get_name(), vuln_infos.len()); Ok(vuln_infos) } diff --git a/src/push/msg_template.rs b/src/push/msg_template.rs index 67fd2c4..54fdf06 100644 --- a/src/push/msg_template.rs +++ b/src/push/msg_template.rs @@ -24,8 +24,13 @@ const VULN_INFO_MSG_TEMPLATE: &str = r####" {% endfor %}{% endif %}"####; const INIT_MSG_TEMPLATE: &str = r#" -数据初始化完成,当前版本 {{ version }} 本地漏洞数量: {{ vuln_count }} 检查周期配置: {{ cron_config }} -"#; +数据初始化完成 +当前版本: {{ version }} +本地漏洞数量: {{ vuln_count }} +检查周期配置: {{ cron_config }} + +目前爬取的数据源:{% for v in grabs %} +{{ loop.index }}.{{ v }}{% endfor %}"#; const MAX_REFERENCE_LENGTH: usize = 8; @@ -60,12 +65,18 @@ pub fn escape_markdown(input: String) -> String { .replace('!', "\\!") } -pub fn render_init(version: String, vuln_count: u64, cron_config: String) -> Result { +pub fn render_init( + version: String, + vuln_count: u64, + cron_config: String, + grabs: Vec, +) -> Result { let json_value = serde_json::json!( { "version": version, "vuln_count": vuln_count, "cron_config": cron_config, + "grabs": grabs } ); let markdown = render_string(INIT_MSG_TEMPLATE, &json_value)?; diff --git a/src/utils/http_client.rs b/src/utils/http_client.rs index 74d1041..2a3fab2 100644 --- a/src/utils/http_client.rs +++ b/src/utils/http_client.rs @@ -14,7 +14,6 @@ impl Default for Help { } impl Help { pub fn new(mut headers: HeaderMap) -> Self { - // let mut headers = header::HeaderMap::new(); headers.insert("User-Agent", header::HeaderValue::from_static("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")); let client = reqwest::Client::builder() .redirect(reqwest::redirect::Policy::none()) diff --git a/src/utils/mod.rs b/src/utils/mod.rs index bfacaaf..a68cc72 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -16,6 +16,13 @@ pub fn check_over_two_week(date: &str) -> Result { Ok(false) } +// data_str_format convernt 20240603 to 2024-06-03 +pub fn data_str_format(date: &str) -> Result { + let date = NaiveDate::parse_from_str(date, "%Y%m%d")?; + let formatted_date = format!("{}", date.format("%Y-%m-%d")); + Ok(formatted_date) +} + pub fn timestamp_to_date(timestamp: i64) -> Result { let dt = DateTime::from_timestamp_millis(timestamp); if let Some(dt) = dt {
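
As a quick illustration (not part of the diff above): the crawler's parsing hinges on two small helpers, the date normalization added in src/utils/mod.rs and the CVE regex used by AntiCrawler::get_cve. The sketch below is a simplified, standalone rendering of both; extract_cve is a hypothetical free-function form of get_cve and the error types are narrowed for brevity, but the chrono and regex calls mirror the patch.

use chrono::NaiveDate;
use regex::Regex;

// data_str_format: convert Antiy's notice_time, e.g. "20240603", into "2024-06-03".
fn data_str_format(date: &str) -> Result<String, chrono::ParseError> {
    let date = NaiveDate::parse_from_str(date, "%Y%m%d")?;
    Ok(date.format("%Y-%m-%d").to_string())
}

// extract_cve: pull the CVE id out of a notice title, mirroring ANTI_CVEID_REGEXP.
fn extract_cve(title: &str) -> Option<String> {
    Regex::new(r"CVE-\d+-\d+")
        .ok()?
        .captures(title)
        .map(|caps| caps[0].to_string())
}

fn main() {
    assert_eq!(data_str_format("20240603").unwrap(), "2024-06-03");
    assert_eq!(
        extract_cve("Check Point安全网关 MyCRL 任意文件读取漏洞(CVE-2024-24919)").as_deref(),
        Some("CVE-2024-24919")
    );
}

Placing the YYYYMMDD-to-YYYY-MM-DD conversion in utils keeps it reusable for any other grabber whose source reports dates in that compact form.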