diff --git a/src/asinfo/mod.rs b/src/asinfo/mod.rs
index fab130a..0204c7d 100644
--- a/src/asinfo/mod.rs
+++ b/src/asinfo/mod.rs
@@ -62,12 +62,13 @@
 
 mod hegemony;
 mod population;
+mod sibling_orgs;
 
 pub use crate::asinfo::hegemony::HegemonyData;
 pub use crate::asinfo::population::AsnPopulationData;
+use crate::asinfo::sibling_orgs::SiblingOrgsUtils;
 use crate::BgpkitCommons;
 use anyhow::{anyhow, Result};
-use oneio::OneIoError;
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use tracing::info;
@@ -95,6 +96,8 @@ const BGPKIT_ASN_TXT_MIRROR_URL: &str = "https://data.bgpkit.com/commons/asn.txt
 
 pub struct AsInfoUtils {
     pub asinfo_map: HashMap<u32, AsInfo>,
+    /// Sibling-organization table; `Some` only when `load_as2org` was requested.
+    pub sibling_orgs: Option<SiblingOrgsUtils>,
     pub load_as2org: bool,
     pub load_population: bool,
     pub load_hegemony: bool,
@@ -103,8 +106,14 @@ pub struct AsInfoUtils {
 impl AsInfoUtils {
     pub fn new(load_as2org: bool, load_population: bool, load_hegemony: bool) -> Result<Self> {
         let asinfo_map = get_asinfo_map(load_as2org, load_population, load_hegemony)?;
+        let sibling_orgs = if load_as2org {
+            Some(SiblingOrgsUtils::new()?)
+        } else {
+            None
+        };
         Ok(AsInfoUtils {
             asinfo_map,
+            sibling_orgs,
             load_as2org,
             load_population,
             load_hegemony,
@@ -225,7 +234,22 @@ impl BgpkitCommons {
             let org_1_opt = info_1_opt.unwrap().as2org;
             let org_2_opt = info_2_opt.unwrap().as2org;
             if org_1_opt.is_some() && org_2_opt.is_some() {
-                return Ok(org_1_opt.unwrap().org_id == org_2_opt.unwrap().org_id);
+                let org_id_1 = org_1_opt.unwrap().org_id;
+                let org_id_2 = org_2_opt.unwrap().org_id;
+                if org_id_1 == org_id_2 {
+                    return Ok(true);
+                }
+
+                // The sibling table is only loaded together with as2org data;
+                // if it is absent, report "not siblings" instead of panicking.
+                let cross_org_siblings = self
+                    .asinfo
+                    .as_ref()
+                    .and_then(|asinfo| asinfo.sibling_orgs.as_ref())
+                    .map_or(false, |siblings| {
+                        siblings.are_sibling_orgs(org_id_1.as_str(), org_id_2.as_str())
+                    });
+                return Ok(cross_org_siblings);
             }
         }
         Ok(false)
diff --git a/src/asinfo/sibling_orgs.rs b/src/asinfo/sibling_orgs.rs
new file mode 100644
index 0000000..bdd85a8
--- /dev/null
+++ b/src/asinfo/sibling_orgs.rs
@@ -0,0 +1,84 @@
+use anyhow::Result;
+use std::collections::{HashMap, HashSet};
+use tracing::info;
+
+const BGPKIT_SIBLING_ORGS_URL: &str = "https://data.bgpkit.com/commons/sibling-orgs.txt";
+
+/// Lookup table of organizations known to be siblings of one another.
+///
+/// Organization IDs are stored lowercased, so lookups are case-insensitive.
+pub struct SiblingOrgsUtils {
+    /// Maps a lowercased organization ID to the lowercased IDs of its whole group.
+    sibling_orgs_map: HashMap<String, HashSet<String>>,
+}
+
+impl SiblingOrgsUtils {
+    /// Downloads the sibling-organizations list and builds the lookup table.
+    ///
+    /// Every non-empty line that does not start with `#` is read as one
+    /// whitespace-separated group of sibling organization IDs.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the list cannot be opened or a line cannot be read.
+    pub fn new() -> Result<Self> {
+        info!(
+            "loading sibling orgs information from {}",
+            BGPKIT_SIBLING_ORGS_URL
+        );
+
+        let mut sibling_orgs_map: HashMap<String, HashSet<String>> = HashMap::new();
+        for line in oneio::read_lines(BGPKIT_SIBLING_ORGS_URL)? {
+            let line = line?;
+            let line = line.trim();
+            if line.is_empty() || line.starts_with('#') {
+                // skip empty lines and comment lines
+                continue;
+            }
+
+            let mut group: HashSet<String> =
+                line.split_whitespace().map(str::to_lowercase).collect();
+
+            // An organization may be listed on more than one line; fold the
+            // groups recorded so far into this one so no relation is lost.
+            let known: Vec<HashSet<String>> = group
+                .iter()
+                .filter_map(|org| sibling_orgs_map.get(org).cloned())
+                .collect();
+            group.extend(known.into_iter().flatten());
+
+            for org in &group {
+                sibling_orgs_map.insert(org.clone(), group.clone());
+            }
+        }
+
+        Ok(SiblingOrgsUtils { sibling_orgs_map })
+    }
+
+    /// Returns `true` if both organization IDs belong to the same sibling group.
+    ///
+    /// The comparison is case-insensitive. An organization that is absent from
+    /// the table is never reported as a sibling of anything.
+    pub fn are_sibling_orgs(&self, org_1: &str, org_2: &str) -> bool {
+        self.sibling_orgs_map
+            .get(&org_1.to_lowercase())
+            .map_or(false, |group| group.contains(&org_2.to_lowercase()))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_sibling_orgs() {
+        let utils = SiblingOrgsUtils::new().unwrap();
+
+        // GTT
+        assert!(utils.are_sibling_orgs("GC-494-ARIN", "ORG-GCI2-RIPE"));
+        // GTT with random cases
+        assert!(utils.are_sibling_orgs("Gc-494-ArIn", "OrG-gCi2-RiPe"));
+        // GTT and Cogent (not sibling)
+        assert!(!utils.are_sibling_orgs("GC-494-ARIN", "COGC-ARIN"));
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7f49dda..fbb12f2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -148,3 +148,15 @@ impl BgpkitCommons {
         Ok(())
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_siblings() {
+        let mut commons = BgpkitCommons::new();
+        commons.load_asinfo(true, false, false).unwrap();
+        assert!(commons.asinfo_are_siblings(174, 1239).unwrap());
+    }
+}