diff --git a/src/booster/adaboost/adaboost_algorithm.rs b/src/booster/adaboost/adaboost_algorithm.rs index 2f10111..3f1e332 100644 --- a/src/booster/adaboost/adaboost_algorithm.rs +++ b/src/booster/adaboost/adaboost_algorithm.rs @@ -38,10 +38,10 @@ use std::ops::ControlFlow; /// /// # Related information /// - As some papers proved, -/// `AdaBoost` **approximately maximizes the hard margin.** +/// `AdaBoost` **approximately maximizes the hard margin.** /// /// - [`AdaBoostV`](crate::booster::AdaBoostV), -/// a successor of AdaBoost, maximizes the hard margin. +/// a successor of AdaBoost, maximizes the hard margin. /// /// /// ```no_run @@ -49,7 +49,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -259,7 +259,7 @@ impl Booster for AdaBoost<'_, F> let quit = if let Some(it) = self.force_quit_at { format!("At round {it}") } else { - format!("-") + "-".to_string() }; let info = Vec::from([ ("# of examples", format!("{}", n_sample)), diff --git a/src/booster/adaboostv/adaboostv_algorithm.rs b/src/booster/adaboostv/adaboostv_algorithm.rs index 792e51c..ada5e76 100644 --- a/src/booster/adaboostv/adaboostv_algorithm.rs +++ b/src/booster/adaboostv/adaboostv_algorithm.rs @@ -69,7 +69,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -291,7 +291,7 @@ impl Booster for AdaBoostV<'_, F> let quit = if let Some(it) = self.force_quit_at { format!("At round {it}") } else { - format!("-") + "-".to_string() }; let info = Vec::from([ ("# of examples", format!("{}", n_sample)), diff --git a/src/booster/cerlpboost/cerlpboost_algorithm.rs b/src/booster/cerlpboost/cerlpboost_algorithm.rs index 59753c9..1f02697 100644 --- a/src/booster/cerlpboost/cerlpboost_algorithm.rs +++ b/src/booster/cerlpboost/cerlpboost_algorithm.rs @@ -72,7 +72,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") diff --git a/src/booster/erlpboost/erlpboost_algorithm.rs b/src/booster/erlpboost/erlpboost_algorithm.rs index 7f443e4..6a636bb 100644 --- a/src/booster/erlpboost/erlpboost_algorithm.rs +++ b/src/booster/erlpboost/erlpboost_algorithm.rs @@ -64,7 +64,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") diff --git a/src/booster/erlpboost/qp_model.rs b/src/booster/erlpboost/qp_model.rs index 49629d7..b4a0c95 100644 --- a/src/booster/erlpboost/qp_model.rs +++ b/src/booster/erlpboost/qp_model.rs @@ -170,7 +170,7 @@ impl QPModel { /// Returns `true` if `dist[i] > 0` holds for all `i = 1, 2, ..., m.` pub(self) fn all_positive(&self, dist: &[f64]) -> bool { - dist.into_iter() + dist.iter() .copied() .all(|d| d > 0f64) } @@ -179,7 +179,7 @@ impl QPModel { pub(self) fn build_linear_part_objective(&self, dist: &[f64]) -> Vec { let mut linear = Vec::with_capacity(1 + self.n_examples); linear.push(1f64); - let iter = dist.into_iter() + let iter = dist.iter() .copied() .map(|di| (1f64 / self.eta) * di.ln()); linear.extend(iter); diff --git a/src/booster/gradient_boost/gbm.rs b/src/booster/gradient_boost/gbm.rs index ce042ef..0d8d01b 100644 --- a/src/booster/gradient_boost/gbm.rs +++ b/src/booster/gradient_boost/gbm.rs @@ -44,7 +44,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -149,7 +149,7 @@ impl<'a, F, L> GBM<'a, F, L> } -impl<'a, F, L> GBM<'a, F, L> { +impl GBM<'_, F, L> { /// Returns the maximum iteration /// of the `GBM` to find a combined hypothesis /// that has error at most `tolerance`. @@ -194,7 +194,7 @@ impl Booster for GBM<'_, F, L> ("# of examples", format!("{n_sample}")), ("# of features", format!("{n_feature}")), ("Tolerance", format!("{}", self.tolerance)), - ("Loss", format!("{}", self.loss.name())), + ("Loss", self.loss.name().to_string()), ("Max iteration", format!("{}", self.max_iter)), ]); Some(info) @@ -236,7 +236,7 @@ impl Booster for GBM<'_, F, L> let predictions = h.predict_all(self.sample); let coef = self.loss.best_coefficient( - &self.sample.target(), &predictions[..] + self.sample.target(), &predictions[..] ); // If the best coefficient is zero, diff --git a/src/booster/graph_separation_boosting/graph_separation_algorithm.rs b/src/booster/graph_separation_boosting/graph_separation_algorithm.rs index a5f17f5..ccd911e 100644 --- a/src/booster/graph_separation_boosting/graph_separation_algorithm.rs +++ b/src/booster/graph_separation_boosting/graph_separation_algorithm.rs @@ -45,7 +45,7 @@ use std::collections::HashSet; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -113,7 +113,7 @@ impl<'a, F> GraphSepBoost<'a, F> { } } -impl<'a, F> GraphSepBoost<'a, F> +impl GraphSepBoost<'_, F> where F: Classifier { /// Returns a weight on the new hypothesis. @@ -236,7 +236,7 @@ impl Booster for GraphSepBoost<'_, F> where W: WeakLearner { let hypotheses = std::mem::take(&mut self.hypotheses); - NaiveAggregation::new(hypotheses, &self.sample) + NaiveAggregation::new(hypotheses, self.sample) } } @@ -246,6 +246,6 @@ impl Research for GraphSepBoost<'_, H> { type Output = NaiveAggregation; fn current_hypothesis(&self) -> Self::Output { - NaiveAggregation::from_slice(&self.hypotheses, &self.sample) + NaiveAggregation::from_slice(&self.hypotheses, self.sample) } } diff --git a/src/booster/lpboost/lpboost_algorithm.rs b/src/booster/lpboost/lpboost_algorithm.rs index f62b2e0..355b256 100644 --- a/src/booster/lpboost/lpboost_algorithm.rs +++ b/src/booster/lpboost/lpboost_algorithm.rs @@ -58,8 +58,8 @@ use std::ops::ControlFlow; /// # Related information /// - Currently (2023), `LPBoost` has no convergence guarantee. /// - [`ERLPBoost`](crate::booster::ERLPBoost), -/// A stabilized version of `LPBoost` is -/// proposed by Warmuth et al. (2008). +/// A stabilized version of `LPBoost` is +/// proposed by Warmuth et al. (2008). /// /// # Example /// The following code shows a small example for running [`LPBoost`]. @@ -70,7 +70,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -262,7 +262,7 @@ impl Booster for LPBoost<'_, F> ("# of examples", format!("{n_sample}")), ("# of features", format!("{n_feature}")), ("Tolerance", format!("{}", self.tolerance)), - ("Max iteration", format!("-")), + ("Max iteration", "-".to_string()), ("Capping (outliers)", format!("{nu} ({ratio: >7.3} %)")) ]); Some(info) diff --git a/src/booster/madaboost/madaboost_algorithm.rs b/src/booster/madaboost/madaboost_algorithm.rs index 6e9c8e1..d98c87f 100644 --- a/src/booster/madaboost/madaboost_algorithm.rs +++ b/src/booster/madaboost/madaboost_algorithm.rs @@ -44,7 +44,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -274,7 +274,7 @@ impl Booster for MadaBoost<'_, F> let quit = if let Some(it) = self.force_quit_at { format!("At round {it}") } else { - format!("-") + "-".to_string() }; let info = Vec::from([ ("# of examples", format!("{}", n_sample)), diff --git a/src/booster/mlpboost/mlpboost_algorithm.rs b/src/booster/mlpboost/mlpboost_algorithm.rs index bf3c70f..d14387c 100644 --- a/src/booster/mlpboost/mlpboost_algorithm.rs +++ b/src/booster/mlpboost/mlpboost_algorithm.rs @@ -92,7 +92,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") @@ -388,7 +388,7 @@ impl Booster for MLPBoost<'_, F> ("Max iteration", format!("{}", self.max_iter)), ("Capping (outliers)", format!("{nu} ({ratio: >7.3} %)")), ("Primary", format!("{}", self.primary.current_type())), - ("Secondary", format!("LPBoost")) + ("Secondary", "LPBoost".to_string()) ]); Some(info) } diff --git a/src/booster/mlpboost/perturbed_lp_model.rs b/src/booster/mlpboost/perturbed_lp_model.rs index e288470..7c318d3 100644 --- a/src/booster/mlpboost/perturbed_lp_model.rs +++ b/src/booster/mlpboost/perturbed_lp_model.rs @@ -3,7 +3,6 @@ use clarabel::{ solver::*, }; -use rand; use rand::rngs::StdRng; use rand::prelude::*; diff --git a/src/booster/smoothboost/smoothboost_algorithm.rs b/src/booster/smoothboost/smoothboost_algorithm.rs index 9e3adba..545e210 100644 --- a/src/booster/smoothboost/smoothboost_algorithm.rs +++ b/src/booster/smoothboost/smoothboost_algorithm.rs @@ -49,7 +49,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") diff --git a/src/booster/softboost/qp_model.rs b/src/booster/softboost/qp_model.rs index 7f9a8f6..1fb95a3 100644 --- a/src/booster/softboost/qp_model.rs +++ b/src/booster/softboost/qp_model.rs @@ -154,13 +154,13 @@ impl QPModel { } old_objval = objval; } - return Some(()) + Some(()) } /// Returns `true` if `dist[i] > 0` holds for all `i = 1, 2, ..., m.` pub(self) fn all_positive(&self, dist: &[f64]) -> bool { - dist.into_iter() + dist.iter() .copied() .all(|d| d > 0f64) } @@ -168,7 +168,7 @@ impl QPModel { pub(self) fn build_linear_part_objective(&self, dist: &[f64]) -> Vec { let mut linear = Vec::with_capacity(self.n_examples); - let iter = dist.into_iter() + let iter = dist.iter() .copied() .map(|di| di.ln()); linear.extend(iter); diff --git a/src/booster/softboost/softboost_algorithm.rs b/src/booster/softboost/softboost_algorithm.rs index c398556..dfca910 100644 --- a/src/booster/softboost/softboost_algorithm.rs +++ b/src/booster/softboost/softboost_algorithm.rs @@ -64,7 +64,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") diff --git a/src/booster/totalboost/totalboost_algorithm.rs b/src/booster/totalboost/totalboost_algorithm.rs index 24589a8..b46007d 100644 --- a/src/booster/totalboost/totalboost_algorithm.rs +++ b/src/booster/totalboost/totalboost_algorithm.rs @@ -66,7 +66,7 @@ use std::ops::ControlFlow; /// /// // Read the training sample from the CSV file. /// // We use the column named `class` as the label. -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(path_to_file) /// .has_header(true) /// .target_feature("class") diff --git a/src/common/loss_functions.rs b/src/common/loss_functions.rs index 792a24b..2b31147 100644 --- a/src/common/loss_functions.rs +++ b/src/common/loss_functions.rs @@ -130,7 +130,7 @@ impl LossFunction for GBMLoss { fn hessian(&self, predictions: &[f64], target: &[f64]) -> Vec { let n_sample = predictions.len(); - assert_eq!(n_sample as usize, target.len()); + assert_eq!(n_sample, target.len()); match self { Self::L1 => { diff --git a/src/common/utils.rs b/src/common/utils.rs index 98b3475..fcf726f 100644 --- a/src/common/utils.rs +++ b/src/common/utils.rs @@ -321,7 +321,7 @@ pub(crate) fn total_weight_for_label( weight: &[f64], ) -> f64 { - target.into_iter() + target.iter() .copied() .zip(weight) .filter_map(|(t, w)| if t == y { Some(w) } else { None }) diff --git a/src/research/cross_validation.rs b/src/research/cross_validation.rs index aeb28be..6eeec32 100644 --- a/src/research/cross_validation.rs +++ b/src/research/cross_validation.rs @@ -152,7 +152,7 @@ impl<'a> CrossValidation<'a> { } -impl<'a> Iterator for CrossValidation<'a> { +impl Iterator for CrossValidation<'_> { type Item = (Sample, Sample); fn next(&mut self) -> Option { if self.current_fold >= self.n_folds { return None; } diff --git a/src/research/logger.rs b/src/research/logger.rs index d8d6907..523826e 100644 --- a/src/research/logger.rs +++ b/src/research/logger.rs @@ -62,7 +62,7 @@ impl<'a, B, W, F, G> Logger<'a, B, W, F, G> { } } -impl<'a, H, B, W, F, G, O> Logger<'a, B, W, F, G> +impl Logger<'_, B, W, F, G> where B: Booster + Research, O: Classifier, W: WeakLearner, @@ -106,8 +106,7 @@ impl<'a, H, B, W, F, G, O> Logger<'a, B, W, F, G> #[inline(always)] fn print_log_header(&self) { println!( - "{} {:>WIDTH$}\t\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}", - " ", + " {:>WIDTH$}\t\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}", "".bold().red(), "OBJ.".bold().blue(), "TRAIN".bold().green(), @@ -115,8 +114,7 @@ impl<'a, H, B, W, F, G, O> Logger<'a, B, W, F, G> "ACC.".bold().cyan(), ); println!( - "{} {:>WIDTH$}\t\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\n", - " ", + " {:>WIDTH$}\t\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\t{:>WIDTH$}\n", "ROUND".bold().red(), "VALUE".bold().blue(), "ERROR".bold().green(), @@ -184,7 +182,7 @@ impl<'a, H, B, W, F, G, O> Logger<'a, B, W, F, G> "\ + {:STAT_WIDTH$}\n\ + {:STAT_WIDTH$}\n\ - {:=^FULL_WIDTH$}\ + {:=^FULL_WIDTH$}\n\ ", "Objective".bold(), self.objective_func.name().bold().green(), @@ -192,7 +190,6 @@ impl<'a, H, B, W, F, G, O> Logger<'a, B, W, F, G> limit.bold().green(), "".bold(), ); - println!(""); } diff --git a/src/sample/feature_struct.rs b/src/sample/feature_struct.rs index b9ad91c..93c2a6d 100644 --- a/src/sample/feature_struct.rs +++ b/src/sample/feature_struct.rs @@ -422,7 +422,7 @@ impl SparseFeature { fn distinct_value_count(&self) -> usize { let values = self.sample[..] - .into_iter() + .iter() .map(|(_, v)| *v) .collect::>(); let mut uniq_value_count = inner_distinct_value_count(values); @@ -526,7 +526,7 @@ impl Index for Feature { /// Count the number of items in `src` that has the same value. /// The given vector `src` is assumed to be sorted in ascending order. fn inner_distinct_value_count(mut src: Vec) -> usize { - src.sort_by(|a, b| a.partial_cmp(&b).unwrap()); + src.sort_by(|a, b| a.partial_cmp(b).unwrap()); let mut iter = src.into_iter(); let mut value = match iter.next() { Some(v) => v, diff --git a/src/sample/sample_reader.rs b/src/sample/sample_reader.rs index 9a44f40..c4721ab 100644 --- a/src/sample/sample_reader.rs +++ b/src/sample/sample_reader.rs @@ -11,12 +11,13 @@ use super::sample_struct::Sample; /// The following code is a simple example to read a CSV file. /// ```no_run /// let filename = "/path/to/csv/file.csv"; -/// let sample = SampleReader::new() +/// let sample = SampleReader::default() /// .file(filename) /// .has_header(true) /// .target_feature("class") /// .read()?; /// ``` +#[derive(Default)] pub struct SampleReader { file: Option

, has_header: bool, @@ -25,16 +26,6 @@ pub struct SampleReader { impl SampleReader { - /// Construct a new instance of [`SampleReader`]. - pub fn new() -> Self { - Self { - file: None, - has_header: false, - target: None, - } - } - - /// Set the flag whether the file has the header row or not. /// Default is `false.` pub fn has_header(mut self, flag: bool) -> Self { diff --git a/src/sample/sample_struct.rs b/src/sample/sample_struct.rs index 28f711d..4d1a77f 100644 --- a/src/sample/sample_struct.rs +++ b/src/sample/sample_struct.rs @@ -4,6 +4,7 @@ use std::io::{self, BufRead, BufReader}; use std::collections::{HashMap, HashSet}; use std::ops::Index; use std::mem; +use std::cmp::Ordering; use polars::prelude::*; use rayon::prelude::*; @@ -39,8 +40,7 @@ impl Sample { let mut target = vec![1f64; n_sample]; target[half..].iter_mut() .for_each(|y| { *y = -1f64; }); - let mut features = Vec::with_capacity(1); - features.push(Feature::new_sparse("dummy")); + let features = vec![Feature::new_sparse("dummy")]; Self { name_to_index: HashMap::from([("dummy".to_string(), 0)]), features, @@ -184,7 +184,7 @@ impl Sample { /// Returns the unique target values. pub fn unique_target(&self) -> Vec { let mut target = self.target().to_vec(); - target.sort_by(|a, b| a.partial_cmp(&b).unwrap()); + target.sort_by(|a, b| a.partial_cmp(b).unwrap()); target.dedup(); target @@ -408,16 +408,20 @@ impl Sample { .map(|yi| yi as i32) .collect::>(); let n_label = set.len(); - if n_label > 2 { - panic!( - "The target values take more than 2 kinds. \ - Expected 2 kinds, got {n_label} kinds." - ); - } else if n_label < 2 { - panic!( - "The target values take less than 2 kinds. \ - Expected 2 kinds, got {n_label} kinds." - ); + match set.len().cmp(&2) { + Ordering::Greater => { + panic!( + "The target values take more than 2 kinds. \ + Expected 2 kinds, got {n_label} kinds." + ); + }, + Ordering::Less => { + panic!( + "The target values take less than 2 kinds. \ + Expected 2 kinds, got {n_label} kinds." + ); + }, + Ordering::Equal => {}, } @@ -541,7 +545,7 @@ impl Sample { let name_to_ix = self.name_to_index.clone(); let mut train = Self { n_sample: train_size, - n_feature: n_feature, + n_feature, name_to_index: name_to_ix.clone(), features: vec![Feature::new_sparse("dummy"); n_feature], target: Vec::with_capacity(train_size), @@ -549,7 +553,7 @@ impl Sample { let mut test = Self { n_sample: test_size, - n_feature: n_feature, + n_feature, name_to_index: name_to_ix, features: vec![Feature::new_sparse("dummy"); n_feature], target: Vec::with_capacity(test_size), @@ -567,22 +571,17 @@ impl Sample { } } - for i in 0..start { - let ii = ix[i]; + for (i, &ii) in ix.iter().enumerate().take(start) { let (x, y) = self.at(ii); train.append(i, x, y); } - - for i in start..end { - let ii = ix[i]; + for (i, &ii) in ix.iter().enumerate().take(end).skip(start) { let (x, y) = self.at(ii); test.append(i, x, y); } - - for i in end..self.n_sample { - let ii = ix[i]; + for (i, &ii) in ix.iter().enumerate().take(self.n_sample).skip(end) { let (x, y) = self.at(ii); train.append(i, x, y); } @@ -594,7 +593,7 @@ impl Sample { /// Parse the following type of `str` to the pair of `(usize, f64)`. /// `index:value`, where `index: usize` and `value: f64`. -pub(self) fn index_and_feature(word: &str) -> (usize, f64) { +fn index_and_feature(word: &str) -> (usize, f64) { let mut i_x = word.split(':'); let i = i_x.next() .unwrap() diff --git a/src/weak_learner/bad_learner/worstcase_classifier.rs b/src/weak_learner/bad_learner/worstcase_classifier.rs index 4d423d1..5bd8dcf 100644 --- a/src/weak_learner/bad_learner/worstcase_classifier.rs +++ b/src/weak_learner/bad_learner/worstcase_classifier.rs @@ -73,12 +73,10 @@ impl Classifier for BadClassifier { } else { (-1f64 + self.gap * self.eps) * y } + } else if row < self.index { + (-1f64 + self.eps) * y } else { - if row < self.index { - (-1f64 + self.eps) * y - } else { - (1f64 - self.eps) * y - } + (1f64 - self.eps) * y } } } diff --git a/src/weak_learner/decision_tree/bin.rs b/src/weak_learner/decision_tree/bin.rs index 37146a5..53e3c24 100644 --- a/src/weak_learner/decision_tree/bin.rs +++ b/src/weak_learner/decision_tree/bin.rs @@ -112,8 +112,8 @@ impl Bins { // If the minimum value equals to the maximum one, // slightly perturb them. if min == max { - min = min - EPS; - max = max + EPS; + min -= EPS; + max += EPS; } @@ -145,7 +145,7 @@ impl Bins { let mut min = f64::MAX; let mut max = f64::MIN; feature.sample[..] - .into_iter() + .iter() .copied() .for_each(|(_, val)| { min = min.min(val); @@ -166,8 +166,8 @@ impl Bins { // If the minimum value equals to the maximum one, // slightly perturb them. if min == max { - min = min - EPS; - max = max + EPS; + min -= EPS; + max += EPS; } diff --git a/src/weak_learner/decision_tree/criterion.rs b/src/weak_learner/decision_tree/criterion.rs index a4a91d2..3f510c1 100644 --- a/src/weak_learner/decision_tree/criterion.rs +++ b/src/weak_learner/decision_tree/criterion.rs @@ -22,7 +22,7 @@ use crate::weak_learner::common::{ /// This is just a wrapper for `f64`. #[repr(transparent)] #[derive(Copy, Clone, Debug, Serialize, Deserialize)] -pub(self) struct Score(f64); +struct Score(f64); impl From for Score { @@ -110,7 +110,6 @@ impl Criterion { ) -> (&'a str, f64) { let target = sample.target(); - let target = &target[..]; match self { Criterion::Entropy => { sample.features() @@ -356,9 +355,9 @@ fn split_by_twoing(pack: Vec<(Bin, LabelToWeight)>) -> (f64, Score) { /// Returns the entropic-impurity of the given map. #[inline(always)] -pub(self) fn entropic_impurity(map: &HashMap) -> f64 { +fn entropic_impurity(map: &HashMap) -> f64 { let total = map.values().sum::(); - if total <= 0f64 || map.is_empty() { return 0f64.into(); } + if total <= 0f64 || map.is_empty() { return 0f64; } map.par_iter() .map(|(_, &p)| { @@ -371,9 +370,9 @@ pub(self) fn entropic_impurity(map: &HashMap) -> f64 { /// Returns the gini-impurity of the given map. #[inline(always)] -pub(self) fn gini_impurity(map: &HashMap) -> f64 { +fn gini_impurity(map: &HashMap) -> f64 { let total = map.values().sum::(); - if total <= 0f64 || map.is_empty() { return 0f64.into(); } + if total <= 0f64 || map.is_empty() { return 0f64; } let correct = map.par_iter() .map(|(_, &w)| (w / total).powi(2)) @@ -385,7 +384,7 @@ pub(self) fn gini_impurity(map: &HashMap) -> f64 { /// Returns the gini-impurity of the given map. #[inline(always)] -pub(self) fn twoing_score( +fn twoing_score( labels: &HashSet, left: &HashMap, right: &HashMap, diff --git a/src/weak_learner/decision_tree/decision_tree_algorithm.rs b/src/weak_learner/decision_tree/decision_tree_algorithm.rs index 80f69da..de1455c 100644 --- a/src/weak_learner/decision_tree/decision_tree_algorithm.rs +++ b/src/weak_learner/decision_tree/decision_tree_algorithm.rs @@ -161,7 +161,7 @@ impl<'a> DecisionTree<'a> { } -impl<'a> WeakLearner for DecisionTree<'a> { +impl WeakLearner for DecisionTree<'_> { type Hypothesis = DecisionTreeClassifier; @@ -222,7 +222,7 @@ impl<'a> WeakLearner for DecisionTree<'a> { /// This function returns a tuple `(c, l)` where -/// - `c` is the **confidence** for some label `y` +/// `c` is the **confidence** for some label `y` /// that minimizes the training loss. /// - `l` is the training loss when the confidence is `y`. /// diff --git a/src/weak_learner/naive_bayes/nbayes.rs b/src/weak_learner/naive_bayes/nbayes.rs index 98842cb..30e1950 100644 --- a/src/weak_learner/naive_bayes/nbayes.rs +++ b/src/weak_learner/naive_bayes/nbayes.rs @@ -16,7 +16,7 @@ use super::nbayes_classifier::*; pub struct GaussianNB {} -impl<'a> GaussianNB { +impl GaussianNB { /// Initializes the GaussianNB instance. pub fn init() -> Self { Self {} diff --git a/src/weak_learner/neural_network/nn_weak_learner.rs b/src/weak_learner/neural_network/nn_weak_learner.rs index fa77627..1d12149 100644 --- a/src/weak_learner/neural_network/nn_weak_learner.rs +++ b/src/weak_learner/neural_network/nn_weak_learner.rs @@ -13,7 +13,6 @@ use super::{ nn_hypothesis::*, }; -use rand; use rand::seq::index; const N_EPOCH: usize = 100; diff --git a/src/weak_learner/regression_tree/bin.rs b/src/weak_learner/regression_tree/bin.rs index 5782d36..148b1ce 100644 --- a/src/weak_learner/regression_tree/bin.rs +++ b/src/weak_learner/regression_tree/bin.rs @@ -112,8 +112,8 @@ impl Bins { // If the minimum value equals to the maximum one, // slightly perturb them. if min == max { - min = min - EPS; - max = max + EPS; + min -= EPS; + max += EPS; } @@ -145,7 +145,7 @@ impl Bins { let mut min = f64::MAX; let mut max = f64::MIN; feature.sample[..] - .into_iter() + .iter() .copied() .for_each(|(_, val)| { min = min.min(val); @@ -166,8 +166,8 @@ impl Bins { // If the minimum value equals to the maximum one, // slightly perturb them. if min == max { - min = min - EPS; - max = max + EPS; + min -= EPS; + max += EPS; } diff --git a/src/weak_learner/regression_tree/regression_tree_algorithm.rs b/src/weak_learner/regression_tree/regression_tree_algorithm.rs index b5774f2..b3d331f 100644 --- a/src/weak_learner/regression_tree/regression_tree_algorithm.rs +++ b/src/weak_learner/regression_tree/regression_tree_algorithm.rs @@ -156,7 +156,7 @@ impl<'a, L> RegressionTree<'a, L> { } -impl<'a, L> WeakLearner for RegressionTree<'a, L> +impl WeakLearner for RegressionTree<'_, L> where L: LossFunction, { type Hypothesis = RegressionTreeRegressor; @@ -175,7 +175,7 @@ impl<'a, L> WeakLearner for RegressionTree<'a, L> let info = Vec::from([ ("# of bins (max)", format!("{n_bins}")), ("Max depth", format!("{}", self.max_depth)), - ("Split criterion", format!("{}", self.loss_func.name())), + ("Split criterion", self.loss_func.name().to_string()), ("Regularization param.", format!("{}", self.lambda_l2)), ]); Some(info) diff --git a/tests/adaboost.rs b/tests/adaboost.rs index 2bb92cb..baf2ff3 100644 --- a/tests/adaboost.rs +++ b/tests/adaboost.rs @@ -12,7 +12,7 @@ pub mod adaboost_tests { let mut path = env::current_dir().unwrap(); path.push("tests/dataset/german.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/adaboostv.rs b/tests/adaboostv.rs index d34b829..a6e69e2 100644 --- a/tests/adaboostv.rs +++ b/tests/adaboostv.rs @@ -11,7 +11,7 @@ pub mod adaboostv_tests { let mut path = env::current_dir().unwrap(); path.push("tests/dataset/german.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/cerlpboost.rs b/tests/cerlpboost.rs index 8791a3d..65a86ce 100644 --- a/tests/cerlpboost.rs +++ b/tests/cerlpboost.rs @@ -11,7 +11,7 @@ pub mod cerlpboost_tests { let mut path = env::current_dir().unwrap(); path.push("tests/dataset/german.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/erlpboost.rs b/tests/erlpboost.rs index 2ea079e..4812d65 100644 --- a/tests/erlpboost.rs +++ b/tests/erlpboost.rs @@ -30,7 +30,7 @@ pub mod erlpboost_tests { const TOLERANCE: f64 = 0.001; let path = "img/csv/breast-cancer-train.csv"; - let train = SampleReader::new() + let train = SampleReader::default() .file(path) .has_header(true) .target_feature("class") @@ -44,7 +44,7 @@ pub mod erlpboost_tests { let path = "img/csv/breast-cancer-test.csv"; - let test = SampleReader::new() + let test = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/gbm.rs b/tests/gbm.rs index 6b221a5..31cd511 100644 --- a/tests/gbm.rs +++ b/tests/gbm.rs @@ -12,7 +12,7 @@ pub mod gbm_boston { let mut path = env::current_dir().unwrap(); path.push(format!("tests/dataset/{file}")); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("MedHouseVal") diff --git a/tests/graphsepboost.rs b/tests/graphsepboost.rs index 9a1584f..d0e294b 100644 --- a/tests/graphsepboost.rs +++ b/tests/graphsepboost.rs @@ -13,7 +13,7 @@ pub mod graphsepboost_tests { path.push("tests/dataset/german.csv"); // path.push("tests/dataset/iris_binary.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/lpboost.rs b/tests/lpboost.rs index f6e9585..7c4af26 100644 --- a/tests/lpboost.rs +++ b/tests/lpboost.rs @@ -30,7 +30,7 @@ pub mod lpboost_tests { const TOLERANCE: f64 = 0.001; let path = "img/csv/breast-cancer-train.csv"; - let train = SampleReader::new() + let train = SampleReader::default() .file(path) .has_header(true) .target_feature("class") @@ -43,7 +43,7 @@ pub mod lpboost_tests { let path = "img/csv/breast-cancer-test.csv"; - let test = SampleReader::new() + let test = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/madaboost.rs b/tests/madaboost.rs index 7be12f2..2a922cb 100644 --- a/tests/madaboost.rs +++ b/tests/madaboost.rs @@ -12,7 +12,7 @@ pub mod madaboost_tests { let mut path = env::current_dir().unwrap(); path.push("tests/dataset/german.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/mlpboost.rs b/tests/mlpboost.rs index 360731c..48292e6 100644 --- a/tests/mlpboost.rs +++ b/tests/mlpboost.rs @@ -29,7 +29,7 @@ pub mod mlpboost_tests { const TIME_LIMIT: u128 = 60_000; // 1 minute as millisecond. let path = "img/csv/breast-cancer-train.csv"; - let train = SampleReader::new() + let train = SampleReader::default() .file(path) .has_header(true) .target_feature("class") @@ -41,7 +41,7 @@ pub mod mlpboost_tests { // let nu = 1.0; let path = "img/csv/breast-cancer-test.csv"; - let test = SampleReader::new() + let test = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/smoothboost.rs b/tests/smoothboost.rs index c36b348..a18037d 100644 --- a/tests/smoothboost.rs +++ b/tests/smoothboost.rs @@ -12,7 +12,7 @@ pub mod smoothboost_tests { println!("path: {:?}", path); path.push("tests/dataset/german.csv"); - let sample = SampleReader::new() + let sample = SampleReader::default() .file(path) .has_header(true) .target_feature("class") diff --git a/tests/softboost.rs b/tests/softboost.rs index 2004047..76b7a1c 100644 --- a/tests/softboost.rs +++ b/tests/softboost.rs @@ -30,7 +30,7 @@ pub mod softboost_tests { const TOLERANCE: f64 = 0.001; let path = "img/csv/breast-cancer-train.csv"; - let train = SampleReader::new() + let train = SampleReader::default() .file(path) .has_header(true) .target_feature("class") @@ -44,7 +44,7 @@ pub mod softboost_tests { let path = "img/csv/breast-cancer-test.csv"; - let test = SampleReader::new() + let test = SampleReader::default() .file(path) .has_header(true) .target_feature("class")