diff --git a/.gitattributes b/.gitattributes index e69de29bb..c3cdc1f56 100644 --- a/.gitattributes +++ b/.gitattributes @@ -0,0 +1,2 @@ +pixi.lock merge=binary linguist-language=YAML linguist-generated=true +Cargo.lock merge=binary linguist-generated=true diff --git a/.github/workflows/python-bindings.yml b/.github/workflows/python-bindings.yml index b3bb161df..a5c620d47 100644 --- a/.github/workflows/python-bindings.yml +++ b/.github/workflows/python-bindings.yml @@ -35,6 +35,7 @@ jobs: - uses: prefix-dev/setup-pixi@v0.8.3 with: manifest-path: py-rattler/pixi.toml + environments: test - uses: actions-rust-lang/setup-rust-toolchain@v1 with: components: clippy, rustfmt @@ -44,6 +45,9 @@ jobs: pixi run -e test lint pixi run -e test fmt-check - name: Run tests + env: + RATTLER_TEST_R2_ACCESS_KEY_ID: ${{ secrets.RATTLER_TEST_R2_ACCESS_KEY_ID }} + RATTLER_TEST_R2_SECRET_ACCESS_KEY: ${{ secrets.RATTLER_TEST_R2_SECRET_ACCESS_KEY }} run: | cd py-rattler pixi run -e test test --color=yes diff --git a/Cargo.toml b/Cargo.toml index ba1fde538..8edd7baf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ chrono = { version = "0.4.39", default-features = false, features = [ "alloc", ] } clap = { version = "4.5.29", features = ["derive"] } +clap-verbosity-flag = "3.0.1" cmake = "0.1.54" console = { version = "0.15.10", features = ["windows-console-colors"] } criterion = "0.5" @@ -105,6 +106,7 @@ memmap2 = "0.9.5" netrc-rs = "0.1.2" nom = "7.1.3" num_cpus = "1.16.0" +opendal = { version = "0.51.2", default-features = false } once_cell = "1.20.3" ouroboros = "0.18.5" parking_lot = "0.12.3" @@ -161,6 +163,7 @@ tokio-util = "0.7.13" tower = { version = "0.5.2", default-features = false } tower-http = { version = "0.6.2", default-features = false } tracing = "0.1.41" +tracing-log = "0.2.0" tracing-subscriber = { version = "0.3.19", default-features = false } tracing-test = { version = "0.2.5" } trybuild = { version = "1.0.103" } diff --git 
a/crates/rattler_conda_types/Cargo.toml b/crates/rattler_conda_types/Cargo.toml index 580641364..2afab5ba9 100644 --- a/crates/rattler_conda_types/Cargo.toml +++ b/crates/rattler_conda_types/Cargo.toml @@ -24,17 +24,25 @@ itertools = { workspace = true } lazy-regex = { workspace = true } nom = { workspace = true } purl = { workspace = true, features = ["serde"] } -rattler_digest = { path = "../rattler_digest", version = "1.0.6", default-features = false, features = ["serde"] } +rattler_digest = { path = "../rattler_digest", version = "1.0.6", default-features = false, features = [ + "serde", +] } rattler_macros = { path = "../rattler_macros", version = "1.0.6", default-features = false } +rattler_redaction = { version = "0.1.6", path = "../rattler_redaction", default-features = false } regex = { workspace = true } -simd-json = { workspace = true , features = ["serde_impl"]} +simd-json = { workspace = true, features = ["serde_impl"] } serde = { workspace = true, features = ["derive", "rc"] } serde_json = { workspace = true } serde_repr = { workspace = true } serde_with = { workspace = true, features = ["indexmap_2"] } serde-untagged = { workspace = true } serde_yaml = { workspace = true } -smallvec = { workspace = true, features = ["serde", "const_new", "const_generics", "union"] } +smallvec = { workspace = true, features = [ + "serde", + "const_new", + "const_generics", + "union", +] } strum = { workspace = true, features = ["derive"] } tempfile = { workspace = true } thiserror = { workspace = true } @@ -42,15 +50,22 @@ tracing = { workspace = true } typed-path = { workspace = true } url = { workspace = true, features = ["serde"] } indexmap = { workspace = true } -rattler_redaction = { version = "0.1.6", path = "../rattler_redaction" } dirs = { workspace = true } rayon = { workspace = true, optional = true } fs-err = { workspace = true } [dev-dependencies] rand = { workspace = true } -insta = { workspace = true, features = ["yaml", "redactions", "toml", "glob", 
"filters"] } -rattler_package_streaming = { path = "../rattler_package_streaming", default-features = false, features = ["rustls-tls"] } +insta = { workspace = true, features = [ + "yaml", + "redactions", + "toml", + "glob", + "filters", +] } +rattler_package_streaming = { path = "../rattler_package_streaming", default-features = false, features = [ + "rustls-tls", +] } rstest = { workspace = true } assert_matches = { workspace = true } hex-literal = { workspace = true } diff --git a/crates/rattler_index/Cargo.toml b/crates/rattler_index/Cargo.toml index d3157f28c..7c235b212 100644 --- a/crates/rattler_index/Cargo.toml +++ b/crates/rattler_index/Cargo.toml @@ -3,23 +3,64 @@ name = "rattler_index" version = "0.20.13" edition.workspace = true authors = [] -description = "A crate that indexes directories containing conda packages to create local conda channels" +description = "A crate to index conda channels and create a repodata.json file." categories.workspace = true homepage.workspace = true repository.workspace = true license.workspace = true readme.workspace = true +default-run = "rattler-index" + +[features] +default = ["rustls-tls"] +native-tls = [ + "reqwest/native-tls", + "reqwest/native-tls-alpn", + "rattler_package_streaming/native-tls", + "rattler_networking/native-tls", + "tools/native-tls", +] +rustls-tls = [ + "reqwest/rustls-tls", + "reqwest/rustls-tls-native-roots", + "rattler_package_streaming/rustls-tls", + "rattler_networking/rustls-tls", + "tools/rustls-tls", +] + +[[bin]] +name = "rattler-index" +path = "src/main.rs" [dependencies] +anyhow = { workspace = true } +clap = { workspace = true, features = ["derive", "env"] } +clap-verbosity-flag = { workspace = true } +console = { workspace = true } fs-err = { workspace = true } -rattler_conda_types = { path="../rattler_conda_types", version = "0.31.0", default-features = false } -rattler_digest = { path="../rattler_digest", version = "1.0.6", default-features = false } -rattler_package_streaming = { 
path="../rattler_package_streaming", version = "0.22.29", default-features = false } +fxhash = { workspace = true } +futures = { workspace = true } +indicatif = { workspace = true } +opendal = { workspace = true, features = [ + "services-s3", + "services-fs", +], default-features = false } +rattler_networking = { path = "../rattler_networking", version = "0.22.4", default-features = false } +rattler_conda_types = { path = "../rattler_conda_types", version = "0.31.0" } +rattler_digest = { path = "../rattler_digest", version = "1.0.6", default-features = false } +rattler_package_streaming = { path = "../rattler_package_streaming", version = "0.22.29", default-features = false } +reqwest = { workspace = true, default-features = false, features = [ + "http2", + "macos-system-configuration", + "charset", +] } serde_json = { workspace = true } -tempfile = { workspace = true } +tokio = { workspace = true, features = ["full"] } tracing = { workspace = true } -walkdir = { workspace = true } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +tracing-log = { workspace = true } +url = { workspace = true } [dev-dependencies] tempfile = { workspace = true } -tools = { path = "../tools" } +tools = { path = "../tools", default-features = false } diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 37bdac722..6ea2c85dc 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -2,31 +2,43 @@ //! 
files #![deny(missing_docs)] -use fs_err as fs; +use anyhow::Result; +use fs_err::{self as fs}; +use futures::future::try_join_all; +use fxhash::FxHashMap; +use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use rattler_conda_types::{ package::{ArchiveType, IndexJson, PackageFile}, ChannelInfo, PackageRecord, Platform, RepoData, }; +use rattler_networking::{Authentication, AuthenticationStorage}; use rattler_package_streaming::{read, seek}; use std::{ collections::{HashMap, HashSet}, - ffi::OsStr, - io::{Read, Write}, + io::{Cursor, Read}, path::{Path, PathBuf}, + str::FromStr, + sync::Arc, +}; +use tokio::sync::Semaphore; +use url::Url; + +use opendal::{ + services::{FsConfig, S3Config}, + Configurator, Operator, }; -use tempfile::NamedTempFile; -use walkdir::WalkDir; /// Extract the package record from an `index.json` file. pub fn package_record_from_index_json( - file: &Path, + package_as_bytes: impl AsRef<[u8]>, index_json_reader: &mut T, ) -> std::io::Result { let index = IndexJson::from_reader(index_json_reader)?; - let sha256_result = rattler_digest::compute_file_digest::(file)?; - let md5_result = rattler_digest::compute_file_digest::(file)?; - let size = fs::metadata(file)?.len(); + let sha256_result = + rattler_digest::compute_bytes_digest::(&package_as_bytes); + let md5_result = rattler_digest::compute_bytes_digest::(&package_as_bytes); + let size = package_as_bytes.as_ref().len(); let package_record = PackageRecord { name: index.name, @@ -36,7 +48,7 @@ pub fn package_record_from_index_json( subdir: index.subdir.unwrap_or_else(|| "unknown".to_string()), md5: Some(md5_result), sha256: Some(sha256_result), - size: Some(size), + size: Some(size as u64), arch: index.arch, platform: index.platform, depends: index.depends, @@ -63,12 +75,21 @@ pub fn package_record_from_index_json( /// and extract the package record from it. 
pub fn package_record_from_tar_bz2(file: &Path) -> std::io::Result { let reader = fs::File::open(file)?; + package_record_from_tar_bz2_reader(reader) +} + +/// Extract the package record from a `.tar.bz2` package file. +/// This function will look for the `info/index.json` file in the conda package +/// and extract the package record from it. +pub fn package_record_from_tar_bz2_reader(reader: impl Read) -> std::io::Result { + let bytes = reader.bytes().collect::, _>>()?; + let reader = Cursor::new(bytes.clone()); let mut archive = read::stream_tar_bz2(reader); for entry in archive.entries()?.flatten() { let mut entry = entry; let path = entry.path()?; if path.as_os_str().eq("info/index.json") { - return package_record_from_index_json(file, &mut entry); + return package_record_from_index_json(bytes, &mut entry); } } Err(std::io::Error::new( @@ -82,13 +103,22 @@ pub fn package_record_from_tar_bz2(file: &Path) -> std::io::Result std::io::Result { let reader = fs::File::open(file)?; + package_record_from_conda_reader(reader) +} + +/// Extract the package record from a `.conda` package file content. +/// This function will look for the `info/index.json` file in the conda package +/// and extract the package record from it. +pub fn package_record_from_conda_reader(reader: impl Read) -> std::io::Result { + let bytes = reader.bytes().collect::, _>>()?; + let reader = Cursor::new(bytes.clone()); let mut archive = seek::stream_conda_info(reader).expect("Could not open conda file"); for entry in archive.entries()?.flatten() { let mut entry = entry; let path = entry.path()?; if path.as_os_str().eq("info/index.json") { - return package_record_from_index_json(file, &mut entry); + return package_record_from_index_json(bytes, &mut entry); } } Err(std::io::Error::new( @@ -97,117 +127,319 @@ pub fn package_record_from_conda(file: &Path) -> std::io::Result )) } -/// Create a new `repodata.json` for all packages in the given output folder. 
If -/// `target_platform` is `Some`, only that specific subdir is indexed. Otherwise -/// indexes all subdirs and creates a `repodata.json` for each. -pub fn index(output_folder: &Path, target_platform: Option<&Platform>) -> std::io::Result<()> { - let entries = WalkDir::new(output_folder).into_iter(); - let entries: Vec<(PathBuf, ArchiveType)> = entries - .filter_entry(|e| e.depth() <= 2) - .filter_map(Result::ok) - .filter_map(|e| { - ArchiveType::split_str(e.path().to_string_lossy().as_ref()) - .map(|(p, t)| (PathBuf::from(format!("{}{}", p, t.extension())), t)) +async fn index_subdir( + subdir: Platform, + op: Operator, + force: bool, + progress: MultiProgress, + semaphore: Arc, +) -> Result<()> { + let mut registered_packages: FxHashMap = HashMap::default(); + if !force { + let repodata_path = format!("{subdir}/repodata.json"); + let repodata_bytes = op.read(&repodata_path).await; + let repodata: RepoData = match repodata_bytes { + Ok(bytes) => serde_json::from_slice(&bytes.to_vec())?, + Err(e) => { + if e.kind() != opendal::ErrorKind::NotFound { + return Err(e.into()); + } + tracing::info!("Could not find repodata.json. Creating new one."); + RepoData { + info: Some(ChannelInfo { + subdir: subdir.to_string(), + base_url: None, + }), + packages: HashMap::default(), + conda_packages: HashMap::default(), + removed: HashSet::default(), + version: Some(2), + } + } + }; + registered_packages.extend(repodata.packages.into_iter()); + registered_packages.extend(repodata.conda_packages.into_iter()); + tracing::debug!( + "Found {} already registered packages in {}/repodata.json.", + registered_packages.len(), + subdir + ); + } + let uploaded_packages: HashSet = op + .list_with(&format!("{}/", subdir.as_str())) + .await? + .iter() + .filter_map(|entry| { + if entry.metadata().mode().is_file() { + let filename = entry.name().to_string(); + // Check if the file is an archive package file. 
+ ArchiveType::try_from(&filename).map(|_| filename) + } else { + None + } }) .collect(); - // find all subdirs - let mut platforms = entries + tracing::debug!( + "Found {} already uploaded packages in subdir {}.", + uploaded_packages.len(), + subdir + ); + + let packages_to_delete = registered_packages + .keys() + .cloned() + .collect::>() + .difference(&uploaded_packages) + .cloned() + .collect::>(); + + tracing::debug!( + "Deleting {} packages from subdir {}.", + packages_to_delete.len(), + subdir + ); + + for filename in packages_to_delete { + registered_packages.remove(&filename); + } + + let packages_to_add = uploaded_packages + .difference(®istered_packages.keys().cloned().collect::>()) + .cloned() + .collect::>(); + + tracing::debug!( + "Adding {} packages to subdir {}.", + packages_to_add.len(), + subdir + ); + + let pb = Arc::new(progress.add(ProgressBar::new(packages_to_add.len() as u64))); + let sty = ProgressStyle::with_template( + "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}", + ) + .unwrap() + .progress_chars("##-"); + pb.set_style(sty); + + let tasks = packages_to_add .iter() - .filter_map(|(p, _)| { - p.parent().and_then(Path::file_name).and_then(|file_name| { - let name = file_name.to_string_lossy().to_string(); - if name == "src_cache" { - None - } else { - Some(name) + .map(|filename| { + tokio::spawn({ + let op = op.clone(); + let filename = filename.clone(); + let pb = pb.clone(); + let semaphore = semaphore.clone(); + { + async move { + let _permit = semaphore + .acquire() + .await + .expect("Semaphore was unexpectedly closed"); + pb.set_message(format!( + "Indexing {} {}", + subdir.as_str(), + console::style(filename.clone()).dim() + )); + let file_path = format!("{subdir}/{filename}"); + let buffer = op.read(&file_path).await?; + let bytes = buffer.to_vec(); + let cursor = Cursor::new(bytes); + // We already know it's not None + let archive_type = ArchiveType::try_from(&filename).unwrap(); + let record = match archive_type 
{ + ArchiveType::TarBz2 => package_record_from_tar_bz2_reader(cursor), + ArchiveType::Conda => package_record_from_conda_reader(cursor), + }?; + pb.inc(1); + Ok::<(String, PackageRecord), std::io::Error>((filename.clone(), record)) + } } }) }) - .collect::>(); + .collect::>(); + let results = try_join_all(tasks).await?; - // Always create noarch subdir - if !output_folder.join("noarch").exists() { - fs::create_dir(output_folder.join("noarch"))?; - } + pb.finish_with_message(format!("Finished {}", subdir.as_str())); - // Make sure that we index noarch if it is not already indexed - if !output_folder.join("noarch/repodata.json").exists() { - platforms.insert("noarch".to_string()); - } + tracing::debug!( + "Successfully added {} packages to subdir {}.", + results.len(), + subdir + ); - // Create target platform dir if needed - if let Some(target_platform) = target_platform { - let platform_str = target_platform.to_string(); - if !output_folder.join(&platform_str).exists() { - fs::create_dir(output_folder.join(&platform_str))?; - platforms.insert(platform_str); - } + for result in results { + let (filename, record) = result?; + registered_packages.insert(filename, record); } - for platform in platforms { - if let Some(target_platform) = target_platform { - if platform != target_platform.to_string() { - if platform == "noarch" { - // check that noarch is already indexed if it is not the target platform - if output_folder.join("noarch/repodata.json").exists() { - continue; - } - } else { - continue; - } + let mut packages: FxHashMap = HashMap::default(); + let mut conda_packages: FxHashMap = HashMap::default(); + for (filename, package) in registered_packages { + match ArchiveType::try_from(&filename) { + Some(ArchiveType::TarBz2) => { + packages.insert(filename, package); } + Some(ArchiveType::Conda) => { + conda_packages.insert(filename, package); + } + _ => panic!("Unknown archive type"), } + } - let mut repodata = RepoData { - info: Some(ChannelInfo { - subdir: 
platform.clone(), - base_url: None, - }), - packages: HashMap::default(), - conda_packages: HashMap::default(), - removed: HashSet::default(), - version: Some(2), - }; + let repodata = RepoData { + info: Some(ChannelInfo { + subdir: subdir.to_string(), + base_url: None, + }), + packages, + conda_packages, + removed: HashSet::default(), + version: Some(2), + }; - for (p, t) in entries.iter().filter_map(|(p, t)| { - p.parent().and_then(|parent| { - parent.file_name().and_then(|file_name| { - if file_name == OsStr::new(&platform) { - // If the file_name is the platform we're looking for, return Some((p, t)) - Some((p, t)) - } else { - // Otherwise, we return None to filter out this item - None - } - }) - }) - }) { - let record = match t { - ArchiveType::TarBz2 => package_record_from_tar_bz2(p), - ArchiveType::Conda => package_record_from_conda(p), - }; - let (Ok(record), Some(file_name)) = (record, p.file_name()) else { - tracing::info!("Could not read package record from {:?}", p); - continue; - }; - match t { - ArchiveType::TarBz2 => repodata - .packages - .insert(file_name.to_string_lossy().to_string(), record), - ArchiveType::Conda => repodata - .conda_packages - .insert(file_name.to_string_lossy().to_string(), record), - }; + let repodata_path = format!("{subdir}/repodata.json"); + let repodata_bytes = serde_json::to_vec(&repodata)?; + op.write(&repodata_path, repodata_bytes).await?; + // todo: also write repodata.json.bz2, repodata.json.zst, repodata.json.jlap and sharded repodata once available in rattler + // https://github.com/conda/rattler/issues/1096 + + Ok(()) +} + +/// Create a new `repodata.json` for all packages in the channel at the given directory. 
+pub async fn index_fs( + channel: impl Into, + target_platform: Option, + force: bool, + max_parallel: usize, +) -> anyhow::Result<()> { + let mut config = FsConfig::default(); + config.root = Some(channel.into().canonicalize()?.to_string_lossy().to_string()); + index(target_platform, config, force, max_parallel).await +} + +/// Create a new `repodata.json` for all packages in the channel at the given S3 URL. +#[allow(clippy::too_many_arguments)] +pub async fn index_s3( + channel: Url, + region: String, + endpoint_url: Url, + force_path_style: bool, + access_key_id: Option, + secret_access_key: Option, + session_token: Option, + target_platform: Option, + force: bool, + max_parallel: usize, +) -> anyhow::Result<()> { + let mut s3_config = S3Config::default(); + s3_config.root = Some(channel.path().to_string()); + s3_config.bucket = channel + .host_str() + .ok_or(anyhow::anyhow!("No bucket in S3 URL"))? + .to_string(); + s3_config.region = Some(region); + s3_config.endpoint = Some(endpoint_url.to_string()); + s3_config.enable_virtual_host_style = !force_path_style; + // Use credentials from the CLI if they are provided. + if let (Some(access_key_id), Some(secret_access_key)) = (access_key_id, secret_access_key) { + s3_config.secret_access_key = Some(secret_access_key); + s3_config.access_key_id = Some(access_key_id); + s3_config.session_token = session_token; + } else { + // If they're not provided, check rattler authentication storage for credentials. 
+ let auth_storage = AuthenticationStorage::from_env_and_defaults()?; + let auth = auth_storage.get_by_url(channel)?; + if let ( + _, + Some(Authentication::S3Credentials { + access_key_id, + secret_access_key, + session_token, + }), + ) = auth + { + s3_config.access_key_id = Some(access_key_id); + s3_config.secret_access_key = Some(secret_access_key); + s3_config.session_token = session_token; } + } + index(target_platform, s3_config, force, max_parallel).await +} + +/// Create a new `repodata.json` for all packages in the given configurator's root. +/// If `target_platform` is `Some`, only that specific subdir is indexed. +/// Otherwise indexes all subdirs and creates a `repodata.json` for each. +/// +/// The process is the following: +/// 1. Get all subdirs and create `noarch` and `target_platform` if they do not exist. +/// 2. Iterate subdirs and index each subdir. +/// Therefore, we need to: +/// 1. Collect all uploaded packages in subdir +/// 2. Collect all registered packages from `repodata.json` (if exists) +/// 3. Determine which packages to add to and to delete from `repodata.json` +/// 4. Write `repodata.json` back +pub async fn index( + target_platform: Option, + config: T, + force: bool, + max_parallel: usize, +) -> anyhow::Result<()> { + let builder = config.into_builder(); + + // Get all subdirs + let op = Operator::new(builder)?.finish(); + let entries = op.list_with("").await?; + let mut subdirs = entries + .iter() + .filter_map(|entry| { + if entry.metadata().mode().is_dir() && entry.name() != "/" { + // Directory entries always end with `/`. 
+ Some(entry.name().trim_end_matches('/').to_string()) + } else { + None + } + }) + .map(|s| Platform::from_str(&s)) + .collect::, _>>()?; - let mut out_file = - NamedTempFile::with_prefix_in("repodata-", output_folder.join(&platform))?; - out_file.write_all(serde_json::to_string_pretty(&repodata)?.as_bytes())?; - out_file.persist(output_folder.join(&platform).join("repodata.json"))?; + // If requested `target_platform` subdir does not exist, we create it. + if let Some(target_platform) = target_platform { + tracing::debug!("Did not find {target_platform} subdir, creating."); + if !subdirs.contains(&target_platform) { + op.create_dir(&format!("{}/", target_platform.as_str())) + .await?; + } + // Limit subdirs to only the requested `target_platform`. + subdirs = HashSet::default(); + subdirs.insert(target_platform); + } else if !subdirs.contains(&Platform::NoArch) { + // If `noarch` subdir does not exist, we create it. + tracing::debug!("Did not find noarch subdir, creating."); + op.create_dir(&format!("{}/", Platform::NoArch.as_str())) + .await?; + subdirs.insert(Platform::NoArch); } + let multi_progress = MultiProgress::new(); + let semaphore = Semaphore::new(max_parallel); + let semaphore = Arc::new(semaphore); + + let tasks = subdirs + .iter() + .map(|subdir| { + tokio::spawn(index_subdir( + *subdir, + op.clone(), + force, + multi_progress.clone(), + semaphore.clone(), + )) + }) + .collect::>(); + try_join_all(tasks).await?; + Ok(()) } - -// TODO: write proper unit tests for above functions diff --git a/crates/rattler_index/src/main.rs b/crates/rattler_index/src/main.rs new file mode 100644 index 000000000..dadb4601d --- /dev/null +++ b/crates/rattler_index/src/main.rs @@ -0,0 +1,133 @@ +use clap::{arg, Parser, Subcommand}; +use clap_verbosity_flag::Verbosity; +use rattler_conda_types::Platform; +use rattler_index::{index_fs, index_s3}; +use tracing_log::AsTrace; +use url::Url; + +fn parse_s3_url(value: &str) -> Result { + let url: Url = 
Url::parse(value).map_err(|e| format!("`{value}` isn't a valid URL: {e}"))?; + if url.scheme() == "s3" && url.host_str().is_some() { + Ok(url) + } else { + Err(format!( + "Only S3 URLs of format s3://bucket/... can be used, not `{value}`" + )) + } +} + +/// The `rattler-index` CLI. +#[derive(Parser)] +#[command(version, about, long_about = None)] +struct Cli { + #[command(subcommand)] + command: Commands, + + #[command(flatten)] + verbose: Verbosity, + + /// Whether to force the re-indexing of all packages. + /// Note that this will create a new repodata.json instead of updating the existing one. + #[arg(short, long, default_value = "false", global = true)] + force: bool, + + /// The maximum number of packages to process in-memory simultaneously. + /// This is necessary to limit memory usage when indexing large channels. + #[arg(long, default_value = "128", global = true)] + max_parallel: usize, + + /// A specific platform to index. + /// Defaults to all platforms available in the channel. + #[arg(long, global = true)] + target_platform: Option, +} + +/// The subcommands for the `rattler-index` CLI. +#[derive(Subcommand)] +#[allow(clippy::large_enum_variant)] +enum Commands { + /// Index a channel stored on the filesystem. + #[command(name = "fs")] + FileSystem { + /// The path to the channel directory. + #[arg()] + channel: std::path::PathBuf, + }, + + /// Index a channel stored in an S3 bucket. + S3 { + /// The S3 channel URL, e.g. `s3://my-bucket/my-channel`. 
+ #[arg(value_parser = parse_s3_url)] + channel: Url, + + /// The endpoint URL of the S3 backend + #[arg( + long, + env = "S3_ENDPOINT_URL", + default_value = "https://s3.amazonaws.com" + )] + endpoint_url: Url, + + /// The region of the S3 backend + #[arg(long, env = "S3_REGION", default_value = "eu-central-1")] + region: String, + + /// Whether to use path-style S3 URLs + #[arg(long, env = "S3_FORCE_PATH_STYLE", default_value = "false")] + force_path_style: bool, + + /// The access key ID for the S3 bucket. + #[arg(long, env = "S3_ACCESS_KEY_ID", requires_all = ["secret_access_key"])] + access_key_id: Option, + + /// The secret access key for the S3 bucket. + #[arg(long, env = "S3_SECRET_ACCESS_KEY", requires_all = ["access_key_id"])] + secret_access_key: Option, + + /// The session token for the S3 bucket. + #[arg(long, env = "S3_SESSION_TOKEN", requires_all = ["access_key_id", "secret_access_key"])] + session_token: Option, + }, +} + +/// Entry point of the `rattler-index` cli. +#[tokio::main] +async fn main() -> anyhow::Result<()> { + // Parse the command line arguments + let cli = Cli::parse(); + + tracing_subscriber::FmtSubscriber::builder() + .with_max_level(cli.verbose.log_level_filter().as_trace()) + .init(); + + match cli.command { + Commands::FileSystem { channel } => { + index_fs(channel, cli.target_platform, cli.force, cli.max_parallel).await + } + Commands::S3 { + channel, + region, + endpoint_url, + force_path_style, + access_key_id, + secret_access_key, + session_token, + } => { + index_s3( + channel, + region, + endpoint_url, + force_path_style, + access_key_id, + secret_access_key, + session_token, + cli.target_platform, + cli.force, + cli.max_parallel, + ) + .await + } + }?; + println!("Finished indexing channel."); + Ok(()) +} diff --git a/crates/rattler_index/tests/test_index.rs b/crates/rattler_index/tests/test_index.rs index e789253ef..5fd83aeec 100644 --- a/crates/rattler_index/tests/test_index.rs +++ 
b/crates/rattler_index/tests/test_index.rs @@ -5,31 +5,39 @@ use std::{ }; use rattler_conda_types::Platform; -use rattler_index::index; +use rattler_index::index_fs; use serde_json::Value; fn test_data_dir() -> PathBuf { Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data") } -#[test] -fn test_index() { +#[tokio::test] +async fn test_index() { let temp_dir = tempfile::tempdir().unwrap(); let subdir_path = Path::new("win-64"); - let conda_file_path = tools::download_and_cache_file( - "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda" - .parse() - .unwrap(), - "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e", - ) + let conda_file_path = tokio::task::spawn_blocking(|| { + tools::download_and_cache_file( + "https://conda.anaconda.org/conda-forge/win-64/conda-22.11.1-py38haa244fe_1.conda" + .parse() + .unwrap(), + "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e", + ) + }) + .await + .unwrap() .unwrap(); let index_json_path = Path::new("conda-22.11.1-py38haa244fe_1-index.json"); - let tar_bz2_file_path = tools::download_and_cache_file( - "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2" - .parse() - .unwrap(), - "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4", - ) + let tar_bz2_file_path = tokio::task::spawn_blocking(|| { + tools::download_and_cache_file( + "https://conda.anaconda.org/conda-forge/win-64/conda-22.9.0-py38haa244fe_2.tar.bz2" + .parse() + .unwrap(), + "3c2c2e8e81bde5fb1ac4b014f51a62411feff004580c708c97a0ec2b7058cdc4", + ) + }) + .await + .unwrap() .unwrap(); fs::create_dir(temp_dir.path().join(subdir_path)).unwrap(); @@ -50,7 +58,7 @@ fn test_index() { ) .unwrap(); - let res = index(temp_dir.path(), Some(&Platform::Win64)); + let res = index_fs(temp_dir.path(), Some(Platform::Win64), true, 100).await; assert!(res.is_ok()); let repodata_path = temp_dir.path().join(subdir_path).join("repodata.json"); @@ -84,10 +92,16 @@ fn 
test_index() { ); } -#[test] -fn test_index_empty_directory() { +#[tokio::test] +async fn test_index_empty_directory_creates_noarch_repodata() { let temp_dir = tempfile::tempdir().unwrap(); - let res = index(temp_dir.path(), None); + let noarch_path = temp_dir.path().join("noarch"); + let repodata_path = noarch_path.join("repodata.json"); + + let res = index_fs(temp_dir.path(), None, true, 100).await; + assert!(res.is_ok()); - assert_eq!(fs::read_dir(temp_dir).unwrap().count(), 0); + assert!(noarch_path.is_dir()); + assert_eq!(fs::read_dir(&noarch_path).unwrap().count(), 1); + assert!(repodata_path.is_file()); } diff --git a/crates/rattler_menuinst/src/macos.rs b/crates/rattler_menuinst/src/macos.rs index 871b61172..c02ebdc6e 100644 --- a/crates/rattler_menuinst/src/macos.rs +++ b/crates/rattler_menuinst/src/macos.rs @@ -897,16 +897,16 @@ mod tests { for item in &parsed_schema.menu_items { let icon = item.command.icon.as_ref().unwrap(); for ext in &["icns", "png", "svg"] { - placeholders.insert("ICON_EXT".to_string(), ext.to_string()); + placeholders.insert("ICON_EXT".to_string(), (*ext).to_string()); let icon_path = icon.resolve(FakePlaceholders { placeholders: placeholders.clone(), }); - fs::write(&icon_path, &[]).unwrap(); + fs::write(&icon_path, []).unwrap(); } } fs::create_dir_all(prefix_path.join("bin")).unwrap(); - fs::write(prefix_path.join("bin/python"), &[]).unwrap(); + fs::write(prefix_path.join("bin/python"), []).unwrap(); Self { _tmp_dir: tmp_dir, @@ -926,8 +926,8 @@ mod tests { let fake_prefix = FakePrefix::new(Path::new("spyder/menu.json")); let placeholders = super::BaseMenuItemPlaceholders::new( - &fake_prefix.prefix(), - &fake_prefix.prefix(), + fake_prefix.prefix(), + fake_prefix.prefix(), rattler_conda_types::Platform::current(), ); diff --git a/crates/rattler_package_streaming/Cargo.toml b/crates/rattler_package_streaming/Cargo.toml index 34cbce506..c4baabd85 100644 --- a/crates/rattler_package_streaming/Cargo.toml +++ 
b/crates/rattler_package_streaming/Cargo.toml @@ -19,10 +19,15 @@ num_cpus = { workspace = true } rattler_conda_types = { path = "../rattler_conda_types", version = "0.31.0", default-features = false } rattler_digest = { path = "../rattler_digest", version = "1.0.6", default-features = false } rattler_networking = { path = "../rattler_networking", version = "0.22.5", default-features = false } -rattler_redaction = { version = "0.1.6", path = "../rattler_redaction", features = ["reqwest", "reqwest-middleware"] } +rattler_redaction = { version = "0.1.6", path = "../rattler_redaction", features = [ + "reqwest", + "reqwest-middleware", +], default-features = false } reqwest = { workspace = true, features = ["stream"], optional = true } reqwest-middleware = { workspace = true, optional = true } -simple_spawn_blocking = { version = "1.1.0", path = "../simple_spawn_blocking", features = ["tokio"] } +simple_spawn_blocking = { version = "1.1.0", path = "../simple_spawn_blocking", features = [ + "tokio", +] } serde_json = { workspace = true } tar = { workspace = true } tempfile = { workspace = true } @@ -36,8 +41,8 @@ zstd = { workspace = true, features = ["zstdmt"] } [features] default = ["native-tls"] -native-tls = ["rattler_networking/native-tls"] -rustls-tls = ["rattler_networking/rustls-tls"] +native-tls = ["rattler_networking/native-tls", "rattler_redaction/native-tls"] +rustls-tls = ["rattler_networking/rustls-tls", "rattler_redaction/rustls-tls"] wasm = ["zstd/wasm"] reqwest = ["dep:reqwest-middleware", "dep:reqwest"] diff --git a/crates/rattler_redaction/Cargo.toml b/crates/rattler_redaction/Cargo.toml index 6bd07737f..9aad3e28c 100644 --- a/crates/rattler_redaction/Cargo.toml +++ b/crates/rattler_redaction/Cargo.toml @@ -10,7 +10,12 @@ repository.workspace = true license.workspace = true readme.workspace = true +[features] +default = ["rustls-tls"] +native-tls = ["reqwest/native-tls", "reqwest/native-tls-alpn"] +rustls-tls = ["reqwest/rustls-tls", 
"reqwest/rustls-tls-native-roots"] + [dependencies] url = { workspace = true } -reqwest = { workspace = true, optional = true } +reqwest = { workspace = true, optional = true, default-features = false } reqwest-middleware = { workspace = true, optional = true } diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index ec27e53ed..548278d68 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -4,6 +4,11 @@ version = "0.1.0" edition = "2021" publish = false +[features] +default = ["rustls-tls"] +native-tls = ["reqwest/native-tls"] +rustls-tls = ["reqwest/rustls-tls"] + [dependencies] anyhow = { workspace = true } bindgen = { workspace = true } @@ -15,7 +20,9 @@ fslock = { workspace = true } dirs = { workspace = true } thiserror = { workspace = true } rattler_digest = { path = "../rattler_digest" } -reqwest = { workspace = true, default-features = false, features = ["blocking", "rustls-tls"] } +reqwest = { workspace = true, default-features = false, features = [ + "blocking", +] } tempfile = { workspace = true } tokio = { workspace = true } diff --git a/py-rattler/pixi.lock b/py-rattler/pixi.lock index f2ca53c19..b48ad7eba 100644 --- a/py-rattler/pixi.lock +++ b/py-rattler/pixi.lock @@ -885,7 +885,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/zstandard-0.23.0-py39h08a7858_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda + - pypi: https://files.pythonhosted.org/packages/ac/6e/faf7c6c3ae59641c75023fb5dcc8a02c33752ac8ccadf9931e8d8364f2fe/boto3-1.7.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/01/b7/cb08cd1af2bb0d0dfb393101a93b6ab6fb80f109ab7b37f2f34386c11351/botocore-1.10.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7a/7c/d7b2a0417af6428440c0ad7cb9799073e507b1a465f827d058b826236964/numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl + - pypi: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d7/14/2a0004d487464d120c9fb85313a75cd3d71a7506955be458eebfe19a6b1d/s3transfer-0.1.13-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c4/83/07c5cd92ead27ecf48e3d12d0164ac0f6a7990b4d0d47c297398c285627a/types_networkx-3.3.0.20241020-py3-none-any.whl osx-64: - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.1.0-py39h7c0e7c0_2.conda @@ -946,7 +953,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-64/zstandard-0.23.0-py39hc23f734_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.6-h915ae27_0.conda + - pypi: https://files.pythonhosted.org/packages/ac/6e/faf7c6c3ae59641c75023fb5dcc8a02c33752ac8ccadf9931e8d8364f2fe/boto3-1.7.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/01/b7/cb08cd1af2bb0d0dfb393101a93b6ab6fb80f109ab7b37f2f34386c11351/botocore-1.10.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/9a/cd/d5b0402b801c8a8b56b04c1e85c6165efab298d2f0ab741c2406516ede3a/numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl + - pypi: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d7/14/2a0004d487464d120c9fb85313a75cd3d71a7506955be458eebfe19a6b1d/s3transfer-0.1.13-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c4/83/07c5cd92ead27ecf48e3d12d0164ac0f6a7990b4d0d47c297398c285627a/types_networkx-3.3.0.20241020-py3-none-any.whl osx-arm64: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.1.0-py39hfa9831e_2.conda @@ -1007,7 +1021,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/osx-arm64/xz-5.2.6-h57fd34a_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstandard-0.23.0-py39hcf1bb16_1.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.6-hb46c0d2_0.conda + - pypi: https://files.pythonhosted.org/packages/ac/6e/faf7c6c3ae59641c75023fb5dcc8a02c33752ac8ccadf9931e8d8364f2fe/boto3-1.7.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/01/b7/cb08cd1af2bb0d0dfb393101a93b6ab6fb80f109ab7b37f2f34386c11351/botocore-1.10.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/14/27/638aaa446f39113a3ed38b37a66243e21b38110d021bfcb940c383e120f2/numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl + - pypi: 
https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d7/14/2a0004d487464d120c9fb85313a75cd3d71a7506955be458eebfe19a6b1d/s3transfer-0.1.13-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c4/83/07c5cd92ead27ecf48e3d12d0164ac0f6a7990b4d0d47c297398c285627a/types_networkx-3.3.0.20241020-py3-none-any.whl win-64: - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.1.0-py39ha51f57c_2.conda @@ -1076,7 +1097,14 @@ environments: - conda: https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/win-64/zstandard-0.23.0-py39h9bf74da_1.conda - conda: https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.6-h0ea2cb4_0.conda + - pypi: https://files.pythonhosted.org/packages/ac/6e/faf7c6c3ae59641c75023fb5dcc8a02c33752ac8ccadf9931e8d8364f2fe/boto3-1.7.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/01/b7/cb08cd1af2bb0d0dfb393101a93b6ab6fb80f109ab7b37f2f34386c11351/botocore-1.10.84-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/63/38/6cc19d6b8bfa1d1a459daf2b3fe325453153ca7019976274b6f33d8b5663/numpy-1.24.4-cp39-cp39-win_amd64.whl + - pypi: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/d7/14/2a0004d487464d120c9fb85313a75cd3d71a7506955be458eebfe19a6b1d/s3transfer-0.1.13-py2.py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c4/83/07c5cd92ead27ecf48e3d12d0164ac0f6a7990b4d0d47c297398c285627a/types_networkx-3.3.0.20241020-py3-none-any.whl packages: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1145,6 +1173,25 @@ packages: purls: [] size: 5682777 timestamp: 1729655371045 +- pypi: https://files.pythonhosted.org/packages/ac/6e/faf7c6c3ae59641c75023fb5dcc8a02c33752ac8ccadf9931e8d8364f2fe/boto3-1.7.84-py2.py3-none-any.whl + name: boto3 + version: 1.7.84 + sha256: 0ed4b107c3b4550547aaec3c9bb17df068ff92d1f6f4781205800e2cb8a66de5 + requires_dist: + - botocore>=1.10.84,<1.11.0 + - jmespath>=0.7.1,<1.0.0 + - s3transfer>=0.1.10,<0.2.0 +- pypi: https://files.pythonhosted.org/packages/01/b7/cb08cd1af2bb0d0dfb393101a93b6ab6fb80f109ab7b37f2f34386c11351/botocore-1.10.84-py2.py3-none-any.whl + name: botocore + version: 1.10.84 + sha256: 380852e1adb9ba4ba9ff096af61f88a6888197b86e580e1bd786f04ebe6f9c0c + requires_dist: + - python-dateutil>=2.1,<2.7.0 ; python_full_version == '2.6.*' + - python-dateutil>=2.1,<3.0.0 ; python_full_version >= '2.7' + - jmespath>=0.7.1,<1.0.0 + - docutils>=0.10 + - ordereddict==1.1 ; python_full_version == '2.6.*' + - simplejson==3.3.0 ; python_full_version == '2.6.*' - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py312h2ec8cdc_2.conda sha256: f2a59ccd20b4816dea9a2a5cb917eb69728271dbf1aeab4e1b7e609330a50b6f md5: b0b867af6fc74b2a0aa206da29c0f3cf @@ -1679,6 +1726,11 @@ packages: license_family: PSF size: 24062 timestamp: 1615232388757 +- pypi: 
https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl + name: docutils + version: 0.21.2 + sha256: dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 + requires_python: '>=3.9' - conda: https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.2.2-pyhd8ed1ab_0.conda sha256: e0edd30c4b7144406bb4da975e6bb97d6bc9c0e999aa4efe66ae108cada5d5b5 md5: d02ae936e42063ca46af6cdad2dbd1e0 @@ -2096,6 +2148,11 @@ packages: license_family: BSD size: 111565 timestamp: 1715127275924 +- pypi: https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl + name: jmespath + version: 0.10.0 + sha256: cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f + requires_python: '>=2.6,!=3.0.*,!=3.1.*,!=3.2.*' - conda: https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-3.10.0-he073ed8_17.conda sha256: c28d69ca84533f0e2093f17ae6d3e19ee3661dd397618630830b1b9afc3bfb4d md5: 285931bd28b3b8f176d46dd9fd627a09 @@ -4389,6 +4446,13 @@ packages: purls: [] size: 17024927 timestamp: 1727718943163 +- pypi: https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl + name: python-dateutil + version: 2.9.0.post0 + sha256: a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 + requires_dist: + - six>=1.5 + requires_python: '>=2.7,!=3.0.*,!=3.1.*,!=3.2.*' - conda: https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0-pyhd8ed1ab_0.conda sha256: f3ceef02ac164a8d3a080d0d32f8e2ebe10dd29e3a685d240e38b3599e146320 md5: 2cf4264fffb9e6eff6031c5b6884d61c @@ -4884,6 +4948,13 @@ packages: purls: [] size: 36747126 timestamp: 1740489714324 +- pypi: https://files.pythonhosted.org/packages/d7/14/2a0004d487464d120c9fb85313a75cd3d71a7506955be458eebfe19a6b1d/s3transfer-0.1.13-py2.py3-none-any.whl 
+ name: s3transfer + version: 0.1.13 + sha256: c7a9ec356982d5e9ab2d4b46391a7d6a950e2b04c472419f5fdec70cc0ada72f + requires_dist: + - botocore>=1.3.0,<2.0.0 + - futures>=2.2.0,<4.0.0 ; python_full_version >= '2.6' and python_full_version < '2.8' - conda: https://conda.anaconda.org/conda-forge/noarch/setuptools-75.1.0-pyhd8ed1ab_0.conda sha256: 6725235722095c547edd24275053c615158d6163f396550840aebd6e209e4738 md5: d5cd48392c67fb6849ba459c2c2b671f @@ -4906,6 +4977,11 @@ packages: - pkg:pypi/shellingham?source=hash-mapping size: 14568 timestamp: 1698144516278 +- pypi: https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl + name: six + version: 1.17.0 + sha256: 4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 + requires_python: '>=2.7,!=3.0.*,!=3.1.*,!=3.2.*' - conda: https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2 sha256: a85c38227b446f42c5b90d9b642f2c0567880c15d72492d8da074a59c8f91dd6 md5: e5f25f8dbc060e9a8d912e432202afc2 diff --git a/py-rattler/pixi.toml b/py-rattler/pixi.toml index e5020de92..1b92812df 100644 --- a/py-rattler/pixi.toml +++ b/py-rattler/pixi.toml @@ -2,8 +2,8 @@ name = "py-rattler" description = "Add a short description here" authors = [ - "Bas Zalmstra ", - "Tarun Pratap Singh ", + "Bas Zalmstra ", + "Tarun Pratap Singh ", ] channels = ["conda-forge"] platforms = ["win-64", "linux-64", "osx-64", "osx-arm64"] @@ -39,6 +39,8 @@ typer = "*" [feature.test.pypi-dependencies] types-networkx = "*" +# boto3 on conda-forge requires python >=3.10 +boto3 = "*" [feature.test.tasks] test = { cmd = "pytest --doctest-modules", depends-on = ["build"] } diff --git a/py-rattler/rattler/index/__init__.py b/py-rattler/rattler/index/__init__.py index 9023b5dd1..060834cbb 100644 --- a/py-rattler/rattler/index/__init__.py +++ b/py-rattler/rattler/index/__init__.py @@ -1,3 +1,3 @@ -from rattler.index.index import index +from rattler.index.index 
import index_fs, index_s3 -__all__ = ["index"] +__all__ = ["index_s3", "index_fs"] diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index a55cce8e8..f8c716508 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -1,14 +1,17 @@ from __future__ import annotations + import os from typing import Optional from rattler.platform import Platform -from rattler.rattler import py_index +from rattler.rattler import py_index_fs, py_index_s3 -def index( +async def index_fs( channel_directory: os.PathLike[str], target_platform: Optional[Platform] = None, + force: bool = False, + max_parallel: int = 128, ) -> None: """ Indexes dependencies in the `channel_directory` for one or more subdirectories within said directory. @@ -21,8 +24,58 @@ def index( channel_directory: A `os.PathLike[str]` that is the directory containing subdirectories of dependencies to index. target_platform(optional): A `Platform` to index dependencies for. + force: Whether to forcefully re-index all subdirs. + max_parallel: The maximum number of packages to process in-memory simultaneously. """ - py_index( + await py_index_fs( channel_directory, target_platform._inner if target_platform else target_platform, + force, + max_parallel, + ) + + +async def index_s3( + channel_url: str, + region: str, + endpoint_url: str, + force_path_style: bool = False, + access_key_id: Optional[str] = None, + secret_access_key: Optional[str] = None, + session_token: Optional[str] = None, + target_platform: Optional[Platform] = None, + force: bool = False, + max_parallel: int = 128, +) -> None: + """ + Indexes dependencies in the `channel_url` for one or more subdirectories in the S3 directory. + Will generate repodata.json files in each subdirectory containing metadata about each present package, + or if `target_platform` is specified will only consider the subdirectory corresponding to this platform. 
+ Will always index the "noarch" subdirectory, and thus this subdirectory should always be present, because + conda channels at a minimum must include this subdirectory. + + Arguments: + channel_url: An S3 URL (e.g., s3://my-bucket/my-channel) that contains the subdirectories + of dependencies to index. + region: The region of the S3 bucket. + endpoint_url: The endpoint URL of the S3 bucket. + force_path_style: Whether to use path-style addressing for S3. + access_key_id(optional): The access key ID to use for authentication. + secret_access_key(optional): The secret access key to use for authentication. + session_token(optional): The session token to use for authentication. + target_platform(optional): A `Platform` to index dependencies for. + force: Whether to forcefully re-index all subdirs. + max_parallel: The maximum number of packages to process in-memory simultaneously. + """ + await py_index_s3( + channel_url, + region, + endpoint_url, + force_path_style, + access_key_id, + secret_access_key, + session_token, + target_platform._inner if target_platform else target_platform, + force, + max_parallel, ) diff --git a/py-rattler/src/index.rs b/py-rattler/src/index.rs index 17a04c4a3..5658441f9 100644 --- a/py-rattler/src/index.rs +++ b/py-rattler/src/index.rs @@ -1,23 +1,63 @@ -use pyo3::{pyfunction, PyResult, Python}; +use pyo3::{pyfunction, Bound, PyAny, PyResult, Python}; +use pyo3_async_runtimes::tokio::future_into_py; use rattler_conda_types::Platform; -use rattler_index::index; +use rattler_index::{index_fs, index_s3}; +use url::Url; use std::path::PathBuf; use crate::{error::PyRattlerError, platform::PyPlatform}; #[pyfunction] -#[pyo3(signature = (channel_directory, target_platform=None))] -pub fn py_index( +#[pyo3(signature = (channel_directory, target_platform=None, force=false, max_parallel=128))] +pub fn py_index_fs( py: Python<'_>, channel_directory: PathBuf, target_platform: Option, -) -> PyResult<()> { - py.allow_threads(move || { - let path =
channel_directory.as_path(); - match index(path, target_platform.map(Platform::from).as_ref()) { - Ok(_v) => Ok(()), - Err(e) => Err(PyRattlerError::from(e).into()), - } + force: bool, + max_parallel: usize, +) -> PyResult> { + future_into_py(py, async move { + let target_platform = target_platform.map(Platform::from); + index_fs(channel_directory, target_platform, force, max_parallel) + .await + .map_err(|e| PyRattlerError::from(e).into()) + }) +} + +#[pyfunction] +#[allow(clippy::too_many_arguments)] +#[pyo3(signature = (channel_url, region, endpoint_url, force_path_style, access_key_id=None,secret_access_key=None, session_token=None, target_platform=None, force=false, max_parallel=128))] +pub fn py_index_s3( + py: Python<'_>, + channel_url: String, + region: String, + endpoint_url: String, + force_path_style: bool, + access_key_id: Option, + secret_access_key: Option, + session_token: Option, + target_platform: Option, + force: bool, + max_parallel: usize, +) -> PyResult> { + let channel_url = Url::parse(&channel_url).map_err(PyRattlerError::from)?; + let endpoint_url = Url::parse(&endpoint_url).map_err(PyRattlerError::from)?; + let target_platform = target_platform.map(Platform::from); + future_into_py(py, async move { + index_s3( + channel_url, + region, + endpoint_url, + force_path_style, + access_key_id, + secret_access_key, + session_token, + target_platform, + force, + max_parallel, + ) + .await + .map_err(|e| PyRattlerError::from(e).into()) }) } diff --git a/py-rattler/src/lib.rs b/py-rattler/src/lib.rs index 0416ec35a..fb25ea572 100644 --- a/py-rattler/src/lib.rs +++ b/py-rattler/src/lib.rs @@ -41,7 +41,7 @@ use error::{ }; use explicit_environment_spec::{PyExplicitEnvironmentEntry, PyExplicitEnvironmentSpec}; use generic_virtual_package::PyGenericVirtualPackage; -use index::py_index; +use index::{py_index_fs, py_index_s3}; use index_json::PyIndexJson; use installer::py_install; use lock::{ @@ -160,7 +160,8 @@ fn rattler<'py>(py: Python<'py>, m: 
Bound<'py, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(py_solve_with_sparse_repodata, &m).unwrap())?; m.add_function(wrap_pyfunction!(get_rattler_version, &m).unwrap())?; m.add_function(wrap_pyfunction!(py_install, &m).unwrap())?; - m.add_function(wrap_pyfunction!(py_index, &m).unwrap())?; + m.add_function(wrap_pyfunction!(py_index_fs, &m).unwrap())?; + m.add_function(wrap_pyfunction!(py_index_s3, &m).unwrap())?; m.add_function(wrap_pyfunction!(package_streaming::extract_tar_bz2, &m).unwrap())?; m.add_function(wrap_pyfunction!(package_streaming::extract, &m).unwrap())?; diff --git a/py-rattler/tests/unit/test_index.py b/py-rattler/tests/unit/test_index.py index f79bc6c1d..c8e894982 100644 --- a/py-rattler/tests/unit/test_index.py +++ b/py-rattler/tests/unit/test_index.py @@ -1,10 +1,18 @@ # type: ignore import os +import shutil +import uuid +from dataclasses import dataclass, field from pathlib import Path +from typing import Iterator + +import boto3 import pytest -import shutil -from rattler import Platform, index +from rattler import Platform +from rattler.index import index_fs, index_s3 + +# ------------------------------------ FILESYSTEM ------------------------------------ # @pytest.fixture @@ -18,8 +26,9 @@ def package_directory(tmp_path, package_file_ruff: Path, package_file_pytweening return tmp_path -def test_index(package_directory): - index(package_directory) +@pytest.mark.asyncio +async def test_index(package_directory): + await index_fs(package_directory) assert set(os.listdir(package_directory)) == {"noarch", "win-64"} assert "repodata.json" in os.listdir(package_directory / "win-64") @@ -30,19 +39,18 @@ def test_index(package_directory): assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() -def test_index_specific_subdir_non_noarch(package_directory): - index(package_directory, Platform("win-64")) +@pytest.mark.asyncio +async def test_index_specific_subdir_non_noarch(package_directory): + await index_fs(package_directory, 
Platform("win-64")) assert "repodata.json" in os.listdir(package_directory / "win-64") with open(package_directory / "win-64/repodata.json") as f: assert "ruff-0.0.171-py310h298983d_0" in f.read() - assert "repodata.json" in os.listdir(package_directory / "noarch") - with open(package_directory / "noarch/repodata.json") as f: - assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() -def test_index_specific_subdir_noarch(package_directory): - index(package_directory, Platform("noarch")) +@pytest.mark.asyncio +async def test_index_specific_subdir_noarch(package_directory): + await index_fs(package_directory, Platform("noarch")) win_files = os.listdir(package_directory / "win-64") assert "repodata.json" not in win_files @@ -50,3 +58,90 @@ def test_index_specific_subdir_noarch(package_directory): assert "repodata.json" in os.listdir(package_directory / "noarch") with open(package_directory / "noarch/repodata.json") as f: assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() + + +# ---------------------------------------- S3 ---------------------------------------- # + + +@dataclass +class S3Config: + access_key_id: str + secret_access_key: str + region: str = "auto" + endpoint_url: str = "https://e1a7cde76f1780ec06bac859036dbaf7.r2.cloudflarestorage.com" + bucket_name: str = "rattler-build-upload-test" + channel_name: str = field(default_factory=lambda: f"channel{uuid.uuid4()}") + + +@pytest.fixture() +def s3_config() -> S3Config: + access_key_id = os.environ.get("RATTLER_TEST_R2_ACCESS_KEY_ID") + if not access_key_id: + pytest.skip("RATTLER_TEST_R2_ACCESS_KEY_ID environment variable is not set") + secret_access_key = os.environ.get("RATTLER_TEST_R2_SECRET_ACCESS_KEY") + if not secret_access_key: + pytest.skip("RATTLER_TEST_R2_SECRET_ACCESS_KEY environment variable is not set") + return S3Config( + access_key_id=access_key_id, + secret_access_key=secret_access_key, + ) + + +@pytest.fixture() +def s3_client(s3_config: S3Config): + return boto3.client( + service_name="s3", + 
endpoint_url=s3_config.endpoint_url, + aws_access_key_id=s3_config.access_key_id, + aws_secret_access_key=s3_config.secret_access_key, + region_name=s3_config.region, + ) + + +@pytest.fixture() +def s3_channel(s3_config: S3Config, s3_client) -> Iterator[str]: + channel_url = f"s3://{s3_config.bucket_name}/{s3_config.channel_name}" + + yield channel_url + + # Clean up the channel after the test + objects_to_delete = s3_client.list_objects_v2(Bucket=s3_config.bucket_name, Prefix=f"{s3_config.channel_name}/") + delete_keys = [{"Key": obj["Key"]} for obj in objects_to_delete.get("Contents", [])] + if delete_keys: + result = s3_client.delete_objects(Bucket=s3_config.bucket_name, Delete={"Objects": delete_keys}) + assert result["ResponseMetadata"]["HTTPStatusCode"] == 200 + + +@pytest.mark.asyncio +async def test_index_s3( + package_directory, + s3_config: S3Config, + s3_channel: str, + s3_client, +): + # Upload package to channel + filepath = package_directory / "noarch" / "pytweening-1.0.4-pyhd8ed1ab_0.tar.bz2" + s3_client.upload_file( + Filename=str(filepath), + Bucket=s3_config.bucket_name, + Key=f"{s3_config.channel_name}/noarch/pytweening-1.0.4-pyhd8ed1ab_0.tar.bz2", + ) + + # Run index command + await index_s3( + channel_url=s3_channel, + region=s3_config.region, + endpoint_url=s3_config.endpoint_url, + force_path_style=True, + access_key_id=s3_config.access_key_id, + secret_access_key=s3_config.secret_access_key, + force=True, + ) + + # Check if repodata.json was created + repodata_json = f"{s3_config.channel_name}/noarch/repodata.json" + result = s3_client.head_object( + Bucket=s3_config.bucket_name, + Key=repodata_json, + ) + assert result["ResponseMetadata"]["HTTPStatusCode"] == 200