From 3a1f2a7578c4110ed6f63294cbb10bb8633ffbe8 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Sun, 16 Feb 2025 11:43:18 +0800 Subject: [PATCH] fix extended test --- .github/workflows/extended.yml | 49 ++++++++++--------- .../sort_mem_validation.rs | 31 +++++++++++- 2 files changed, 54 insertions(+), 26 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 19910957a85b9..c3c8393c5d0a6 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -52,30 +52,31 @@ jobs: cargo check --profile ci --all-targets cargo clean -# # Run extended tests (with feature 'extended_tests') -# # Disabling as it is running out of disk space -# # see https://github.com/apache/datafusion/issues/14576 -# linux-test-extended: -# name: cargo test 'extended_tests' (amd64) -# needs: linux-build-lib -# runs-on: ubuntu-latest -# container: -# image: amd64/rust -# steps: -# - uses: actions/checkout@v4 -# with: -# submodules: true -# fetch-depth: 1 -# - name: Setup Rust toolchain -# uses: ./.github/actions/setup-builder -# with: -# rust-version: stable -# - name: Run tests (excluding doctests) -# run: cargo test --profile ci --exclude datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace,extended_tests -# - name: Verify Working Directory Clean -# run: git diff --exit-code -# - name: Cleanup -# run: cargo clean + # Run extended tests (with feature 'extended_tests') + linux-test-extended: + name: cargo test 'extended_tests' (amd64) + needs: linux-build-lib + runs-on: ubuntu-latest + container: + image: amd64/rust + options: --user root + steps: + - uses: actions/checkout@v4 + with: + submodules: true + fetch-depth: 1 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: stable + - name: Check free disk space + run: du -sh / + - name: Run tests (excluding doctests) + run: cargo test --profile ci --exclude 
datafusion-examples --exclude datafusion-benchmarks --workspace --lib --tests --bins --features avro,json,backtrace,extended_tests + - name: Verify Working Directory Clean + run: git diff --exit-code + - name: Cleanup + run: cargo clean # Check answers are correct when hash values collide hash-collisions: diff --git a/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs b/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs index 1789f37535a94..871dbe611facc 100644 --- a/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs +++ b/datafusion/core/tests/memory_limit/memory_limit_validation/sort_mem_validation.rs @@ -21,12 +21,14 @@ //! This file is organized as: //! - Test runners that spawn individual test processes //! - Test cases that contain the actual validation logic -use std::{process::Command, str}; - use log::info; +use std::sync::Once; +use std::{process::Command, str}; use crate::memory_limit::memory_limit_validation::utils; +static INIT: Once = Once::new(); + // =========================================================================== // Test runners: // Runners are splitted into multiple tests to run in parallel @@ -67,10 +69,35 @@ fn sort_with_mem_limit_2_cols_2_runner() { spawn_test_process("sort_with_mem_limit_2_cols_2"); } +/// `spawn_test_process` might trigger multiple recompilations, and the test binary +/// size might grow indefinitely. This initializer ensures recompilation is only done +/// once and the target size is bounded. 
+/// +/// TODO: This is a hack, can be cleaned up if we have a better way to let multiple +/// test cases run in different processes (instead of different threads by default) +fn init_once() { + INIT.call_once(|| { + let _ = Command::new("cargo") + .arg("test") + .arg("--no-run") + .arg("--package") + .arg("datafusion") + .arg("--test") + .arg("core_integration") + .arg("--features") + .arg("extended_tests") + .env("DATAFUSION_TEST_MEM_LIMIT_VALIDATION", "1") + .output() + .expect("Failed to execute test command"); + }); +} + /// Helper function that executes a test in a separate process with the required environment /// variable set. Memory limit validation tasks need to measure memory resident set /// size (RSS), so they must run in a separate process. fn spawn_test_process(test: &str) { + init_once(); + let test_path = format!( "memory_limit::memory_limit_validation::sort_mem_validation::{}", test