"""Measure parallel write throughput by copying a source file with ``dd``.

Script path in the repository: perf_testing/scripts/highspeed_write_nonzero.py

Each worker process runs one ``dd`` invocation that copies data from a
source file into a new file in the target folder; the script then prints
the aggregate amount of data written and the overall speed.
"""
import os
import shutil
import time
import argparse
import subprocess
import mmap
import io
from multiprocessing import Pool, cpu_count


# Function to create files using dd command
def create_file_dd(file_index, folder, source_file, timestamp, block_size_gb=1, count=36):
    """Create one file with ``dd`` and report how long the write took.

    Args:
        file_index: Index of this file; used in the output file name.
        folder: Directory in which the output file is created.
        source_file: Path passed to ``dd`` as its input (``if=``).
        timestamp: Value embedded in the file name to keep runs distinct.
        block_size_gb: ``dd`` block size in GB (``bs=<n>G``). Defaults to 1.
        count: Number of blocks ``dd`` is asked to copy. Defaults to 36.

    Returns:
        A 5-tuple ``(filename, write_time, write_speed, file_size_gb, error)``.
        On success ``error`` is ``None``. On failure the numeric fields are
        zero and ``error`` is a human-readable message.
    """
    filename = os.path.join(folder, f'ddFile_{timestamp}_{file_index}')
    # NOTE: this is the size dd was *asked* to write; if the source file is
    # shorter than block_size_gb * count the real file will be smaller.
    file_size_gb = block_size_gb * count

    # Argument list (no shell) so paths with spaces or shell metacharacters
    # are passed to dd verbatim.
    command = [
        "dd",
        f"if={source_file}",
        f"of={filename}",
        f"bs={block_size_gb}G",
        f"count={count}",
        "oflag=direct",
    ]

    start_time = time.perf_counter()
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # dd itself could not be launched (e.g. not installed).
        return (filename, 0, 0.0, 0, f"Error creating file {filename}: {exc}")
    write_time = time.perf_counter() - start_time

    if result.returncode != 0:
        # Keep the same 5-element shape as the success path: main() reads
        # index 4 to decide whether the write failed.
        message = result.stderr.decode(errors="replace").strip()
        return (filename, 0, 0.0, 0, f"Error creating file {filename}: {message}")

    write_speed = (file_size_gb * 1024) / write_time if write_time > 0 else 0.0  # MB/s

    return (filename, write_time, write_speed, file_size_gb, None)


# Main function to handle parallel execution
def main(folder, num_files, source_file):
    """Write ``num_files`` files in parallel and print throughput statistics.

    Args:
        folder: Directory to write into; created if it does not exist.
        num_files: Number of files to create.
        source_file: File that ``dd`` copies data from.
    """
    os.makedirs(folder, exist_ok=True)

    timestamp = int(time.time())  # Get current timestamp for file naming

    start_time = time.perf_counter()
    results = []

    with Pool(processes=cpu_count()) as pool:  # Pool of workers based on the CPU count
        futures = [
            pool.apply_async(create_file_dd, (i, folder, source_file, timestamp))
            for i in range(num_files)
        ]

        # Collect results from async operations; this must happen inside the
        # ``with`` block because leaving it terminates the pool.
        for future in futures:
            result = future.get()
            if result[4] is None:  # No error
                results.append(result)
            else:
                print(result[4])  # Print error messages

    total_time = time.perf_counter() - start_time

    total_data_written = sum(r[3] for r in results)  # in GB
    if total_time > 0:
        speed_gbps = (total_data_written * 8) / total_time  # Convert GB to Gigabits (1 GB = 8 Gb)
        throughput = (total_data_written * 1024) / total_time
    else:
        speed_gbps = 0.0
        throughput = 0.0

    # Report only the files that were actually written successfully.
    print(f"Number of files written: {len(results)}")
    print(f"Total amount of data written: {total_data_written:.2f} GB")
    print(f"Total time taken: {total_time:.2f} seconds")
    print(f"Overall Speed: {speed_gbps:.2f} Gbps")
    print(f"Throughput: {throughput:.2f} MiB/s")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Create multiple files using various methods in parallel.')
    parser.add_argument('folder', type=str, help='The folder where the files will be written.')
    parser.add_argument('num_files', type=int, help='The number of files to create.')
    parser.add_argument('source_file', type=str, help='The source file to copy data from.')

    args = parser.parse_args()
    main(args.folder, args.num_files, args.source_file)

# python3 highspeed_write_nonzero.py ~/drs/random_data_test/ 5 /mnt/azcopy_test_180GB.log