Skip to content

Commit

Permalink
Merge branch 'Codetector1374:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
Keniis0712 authored Aug 12, 2024
2 parents c66998b + 6a153a7 commit 7ce3ac9
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 42 deletions.
10 changes: 0 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ chrono = "0.4.38"
clap = { version = "4.5.9", features = ["derive"] }
crc = "3.2.1"
indicatif = "0.17.8"
memmap2 = "0.9.4"
num_enum = "0.7.2"
pretty-hex = "0.4.1"
sqlparser = "0.49.0"
Expand Down
125 changes: 94 additions & 31 deletions src/bin/tablespace_sort.rs
Original file line number Diff line number Diff line change
@@ -1,52 +1,115 @@
use clap::Parser;
use innodb::innodb::page::{Page, PageType, FIL_PAGE_SIZE};
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom, Write};
use std::path::PathBuf;

use clap::Parser;
use memmap2::MmapMut;
use tracing::{info, warn, Level};

#[derive(Parser, Debug)]
struct Arguments {
file: PathBuf,
}
#[arg(short='n', long="dry-run", action = clap::ArgAction::SetTrue)]
dry_run: bool,

const PAGE_SIZE: usize = 16 * 1024; // 16K block size
#[arg(long="no-color", action = clap::ArgAction::SetFalse)]
color: bool,

struct Page {
data: [u8; PAGE_SIZE],
}
#[arg(short='v', action = clap::ArgAction::Count, help="verbose level")]
verbose: u8,

impl Page {
fn offset(&self) -> u32 {
let num: [u8; 4] = self.data[4..8].try_into().unwrap();
u32::from_be_bytes(num)
}
file: PathBuf,
output: PathBuf,
}

fn main() -> std::io::Result<()> {
const ZEROS_BUFFER: [u8; FIL_PAGE_SIZE] = [0u8; FIL_PAGE_SIZE];

fn main() {
let args = Arguments::parse();

let file = File::options().read(true).write(true).open(args.file)?;
let subscriber = tracing_subscriber::FmtSubscriber::builder()
.with_max_level(match args.verbose {
0 => Level::INFO,
1 => Level::DEBUG,
_ => Level::TRACE,
})
.with_ansi(args.color)
.finish();
_ = tracing::subscriber::set_global_default(subscriber);

let mmap = unsafe { MmapMut::map_mut(&file)? };
let file = File::open(args.file).expect("Failed to open input file");

// Not sure how to do this type conversion in a more proper way; this was written by GPT.
let pages: &mut [Page] = unsafe {
std::slice::from_raw_parts_mut(mmap.as_ptr() as *mut Page, mmap.len() / PAGE_SIZE)
let mut output_len: usize = 0;
let mut output_opt = if args.dry_run {
None
} else {
Some(File::create(args.output).expect("Failed to open output file for write"))
};

pages.sort_by_key(|f| f.offset());
let mut reader = BufReader::new(file);
let mut page_buffer: Vec<u8> = Vec::new();
page_buffer.resize(FIL_PAGE_SIZE, 0);

let mut missing = 0;
let mut expected = pages[0].offset();
for page in pages {
missing += page.offset() - expected;
expected = page.offset() + 1;
}
let mut pages_processed = 0u32;
let mut largest_page_number = 0u32;
let mut sorted = true;

loop {
match reader.read_exact(&mut page_buffer) {
Ok(_) => {
pages_processed += 1;

let page = Page::from_bytes(&page_buffer).expect("Failed to construct page");
// Only pages of type Allocated are empty, so skip them
if page.header.page_type == PageType::Allocated {
continue;
}

if page.crc32_checksum() != page.header.new_checksum {
warn!("Invalid page detected: {:?}", page)
} else {
largest_page_number = std::cmp::max(largest_page_number, page.header.offset);
}

println!("max: {}, missing: {}", expected - 1, missing);
if page.header.offset != (pages_processed - 1) {
sorted = false;
}

// Make sure all changes are flushed to disk
mmap.flush()?;
let page_offset_in_file = page.header.offset as usize * FIL_PAGE_SIZE;

if let Some(output) = output_opt.as_mut() {
// If the target file is "shorter" than where we need to write, fill it with zeros
while output_len < page_offset_in_file {
output
.seek(SeekFrom::Start(output_len as u64))
.expect("Seek success");
output
.write_all(&ZEROS_BUFFER)
.expect("Failed to write spacer");
output_len += ZEROS_BUFFER.len();
}

debug_assert!((page_offset_in_file == output_len)
|| (page_offset_in_file + FIL_PAGE_SIZE < output_len),
"either we should be tacking on at the end, or completely within the current file");
output
.seek(SeekFrom::Start(page_offset_in_file as u64))
.expect("Failed to seek to page location");
output
.write_all(&page_buffer)
.expect("Failed to write page data");
if page_offset_in_file == output_len {
output_len += page_buffer.len();
}

debug_assert!(
output_len % FIL_PAGE_SIZE == 0,
"output must be page aligned"
);
}
}
Err(_) => break,
}
}

Ok(())
info!("Processed {} pages, max page number is {}", pages_processed, largest_page_number);
info!("Original file is sorted = {:?}", sorted);
}

0 comments on commit 7ce3ac9

Please sign in to comment.