Skip to content

Commit

Permalink
usrbio engine for fio (#62)
Browse files Browse the repository at this point in the history
* usrbio engine for fio

* Update README.md
  • Loading branch information
KuribohG authored Mar 3, 2025
1 parent ab5aa89 commit 799b9a5
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ The following figure demonstrates the throughput of read stress test on a large

![Large block read throughput under stress test on a 180-node cluster](docs/images/peak_throughput.jpg)

To benchmark 3FS, please use our [fio engine for USRBIO](benchmarks/fio_usrbio/README.md).

### 2. GraySort

We evaluated [smallpond](https://github.com/deepseek-ai/smallpond) using the GraySort benchmark, which measures sort performance on large-scale datasets. Our implementation adopts a two-phase approach: (1) partitioning data via shuffle using the prefix bits of keys, and (2) in-partition sorting. Both phases read/write data from/to 3FS.
Expand Down
23 changes: 23 additions & 0 deletions benchmarks/fio_usrbio/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# External fio engine for 3FS USRBIO.
#
# Override these on the make command line to point at your trees:
#   HF3FS_INCLUDE_DIR - directory containing hf3fs_usrbio.h
#   HF3FS_LIB_DIR     - directory containing libhf3fs_api_shared.so
#   FIO_SRC_DIR       - directory containing config-host.h (generated by fio's build)
HF3FS_INCLUDE_DIR ?= /usr/include
HF3FS_LIB_DIR ?= /usr/lib
FIO_SRC_DIR ?= /usr/include

PLUGIN_NAME = hf3fs_usrbio
SO_NAME = ${PLUGIN_NAME}.so
SRC = ${PLUGIN_NAME}.cpp

CXX = g++
# -fpermissive: the source assigns void* (td->eo) to typed pointers, C-style.
# -include config-host.h: fio's headers require the generated config first.
CXXFLAGS = -fPIC -fpermissive -O3 -D_GNU_SOURCE -shared -rdynamic -I${HF3FS_INCLUDE_DIR} -I${FIO_SRC_DIR} -include config-host.h
LDFLAGS = -L${HF3FS_LIB_DIR} -lhf3fs_api_shared -Wl,-rpath=${HF3FS_LIB_DIR}

.PHONY: all clean

all: ${SO_NAME}

# Compile and link straight to the shared object; no intermediate .o is
# produced (the old OBJ variable was never used by any rule).
${SO_NAME}: ${SRC}
	${CXX} ${CXXFLAGS} $^ -o $@ ${LDFLAGS}

clean:
	rm -f ${SO_NAME}

35 changes: 35 additions & 0 deletions benchmarks/fio_usrbio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# FIO engine for 3FS USRBIO

This directory contains the [fio] external engine plugin used to benchmark [3FS] USRBIO.

## Build

First, build 3FS and fio.

Configure the following variables:
- `HF3FS_LIB_DIR`: the directory that contains `libhf3fs_api_shared.so`; the default path in the 3FS repo is `3FS/build/src/lib/api`.
- `HF3FS_INCLUDE_DIR`: the directory that contains `hf3fs_usrbio.h`; the default path in the 3FS repo is `3FS/src/lib/api`.
- `FIO_SRC_DIR`: the directory that contains `config-host.h`. After building fio, this header will be in the root of the fio repo.

Then run:
```
make HF3FS_LIB_DIR=${HF3FS_LIB_DIR} HF3FS_INCLUDE_DIR=${HF3FS_INCLUDE_DIR} FIO_SRC_DIR=${FIO_SRC_DIR}
```

You will get the external plugin as `hf3fs_usrbio.so`.

## Usage

To use this plugin, set the `ioengine` argument in fio to `external:hf3fs_usrbio.so`. Please refer to the [fio documentation] for further explanation.

To benchmark batched small I/Os, set all four of the following parameters to the same batch size (1024 in this example):
```
iodepth=1024
iodepth_batch_submit=1024
iodepth_batch_complete_min=1024
iodepth_batch_complete_max=1024
```

[fio]: https://github.com/axboe/fio
[3FS]: https://github.com/deepseek-ai/3FS
[fio documentation]: https://fio.readthedocs.io/en/latest/fio_doc.html
286 changes: 286 additions & 0 deletions benchmarks/fio_usrbio/hf3fs_usrbio.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,286 @@
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <unistd.h>
#include <chrono>
#include <iomanip>
#include <ctime>
#include "hf3fs_usrbio.h"
#include <atomic>
#include <errno.h>
#include <vector>
#include <sys/stat.h>

extern "C" {
#include "fio.h"
#include "optgroup.h"
}

/*
 * Engine options settable from the fio job file (see the fio_option table
 * below for names, defaults and help text).
 */
struct hf3fs_usrbio_options {
    int dummy;        /* NOTE(review): not referenced anywhere in this file — presumably a placeholder; confirm before removing */
    char *mountpoint; /* hf3fs mount point, e.g. /hf3fs/mount/point */
    int ior_depth;    /* forwarded to hf3fs_iorcreate/iorcreate3 */
    int ior_timeout;  /* read-ring timeout in ms, forwarded to hf3fs_iorcreate3 */
};

/* fio option table; wired into the engine via ioengine.options in get_ioengine(). */
static struct fio_option options[] = {
    {
        /* Required: the hf3fs mount the target files live on. */
        .name = "mountpoint",
        .lname = "hf3fs mount point",
        .type = FIO_OPT_STR_STORE,
        .off1 = offsetof(struct hf3fs_usrbio_options, mountpoint),
        .help = "Mount point (e.g. /hf3fs/mount/point)",
        .def = "",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        /* Depth passed to the hf3fs ior creation calls. */
        .name = "ior_depth",
        .lname = "hf3fs ior depth",
        .type = FIO_OPT_INT,
        .off1 = offsetof(struct hf3fs_usrbio_options, ior_depth),
        .help = "Ior depth",
        .def = "0",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        /* Timeout (ms) passed to hf3fs_iorcreate3 for the read ring. */
        .name = "ior_timeout",
        .lname = "hf3fs ior timeout (in ms)",
        .type = FIO_OPT_INT,
        .off1 = offsetof(struct hf3fs_usrbio_options, ior_timeout),
        .help = "Ior timeout",
        .def = "1",
        .category = FIO_OPT_C_ENGINE,
        .group = FIO_OPT_G_NETIO,
    },
    {
        /* Terminator entry required by fio's option parser. */
        .name = NULL,
    },
};

/* Last observed file position, stored in fio's per-file engine slot. */
#define LAST_POS(f) ((f)->engine_pos)

/* Per-thread engine state, hung off td->io_ops_data. */
struct hf3fs_usrbio_data {
    struct hf3fs_iov iov;             /* registered shared-memory region backing td->orig_buffer */
    struct hf3fs_ior ior_r;           /* I/O ring used for reads */
    struct hf3fs_ior ior_w;           /* I/O ring used for writes */
    std::vector<struct io_u *> io_us; /* requests staged by queue(), submitted by commit() */
    int queued;                       /* number of staged requests */
    int events;                       /* completions not yet returned by getevents() */
    enum fio_ddir last_ddir;          /* direction of the currently staged batch */
};

static int hf3fs_usrbio_init(struct thread_data *td) {
td->io_ops_data = static_cast<void *>(new hf3fs_usrbio_data);
struct hf3fs_usrbio_options *options = td->eo;

auto &ior_r = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_r;
auto res = hf3fs_iorcreate3(&ior_r, options->mountpoint, td->o.iodepth, true, options->ior_depth, 0, options->ior_timeout, -1);
if (res < 0) {
return res;
}

auto &ior_w = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_w;
res = hf3fs_iorcreate(&ior_w, options->mountpoint, td->o.iodepth, false, options->ior_depth, -1);
if (res < 0) {
return res;
}

auto *data = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
data->io_us.resize(td->o.iodepth);
data->queued = 0;
data->events = 0;

return 0;
}

/*
 * Translate a raw transfer result into fio's io_u completion state:
 * advance the cached file position, record short-transfer residuals,
 * or flag an errno-based error. Always reports FIO_Q_COMPLETED.
 *
 * NOTE(review): this helper is not called anywhere in this file — it looks
 * carried over from a synchronous-engine template; confirm before removing.
 */
static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret) {
    /* Successful data transfer: remember where the file position ended up. */
    if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir)) {
        LAST_POS(io_u->file) = io_u->offset + ret;
    }

    if (ret != (int) io_u->xfer_buflen) {
        if (ret >= 0) {
            /* Short transfer: not an error, record the residual byte count. */
            io_u->resid = io_u->xfer_buflen - ret;
            io_u->error = 0;
            return FIO_Q_COMPLETED;
        } else {
            io_u->error = errno;
        }
    }

    if (io_u->error) {
        io_u_log_error(td, io_u);
        td_verror(td, io_u->error, "xfer");
    }

    return FIO_Q_COMPLETED;
}

/*
 * queue hook: stage io_u's into the pending batch. Every staged request
 * must share one direction, because commit() submits the whole batch on a
 * single ring (read or write). Returns FIO_Q_BUSY when the direction flips
 * while requests are pending, or when the batch is full — both force fio
 * to commit first — and FIO_Q_QUEUED otherwise.
 */
static enum fio_q_status hf3fs_usrbio_queue(struct thread_data *td, struct io_u *io_u) {
    auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
    const bool switching = (io_u->ddir != sd->last_ddir);

    /* A direction change with work pending must flush the old batch first. */
    if (switching && sd->queued != 0) {
        return FIO_Q_BUSY;
    }
    /* Same direction but the batch is already at capacity. */
    if (!switching && sd->queued == td->o.iodepth) {
        return FIO_Q_BUSY;
    }

    sd->io_us[sd->queued++] = io_u;
    sd->last_ddir = io_u->ddir;
    return FIO_Q_QUEUED;
}

static int hf3fs_usrbio_commit(struct thread_data *td) {
auto &vec = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->io_us;
auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
auto &ior_r = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_r;
auto &ior_w = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->ior_w;
auto &iov = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->iov;

if (sd->queued == 0) {
return 0;
}

io_u_mark_submit(td, sd->queued);

int res = 0;
bool read = (sd->last_ddir == DDIR_READ);
auto &ior = read ? ior_r : ior_w;
for (int i = 0; i < sd->queued; i++) {
res = hf3fs_prep_io(&ior, &iov, read, vec[i]->xfer_buf, vec[i]->file->fd, vec[i]->offset, vec[i]->xfer_buflen, nullptr);
if (res < 0) {
std::cout << "prep " << res << " " << vec[i]->file->fd << std::endl;
return res;
}
}
res = hf3fs_submit_ios(&ior);
if (res < 0) {
std::cout << "submit " << res << std::endl;
return res;
}

std::vector<struct hf3fs_cqe> cqe(sd->queued);
res = hf3fs_wait_for_ios(&ior, cqe.data(), sd->queued, sd->queued, nullptr);
if (res < 0) {
std::cout << "wait " << res << std::endl;
return res;
}

for (int i = 0; i < sd->queued; i++) {
if (cqe[i].result < 0) {
std::cout << "cqe error " << res << std::endl;
return res;
}
}

sd->events = sd->queued;
sd->queued = 0;

return 0;
}

/*
 * getevents hook. commit() completes the whole batch synchronously and
 * records the count in sd->events, so all completions are handed back in
 * one shot; `min`/`max` are only consulted as "does the caller want any
 * events at all". (Removed an unused local `vec` from the original.)
 */
static int hf3fs_usrbio_getevents(struct thread_data *td, unsigned int min,
                                  unsigned int max, const struct timespec fio_unused *t) {
    auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
    int ret = 0;
    if (min) {
        ret = sd->events;
        sd->events = 0;
    }
    return ret;
}

/* event hook: hand back the io_u staged at completion slot `event`. */
static struct io_u *hf3fs_usrbio_event(struct thread_data *td, int event) {
    auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
    return sd->io_us[event];
}

/*
 * Per-thread teardown: free the engine state allocated in init().
 * NOTE(review): the ior_r/ior_w rings created in init() are never passed to
 * hf3fs_iordestroy — this looks like a resource leak, but fixing it here is
 * unsafe without also guaranteeing init() cannot leave half-built rings
 * behind; confirm against the USRBIO API before changing.
 */
static void hf3fs_usrbio_cleanup(struct thread_data *td) {
    delete static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
}

static int hf3fs_usrbio_open(struct thread_data *td, struct fio_file *f) {
int flags = 0;
if (td_write(td)) {
if (!read_only) {
flags = O_RDWR;
}
} else if (td_read(td)) {
if (!read_only) {
flags = O_RDWR;
} else {
flags = O_RDONLY;
}
}

f->fd = open(f->file_name, flags);
hf3fs_reg_fd(f->fd, 0);
td->o.open_files++;
return 0;
}

/* close_file hook: deregister the fd from USRBIO, close it, and mark the
 * fio_file as closed. Always reports success. */
static int hf3fs_usrbio_close(struct thread_data *td, struct fio_file *f) {
    const int fd = f->fd;
    hf3fs_dereg_fd(fd);
    close(fd);
    f->fd = -1;
    return 0;
}

/*
 * iomem_alloc hook: back fio's I/O buffer with an hf3fs-registered shared
 * memory iov so the rings can operate on it directly; the iov base becomes
 * td->orig_buffer. Returns 0 on success or the negative hf3fs_iovcreate
 * error (fio treats any non-zero return as allocation failure).
 */
static int hf3fs_usrbio_alloc(struct thread_data *td, size_t total_mem) {
    struct hf3fs_usrbio_options *options = td->eo;

    auto &iov = static_cast<hf3fs_usrbio_data *>(td->io_ops_data)->iov;
    auto res = hf3fs_iovcreate(&iov, options->mountpoint, total_mem, 0, -1);
    if (res < 0) {
        return res;
    }

    td->orig_buffer = iov.base;
    return 0;
}

/* iomem_free hook: destroy the shared-memory iov created in iomem_alloc. */
static void hf3fs_usrbio_free(struct thread_data *td) {
    auto *sd = static_cast<hf3fs_usrbio_data *>(td->io_ops_data);
    hf3fs_iovdestroy(&sd->iov);
}

/* invalidate hook: nothing to do for this engine; report success. */
static int hf3fs_invalidate(struct thread_data *td, struct fio_file *f) {
    (void) td;
    (void) f;
    return 0;
}

extern "C" {

static struct ioengine_ops ioengine;

/*
 * Entry point fio looks up (via dlsym) in an external engine .so: fill in
 * the ops table and hand it back. FIO_SYNCIO is set because queue()/commit()
 * complete all I/O synchronously before returning.
 */
void get_ioengine(struct ioengine_ops **ioengine_ptr) {
    *ioengine_ptr = &ioengine;

    /* BUG FIX: the original ended this line with a comma, silently chaining
     * the next assignment through the comma operator; use a semicolon. */
    ioengine.name = "hf3fs_usrbio";
    ioengine.version = FIO_IOOPS_VERSION;
    ioengine.flags = FIO_SYNCIO | FIO_NODISKUTIL;
    ioengine.init = hf3fs_usrbio_init;
    ioengine.queue = hf3fs_usrbio_queue;
    ioengine.commit = hf3fs_usrbio_commit;
    ioengine.getevents = hf3fs_usrbio_getevents;
    ioengine.event = hf3fs_usrbio_event;
    ioengine.cleanup = hf3fs_usrbio_cleanup;
    ioengine.open_file = hf3fs_usrbio_open;
    ioengine.close_file = hf3fs_usrbio_close;
    ioengine.invalidate = hf3fs_invalidate;
    ioengine.get_file_size = generic_get_file_size;
    ioengine.iomem_alloc = hf3fs_usrbio_alloc;
    ioengine.iomem_free = hf3fs_usrbio_free;
    ioengine.option_struct_size = sizeof(struct hf3fs_usrbio_options);
    ioengine.options = options;
}

}

0 comments on commit 799b9a5

Please sign in to comment.