Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#5734] feat (gvfs-fuse): Gvfs-fuse basic FUSE-level implementation and code structure layout #5835

Merged
merged 12 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions .github/workflows/gvfs-fuse-build-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
name: Build gvfs-fuse and testing

# Controls when the workflow will run
on:
  push:
    branches: [ "main", "branch-*" ]
  pull_request:
    branches: [ "main", "branch-*" ]
  workflow_dispatch:

# Only one run per pull request (or per ref for pushes); a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Detects whether any path relevant to gvfs-fuse changed, so the build can be skipped otherwise.
  changes:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: dorny/paths-filter@v2
        id: filter
        with:
          filters: |
            source_changes:
              - .github/**
              - api/**
              - bin/**
              - catalogs/hadoop/**
              - clients/filesystem-fuse/**
              - common/**
              - conf/**
              - core/**
              - dev/**
              - gradle/**
              - meta/**
              - scripts/**
              - server/**
              - server-common/**
              - build.gradle.kts
              - gradle.properties
              - gradlew
              # Gradle's settings file is `settings.gradle.kts`; the previous
              # `setting.gradle.kts` entry could never match a real file.
              - settings.gradle.kts
    outputs:
      source_changes: ${{ steps.filter.outputs.source_changes }}

  # Build for AMD64 architecture
  Gvfs-Build:
    needs: changes
    if: needs.changes.outputs.source_changes == 'true'
    runs-on: ubuntu-latest
    timeout-minutes: 60
    strategy:
      matrix:
        architecture: [linux/amd64]
        java-version: [ 17 ]
    env:
      PLATFORM: ${{ matrix.architecture }}
    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-java@v4
        with:
          java-version: ${{ matrix.java-version }}
          distribution: 'temurin'
          cache: 'gradle'

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Check required command
        run: |
          dev/ci/check_commands.sh

      - name: Build and test Gravitino
        # The id is required by the `steps.integrationTest.outcome` check in the
        # upload step below; without it that expression is always empty.
        id: integrationTest
        run: |
          ./gradlew :clients:filesystem-fuse:build -PenableFuse=true

      - name: Free up disk space
        run: |
          dev/ci/util_free_space.sh

      - name: Upload tests reports
        uses: actions/upload-artifact@v3
        # Upload logs when the build/test step failed, or when the PR carries the "upload log" label.
        if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }}
        with:
          name: Gvfs-fuse integrate-test-reports-${{ matrix.java-version }}
          path: |
            clients/filesystem-fuse/build/test/log/*.log

2 changes: 1 addition & 1 deletion clients/filesystem-fuse/.cargo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@

[build]
target-dir = "build"

rustflags = ["-Adead_code", "-Aclippy::redundant-field-names"]
10 changes: 8 additions & 2 deletions clients/filesystem-fuse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,15 @@ repository = "https://github.com/apache/gravitino"
name = "gvfs-fuse"
path = "src/main.rs"

[lib]
name="gvfs_fuse"

[dependencies]
async-trait = "0.1"
bytes = "1.6.0"
futures-util = "0.3.30"
libc = "0.2.164"
fuse3 = { version = "0.8.1", "features" = ["tokio-runtime", "unprivileged"] }
log = "0.4.22"
tokio = { version = "1.38.0", features = ["full"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

21 changes: 21 additions & 0 deletions clients/filesystem-fuse/rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Toolchain settings for this crate.
[toolchain]
# Pinned to an exact release rather than a floating channel name.
channel = "1.82.0"
# Components requested in addition to the compiler itself.
components = ["rustfmt", "clippy", "rust-src"]
# Installation profile.
profile = "default"
241 changes: 241 additions & 0 deletions clients/filesystem-fuse/src/filesystem.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
use async_trait::async_trait;
use bytes::Bytes;
use fuse3::{Errno, FileType, Timestamp};

pub(crate) type Result<T> = std::result::Result<T, Errno>;

/// `RawFileSystem` is the id-based file system interface used by `FuseApiHandle`.
///
/// Every operation addresses files by numeric id instead of by path:
/// - `file_id` / `parent_file_id` uniquely identify a file or directory in the file system.
/// - `handle_id` (`fh`) identifies an opened file and is used to read or write its content.
///
/// The `file_id` and `handle_id` need to be mapped to the `ino`/`inode` and `fh` of fuse3.
#[async_trait]
pub(crate) trait RawFileSystem: Send + Sync {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of RawFileSystem is confusing, could you provide better name?

Copy link
Contributor Author

@diqiu50 diqiu50 Dec 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the name is OK. Do you have a better suggestion?

/// Initializes the file system.
async fn init(&self) -> Result<()>;

/// Returns the file path for `file_id`.
///
/// NOTE(review): the return type is a plain `String`, so what is returned for an
/// invalid `file_id` is not visible from this signature — confirm with implementors.
async fn get_file_path(&self, file_id: u64) -> String;

/// Validates `file_id` and the file handle `fh`; returns `Ok(())` when both are valid
/// and associated with each other.
async fn valid_file_id(&self, file_id: u64, fh: u64) -> Result<()>;

/// Returns the file stat for `file_id` if the id is valid.
async fn stat(&self, file_id: u64) -> Result<FileStat>;

/// Looks up `name` under the directory `parent_file_id`; returns the file stat if the
/// file exists.
async fn lookup(&self, parent_file_id: u64, name: &str) -> Result<FileStat>;

/// Reads the directory `dir_file_id`; returns the list of file stats if the id refers
/// to a valid directory.
async fn read_dir(&self, dir_file_id: u64) -> Result<Vec<FileStat>>;

/// Opens the file `file_id` with `flags`; returns the file handle if the id refers to a
/// valid file.
async fn open_file(&self, file_id: u64, flags: u32) -> Result<FileHandle>;

/// Opens the directory `file_id` with `flags`; returns the file handle on success.
async fn open_dir(&self, file_id: u64, flags: u32) -> Result<FileHandle>;

/// Creates the file `name` under `parent_file_id` with `flags`; returns the file handle
/// on success.
async fn create_file(&self, parent_file_id: u64, name: &str, flags: u32) -> Result<FileHandle>;

/// Creates the directory `name` under `parent_file_id`; returns the file id on success.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about defining a new type FileID not using u64 directly here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using u64 is OK; a dedicated type shouldn't be necessary.

async fn create_dir(&self, parent_file_id: u64, name: &str) -> Result<u64>;

/// Sets the attributes of `file_id` from `file_stat`.
async fn set_attr(&self, file_id: u64, file_stat: &FileStat) -> Result<()>;

/// Removes the file `name` from the directory `parent_file_id`.
async fn remove_file(&self, parent_file_id: u64, name: &str) -> Result<()>;

/// Removes the directory `name` from the directory `parent_file_id`.
async fn remove_dir(&self, parent_file_id: u64, name: &str) -> Result<()>;

/// Closes the opened file identified by `file_id` and the file handle `fh`.
async fn close_file(&self, file_id: u64, fh: u64) -> Result<()>;

/// Reads up to `size` bytes at `offset` from the file identified by `file_id` and `fh`;
/// returns the data read on success.
async fn read(&self, file_id: u64, fh: u64, offset: u64, size: u32) -> Result<Bytes>;

/// Writes `data` at `offset` to the file identified by `file_id` and `fh`; returns the
/// written size on success.
async fn write(&self, file_id: u64, fh: u64, offset: u64, data: &[u8]) -> Result<u32>;
}

/// `PathFileSystem` is the path-based file system interface; it is used to interact with
/// other file systems.
///
/// Unlike `RawFileSystem`, every operation addresses files by path instead of by file id.
#[async_trait]
pub(crate) trait PathFileSystem: Send + Sync {
/// Initializes the file system.
async fn init(&self) -> Result<()>;

/// Returns the file stat for the path `name` if the file exists.
async fn stat(&self, name: &str) -> Result<FileStat>;

/// Looks up `name` under the directory path `parent`; returns the file stat if the file
/// exists.
async fn lookup(&self, parent: &str, name: &str) -> Result<FileStat>;

/// Reads the directory at path `name`; returns the list of file stats if it is a valid
/// directory.
async fn read_dir(&self, name: &str) -> Result<Vec<FileStat>>;

/// Opens the file at path `name` with `flags`; returns the opened file if it exists.
async fn open_file(&self, name: &str, flags: OpenFileFlags) -> Result<OpenedFile>;

/// Opens the directory at path `name` with `flags`; returns the opened file if it exists.
async fn open_dir(&self, name: &str, flags: OpenFileFlags) -> Result<OpenedFile>;

/// Creates the file `name` under the directory path `parent` with `flags`; returns the
/// opened file on success.
async fn create_file(
&self,
parent: &str,
name: &str,
flags: OpenFileFlags,
) -> Result<OpenedFile>;

/// Creates the directory `name` under the directory path `parent`; returns its file stat
/// on success.
async fn create_dir(&self, parent: &str, name: &str) -> Result<FileStat>;

/// Sets the attributes of the file at path `name` from `file_stat`.
///
/// NOTE(review): the meaning of `flush` is not documented in this file — confirm with
/// implementors.
async fn set_attr(&self, name: &str, file_stat: &FileStat, flush: bool) -> Result<()>;

/// Removes the file `name` from the directory path `parent`.
async fn remove_file(&self, parent: &str, name: &str) -> Result<()>;

/// Removes the directory `name` from the directory path `parent`.
async fn remove_dir(&self, parent: &str, name: &str) -> Result<()>;
}

/// `FileSystemContext` is the system environment for the fuse file system.
pub(crate) struct FileSystemContext {
    /// System user id.
    pub(crate) uid: u32,

    /// System group id.
    pub(crate) gid: u32,

    /// Default file permission.
    pub(crate) default_file_perm: u16,

    /// Default directory permission.
    pub(crate) default_dir_perm: u16,

    /// I/O block size.
    pub(crate) block_size: u32,
}

impl FileSystemContext {
    /// Builds a context for the given `uid` and `gid`, filling the remaining fields with
    /// the built-in defaults: `0o644` for files, `0o755` for directories and a block size
    /// of 4096.
    pub(crate) fn new(uid: u32, gid: u32) -> Self {
        const FILE_PERM: u16 = 0o644;
        const DIR_PERM: u16 = 0o755;
        const BLOCK_SIZE: u32 = 4096;

        Self {
            block_size: BLOCK_SIZE,
            default_dir_perm: DIR_PERM,
            default_file_perm: FILE_PERM,
            gid,
            uid,
        }
    }
}

/// `FileStat` is the metadata of a file or directory.
#[derive(Clone, Debug)]
pub struct FileStat {
/// File id within the file system.
pub(crate) file_id: u64,

/// File id of the parent.
pub(crate) parent_file_id: u64,

/// File name.
pub(crate) name: String,

/// File path, expressed from the root of the fuse file system.
pub(crate) path: String,

/// File size.
pub(crate) size: u64,

/// File type, such as regular file or directory.
pub(crate) kind: FileType,

/// File permission.
pub(crate) perm: u16,

/// File access time.
pub(crate) atime: Timestamp,

/// File modification time.
pub(crate) mtime: Timestamp,

/// File creation time.
/// NOTE(review): POSIX `ctime` is the status-change time — confirm which meaning is
/// intended here.
pub(crate) ctime: Timestamp,

/// File link count.
pub(crate) nlink: u32,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need nlink?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a reference count of the file. we may use it later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you provide an example about how to use it ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be considered in a later PR. Let's first push the framework so others can join in.

}

/// `OpenedFile` represents a file opened for read or write; it is used to read or write
/// the file content.
pub(crate) struct OpenedFile {
/// Metadata of the opened file.
pub(crate) file_stat: FileStat,

/// Handle id of this opened file.
pub(crate) handle_id: u64,

/// Reader for the file content; `None` when no reader is attached.
pub reader: Option<Box<dyn FileReader>>,

/// Writer for the file content; `None` when no writer is attached.
pub writer: Option<Box<dyn FileWriter>>,
}

/// `FileHandle` is the file handle for an opened file: the file id paired with the
/// handle id.
pub(crate) struct FileHandle {
/// File id of the opened file.
pub(crate) file_id: u64,

/// Handle id of the opened file.
pub(crate) handle_id: u64,
}

/// `OpenFileFlags` wraps the raw `u32` open-file flags for the file system.
///
/// The inner value is `pub(crate)` because this file defines no constructor or accessor;
/// with a module-private field, other modules of the crate could neither build the
/// wrapper nor read the flags it carries.
pub struct OpenFileFlags(pub(crate) u32);

/// `FileReader` is the interface for reading file content.
#[async_trait]
pub(crate) trait FileReader: Sync + Send {
/// Reads up to `size` bytes starting at `offset`; returns the data read on success.
async fn read(&mut self, offset: u64, size: u32) -> Result<Bytes>;

/// Closes the file. The default implementation does nothing and returns `Ok(())`.
async fn close(&mut self) -> Result<()> {
Ok(())
}
}

/// `FileWriter` is the interface for writing file content.
#[async_trait]
pub trait FileWriter: Sync + Send {
/// Writes `data` starting at `offset`; returns the written size on success.
async fn write(&mut self, offset: u64, data: &[u8]) -> Result<u32>;

/// Closes the file. The default implementation does nothing and returns `Ok(())`.
async fn close(&mut self) -> Result<()> {
Ok(())
}

/// Flushes the file. The default implementation does nothing and returns `Ok(())`.
async fn flush(&mut self) -> Result<()> {
Ok(())
}
}
Loading
Loading