Skip to content

Commit

Permalink
chore: mem management
Browse files Browse the repository at this point in the history
  • Loading branch information
FL33TW00D committed Jan 22, 2024
1 parent d40910b commit a37a41b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 43 deletions.
74 changes: 36 additions & 38 deletions crates/ratchet-core/src/storage/cpu_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ use crate::DType;
/// An owned CPU allocation: a raw pointer paired with the `Layout` it was
/// allocated with. Zero-sized buffers are represented by a null pointer.
/// NOTE(review): assumes the pointer came from the global allocator with
/// exactly this layout — confirm at construction sites.
pub struct RawCPUBuffer(*mut u8, Layout);

impl RawCPUBuffer {
/// Builds a buffer from an existing pointer/layout pair without copying.
///
/// NOTE(review): this is a safe fn that takes ownership of `ptr`; callers
/// must guarantee `ptr` was allocated with `layout` and is not freed by
/// another owner — confirm, or consider marking this `unsafe`.
pub fn from_raw_parts(ptr: *mut u8, layout: Layout) -> Self {
    Self(ptr, layout)
}

/// Returns the raw pointer and layout of this buffer.
///
/// NOTE(review): despite the `into_` name this borrows `&self` and merely
/// copies the pointer out, so two handles to the same allocation can exist
/// afterwards — verify callers never free both.
pub fn into_raw_parts(&self) -> (*mut u8, Layout) {
    (self.0, self.1)
}
Expand All @@ -21,19 +25,36 @@ impl RawCPUBuffer {
/// Views the buffer's contents as a byte slice.
pub fn as_bytes(&self) -> &[u8] {
    // Zero-sized buffers store a null pointer, but
    // `slice::from_raw_parts` requires a non-null, aligned pointer even
    // for empty slices — return a static empty slice instead of UB.
    if self.1.size() == 0 {
        return &[];
    }
    // SAFETY: for non-zero sizes the pointer was produced by the global
    // allocator for `self.1.size()` bytes and remains valid while `self`
    // is borrowed.
    unsafe { std::slice::from_raw_parts(self.0, self.1.size()) }
}

/// Views the buffer's contents as a mutable byte slice.
pub fn as_bytes_mut(&mut self) -> &mut [u8] {
    // Zero-sized buffers store a null pointer, but
    // `slice::from_raw_parts_mut` requires a non-null, aligned pointer
    // even for empty slices — return a static empty slice instead of UB.
    if self.1.size() == 0 {
        return &mut [];
    }
    // SAFETY: for non-zero sizes the pointer was produced by the global
    // allocator for `self.1.size()` bytes, and `&mut self` guarantees
    // exclusive access for the slice's lifetime.
    unsafe { std::slice::from_raw_parts_mut(self.0, self.1.size()) }
}

/// Allocates `size` uninitialized bytes with the requested alignment.
/// A zero-sized request performs no allocation and stores a null pointer
/// (matching the convention used by `Clone`).
///
/// NOTE(review): the returned memory is uninitialized; reading it via
/// `as_bytes` before writing is undefined behavior — callers (e.g.
/// `from_bytes`) must fill it first.
pub fn uninitialized(size: usize, alignment: usize) -> Self {
    let layout = std::alloc::Layout::from_size_align(size, alignment).unwrap();
    let data = if size == 0 {
        std::ptr::null_mut()
    } else {
        // SAFETY: `layout` has non-zero size here, as `alloc` requires.
        let ptr = unsafe { std::alloc::alloc(layout) };
        // Report OOM through the global handler rather than a bare assert.
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        ptr
    };
    Self(data, layout)
}
}

impl Clone for RawCPUBuffer {
fn clone(&self) -> Self {
let data = if self.1.size() == 0 {
let (ptr, layout) = self.into_raw_parts();
let data = if layout.size() == 0 {
std::ptr::null()
} else {
let ptr = unsafe { std::alloc::alloc(self.1) };
let ptr = unsafe { std::alloc::alloc(layout) };
assert!(!ptr.is_null());
ptr
} as *mut u8;
unsafe { self.0.copy_to_nonoverlapping(data, self.1.size()) };
Self(data, self.1)
unsafe { ptr.copy_to_nonoverlapping(data, layout.size()) };
Self(data, layout)
}
}

Expand Down Expand Up @@ -65,45 +86,22 @@ impl CPUBuffer {
&self.inner
}

/// Allocates `size` uninitialized bytes with the given alignment and wraps
/// them in a `CPUBuffer`. A zero-sized request allocates nothing and stores
/// a null pointer.
///
/// # Safety
/// The returned buffer's memory is uninitialized; the caller must write it
/// before any read.
unsafe fn uninitialized(size: usize, alignment: usize) -> Self {
    let layout = std::alloc::Layout::from_size_align(size, alignment).unwrap();
    let data = if size == 0 {
        std::ptr::null()
    } else {
        // Panics on invalid size/alignment above; here `alloc` can still
        // return null on OOM, caught by the assert below.
        let ptr = std::alloc::alloc(layout);
        assert!(!ptr.is_null());
        ptr
    } as *mut u8;
    Self::from_raw_parts(data, layout)
}

/// Wraps an existing pointer/layout pair in a ref-counted `CPUBuffer`
/// without copying.
///
/// NOTE(review): safe fn taking a raw pointer — callers must guarantee
/// `data` was allocated with `layout` and has no other owner that frees it;
/// confirm, or consider marking this `unsafe`.
pub fn from_raw_parts(data: *mut u8, layout: Layout) -> Self {
    Self {
        inner: Arc::new(RawCPUBuffer(data, layout)),
    }
}

/// Copies `bytes` into a freshly allocated buffer with the requested
/// alignment.
pub fn from_bytes(bytes: &[u8], alignment: usize) -> Self {
    // Allocate once, then fill. `copy_from_slice` panics on a length
    // mismatch, which cannot happen here since the buffer was sized to
    // `bytes.len()`.
    // NOTE(review): for empty `bytes` this relies on `as_bytes_mut`
    // tolerating the zero-size (null-pointer) buffer — verify.
    let mut raw = RawCPUBuffer::uninitialized(bytes.len(), alignment);
    raw.as_bytes_mut().copy_from_slice(bytes);
    Self::from(raw)
}

pub fn deep_clone(&self) -> Self {
let (ptr, layout) = self.inner().into_raw_parts();
println!("before deep clone: {:p}", ptr);
let alloc = unsafe { std::alloc::alloc(layout) };
unsafe { ptr.copy_to_nonoverlapping(alloc, layout.size()) };
println!("after deep clone: {:p}", alloc);
todo!()
}
}

Self::from_raw_parts(alloc, layout)
impl From<RawCPUBuffer> for CPUBuffer {
fn from(raw: RawCPUBuffer) -> Self {
CPUBuffer {
inner: Arc::new(raw),
}
}
}

Expand Down
10 changes: 5 additions & 5 deletions crates/ratchet-core/src/tensor.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::gpu::{CpuUniform, WgpuDevice};
use crate::{
ops::*, CPUBuffer, CompiledOp, DType, Device, DeviceStorage, Executable, GPUBuffer, Operation,
OperationError, Shape, Storage, Strides, TensorDType, TensorId,
OperationError, RawCPUBuffer, Shape, Storage, Strides, TensorDType, TensorId,
};
use crate::{BinaryOp, LazyOp};

Expand Down Expand Up @@ -382,12 +382,12 @@ impl<T: TensorDType> From<ArrayD<T>> for Tensor {
let vec = it.into_raw_vec().into_boxed_slice();
let ptr = Box::into_raw(vec) as *mut u8;

let cpu_buf = CPUBuffer::from_raw_parts(ptr, layout);
let raw_buf = RawCPUBuffer::from_raw_parts(ptr, layout);
let meta = StorageView::new(shape, T::dt(), strides);
Tensor::new(
LazyOp::Const,
meta,
Some(Storage::CPU(cpu_buf)),
Some(Storage::CPU(CPUBuffer::from(raw_buf))),
Device::CPU,
)
} else {
Expand All @@ -414,8 +414,8 @@ mod tests {
#[test]
fn test_matmul() -> anyhow::Result<()> {
let device = Device::request_device(DeviceRequest::GPU)?;
let a = Tensor::randn::<f32>(shape![512, 512], device.clone());
let b = Tensor::randn::<f32>(shape![512, 512], device.clone());
let a = Tensor::randn::<f32>(shape![1024, 1024], device.clone());
let b = Tensor::randn::<f32>(shape![1024, 1024], device.clone());
let c = a.matmul(&b)?;
c.resolve()?;
println!("\nA: {:#?}", a);
Expand Down

0 comments on commit a37a41b

Please sign in to comment.