Work on SharedTensor

* Refactor Shape struct
* Implement realloc method
* Add write to memory and write to memory offset methods to SharedTensor
* Refactor native memory representation
jonysy committed Mar 24, 2017
1 parent 5aee42a commit 4fdaf42
Showing 13 changed files with 331 additions and 203 deletions.
3 changes: 3 additions & 0 deletions src/error.rs
@@ -24,6 +24,8 @@ pub enum ErrorKind {
},
/// Maximum number of backing memories has been reached (`BitMap` - type alias for `u64`).
BitMapCapacityExceeded,
/// The tensor shape is incompatible with the shape of some data.
IncompatibleShape,
/// Invalid reshaped tensor size.
InvalidReshapedTensorSize,
/// An error returned when attempting to access uninitialized memory.
@@ -53,6 +55,7 @@ impl ErrorKind {
match *self {
Framework { name } => name,
BitMapCapacityExceeded => "the maximum number of backing memories has been reached",
IncompatibleShape => "the tensor shape is incompatible with the shape of the data",
InvalidReshapedTensorSize => "size of the provided shape is not equal to the size of the current shape",
UninitializedMemory => "uninitialized memory",
AllocatedMemoryNotFoundForDevice => "memory allocation was not found for the provided device",
11 changes: 6 additions & 5 deletions src/frameworks/native/device.rs
@@ -1,4 +1,5 @@
use {Alloc, ComputeDevice, Device, Memory, Result, Shape, Synch, Viewable};
use {Alloc, ComputeDevice, Device, Memory, Result, TensorShape, Synch, Viewable};
use ndarray::Array;
use super::NativeMemory;
use utility::Has;

@@ -23,7 +24,7 @@ impl Viewable for NativeDevice {
impl<T> Alloc<T> for NativeDevice {


fn alloc(&self, shape: &Shape) -> Result<Memory<T>> {
fn alloc(&self, shape: &TensorShape) -> Result<Memory<T>> {
// TODO

let mut buffer = Vec::with_capacity(shape.capacity());
@@ -33,14 +34,14 @@ impl<T> Alloc<T> for NativeDevice {
}

Ok(Memory::Native(
NativeMemory::from_shape_vec(shape.dimensions(), buffer).unwrap()))
NativeMemory::new(Array::from_shape_vec(shape.dimensions(), buffer).unwrap())))
}

fn allocwrite(&self, shape: &Shape, data: Vec<T>) -> Result<Memory<T>> {
fn allocwrite(&self, shape: &TensorShape, data: Vec<T>) -> Result<Memory<T>> {
// TODO

Ok(Memory::Native(
NativeMemory::from_shape_vec(shape.dimensions(), data).unwrap()))
NativeMemory::new(Array::from_shape_vec(shape.dimensions(), data).unwrap())))
}
}

67 changes: 65 additions & 2 deletions src/frameworks/native/memory.rs
@@ -1,6 +1,69 @@
use ndarray::{Array, IxDyn};
use std::fmt;
use std::ops::{Deref, DerefMut};

/// Represents a native array.
/// A newtype (with an internal type of an n-dimensional array) representing a native memory buffer.
///
/// note: named `Memory` for consistency across frameworks.
pub type NativeMemory<T> = Array<T, IxDyn>;
pub struct NativeMemory<T>(Array<T, IxDyn>);

impl<T> NativeMemory<T> {

/// Constructs a `NativeMemory` from the provided `array`.
pub fn new(array: Array<T, IxDyn>) -> NativeMemory<T> {
NativeMemory(array)
}

/// Returns the number of elements in the array.
pub fn len(&self) -> usize {
self.0.len()
}

/// Returns a flattened, linear representation of the tensor.
///
/// **caution**: this method panics (via `expect`) if the array's data is not contiguous.
pub fn as_flat(&self) -> &[T] {
self.0.as_slice_memory_order().expect("the array's data is not contiguous")
}

/// Returns a mutable flattened, linear representation of the tensor.
///
/// **caution**: this method panics (via `expect`) if the array's data is not contiguous.
pub fn as_mut_flat(&mut self) -> &mut [T] {
self.0.as_slice_memory_order_mut().expect("the array's data is not contiguous")
}
}

impl<T> Clone for NativeMemory<T> where Array<T, IxDyn>: Clone {

fn clone(&self) -> NativeMemory<T> {
NativeMemory(self.0.clone())
}

fn clone_from(&mut self, other: &Self) {
self.0.clone_from(&other.0)
}
}

impl<T> Deref for NativeMemory<T> {

type Target = Array<T, IxDyn>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> DerefMut for NativeMemory<T> {

fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<T> fmt::Debug for NativeMemory<T> where T: fmt::Debug {

fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:?}", self.0)
}
}
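The new `NativeMemory` newtype keeps the full `ndarray` API reachable through `Deref`/`DerefMut` while adding contiguous-slice accessors for host/device transfers. A minimal usage sketch, assuming only the types shown in this diff and the `ndarray` crate (the `demo` function and the values are illustrative):

```rust
use ndarray::{Array, IxDyn};

fn demo() {
    // Wrap a 2x3 dynamically-dimensioned array in the newtype.
    let array: Array<f32, IxDyn> = Array::zeros(IxDyn(&[2, 3]));
    let mut memory = NativeMemory::new(array);

    // `Deref`/`DerefMut` keep the `ndarray` methods available...
    assert_eq!(memory.len(), 6);

    // ...while the flat views hand out contiguous slices (they panic if
    // the data is not contiguous in memory order).
    memory.as_mut_flat()[5] = 1.0;
    assert_eq!(memory.as_flat()[5], 1.0);
}
```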
29 changes: 22 additions & 7 deletions src/frameworks/opencl/high/mod.rs
@@ -778,23 +778,23 @@ pub type Event = foreign::cl_event;
pub struct Kernel(foreign::cl_kernel);

pub trait KernelArg {
fn size() -> usize;
fn size(&self) -> usize;
fn pointer(&self) -> *mut c_void;
}

impl KernelArg for Buffer {
fn size() -> usize { mem::size_of::<foreign::cl_mem>() }
fn size(&self) -> usize { mem::size_of::<foreign::cl_mem>() }
fn pointer(&self) -> foreign::cl_mem { unsafe { mem::transmute(self) } }
}

impl KernelArg for i32 {
fn size() -> usize { mem::size_of::<i32>() }
fn pointer(&self) -> foreign::cl_mem { unsafe { self as *const i32 as foreign::cl_mem } }
fn size(&self) -> usize { mem::size_of::<i32>() }
fn pointer(&self) -> foreign::cl_mem { self as *const i32 as foreign::cl_mem }
}

impl KernelArg for usize {
fn size() -> usize { mem::size_of::<usize>() }
fn pointer(&self) -> foreign::cl_mem { unsafe { self as *const usize as foreign::cl_mem } }
fn size(&self) -> usize { mem::size_of::<usize>() }
fn pointer(&self) -> foreign::cl_mem { self as *const usize as foreign::cl_mem }
}

impl Kernel {
@@ -832,13 +832,28 @@ impl Kernel {
/// data to be used as argument value.
pub fn set_arg<A>(&self, position: u32, buf: &A) -> Result where A: KernelArg {
unsafe {
let size = A::size();
let size = buf.size();
let ptr = buf.pointer();
let ret_value = foreign::clSetKernelArg(self.0, position, size, ptr);
return utility::check(ret_value, || {});
}
}

// dynamic-dispatch..? TODO reconsider
pub fn set_args(&self, args: &[&KernelArg]) -> Result {
unsafe {
for (position, arg) in args.iter().enumerate() {

let size = arg.size();
let ptr = arg.pointer();
let ret_value = foreign::clSetKernelArg(self.0, position as u32, size, ptr);
utility::check(ret_value, || {})?;
}

Ok(())
}
}

/// Increment the kernel reference count.
fn retain(&self) -> Result {
unsafe {
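Changing `KernelArg::size` from an associated function to a method makes the trait object-safe, which is what the new slice-based `set_args` relies on. A hedged sketch of the two call styles, assuming the `Kernel` and `Buffer` types and the crate's `Result` alias shown above (the `bind_arguments` helper and its argument values are illustrative):

```rust
fn bind_arguments(kernel: &Kernel, buffer: &Buffer, n: usize) -> Result {
    // Set arguments one at a time by position...
    kernel.set_arg(0, buffer)?;
    kernel.set_arg(1, &n)?;

    // ...or pass a heterogeneous slice of trait objects; `set_args`
    // derives each argument's position from its index in the slice.
    kernel.set_args(&[buffer, &n])
}
```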
10 changes: 5 additions & 5 deletions src/frameworks/opencl/interface/device.rs
@@ -1,4 +1,4 @@
use {Alloc, ComputeDevice, ErrorKind, Memory, Result, Shape, Synch, Viewable};
use {Alloc, ComputeDevice, ErrorKind, Memory, Result, TensorShape, Synch, Viewable};
use std::os::raw::c_void;
use super::OpenCLMemory;
use super::super::{foreign, high};
@@ -53,7 +53,7 @@ impl Viewable for OpenCLDevice {
impl<T> Alloc<T> for OpenCLDevice {


fn alloc(&self, shape: &Shape) -> Result<Memory<T>> {
fn alloc(&self, shape: &TensorShape) -> Result<Memory<T>> {
// TODO

let flag = foreign::CL_MEM_READ_WRITE;
@@ -64,7 +64,7 @@ impl<T> Alloc<T> for OpenCLDevice {
Ok(Memory::OpenCL(cl_buffer))
}

fn allocwrite(&self, shape: &Shape, mut data: Vec<T>) -> Result<Memory<T>> {
fn allocwrite(&self, shape: &TensorShape, mut data: Vec<T>) -> Result<Memory<T>> {
// TODO

let flag = foreign::CL_MEM_READ_ONLY | foreign::CL_MEM_COPY_HOST_PTR;
@@ -88,7 +88,7 @@ impl<T> Synch<T> for OpenCLDevice {

let length = native_memory.len();
let size = utility::allocated::<T>(length);
let slice = native_memory.as_slice_memory_order().unwrap();
let slice = native_memory.as_flat();
let slice_pointer = slice.as_ptr();

let ref buf = cl_memory.buf;
@@ -112,7 +112,7 @@ impl<T> Synch<T> for OpenCLDevice {

let length = native_memory.len();
let size = utility::allocated::<T>(length);
let slice = native_memory.as_slice_memory_order_mut().unwrap();
let slice = native_memory.as_mut_flat();
let slice_pointer = slice.as_mut_ptr();

let ref buf = cl_memory.buf;
2 changes: 1 addition & 1 deletion src/frameworks/opencl/interface/memory.rs
@@ -19,7 +19,7 @@ pub struct OpenCLMemory {
}

impl<T> ::opencl::high::KernelArg for Memory<T> {
fn size() -> usize { ::std::mem::size_of::<::opencl::foreign::cl_mem>() }
fn size(&self) -> usize { ::std::mem::size_of::<::opencl::foreign::cl_mem>() }

fn pointer(&self) -> ::opencl::foreign::cl_mem {

6 changes: 3 additions & 3 deletions src/hardware.rs
@@ -1,6 +1,6 @@
use frameworks::native::NativeDevice;
use frameworks::opencl::OpenCLDevice;
use super::{Memory, Result, Shape};
use super::{Memory, Result, TensorShape};

/// A device capable of processing data.
///
@@ -59,10 +59,10 @@ impl ComputeDevice {
pub trait Alloc<T> {

/// Allocates memory on the device.
fn alloc(&self, shape: &Shape) -> Result<Memory<T>>;
fn alloc(&self, shape: &TensorShape) -> Result<Memory<T>>;

/// Allocates and transfers memory `data` to the device.
fn allocwrite(&self, shape: &Shape, data: Vec<T>) -> Result<Memory<T>>;
fn allocwrite(&self, shape: &TensorShape, data: Vec<T>) -> Result<Memory<T>>;
}

/// Synchronizer
6 changes: 3 additions & 3 deletions src/lib.rs
@@ -133,8 +133,8 @@
#![feature(libc, unsize, untagged_unions)]

#[macro_use] extern crate enum_primitive;
#[macro_use] extern crate lazy_static;
#[macro_use] extern crate log;
// #[macro_use] extern crate lazy_static;
// #[macro_use] extern crate log;

extern crate libc;
extern crate libloading as lib;
@@ -154,7 +154,7 @@ pub use self::frameworks::native::{HOST, Native};
pub use self::frameworks::opencl::OpenCL;
pub use self::hardware::{Alloc, ComputeDevice, Device, Hardware, HardwareKind, Synch, Viewable};
pub use self::memory::Memory;
pub use self::tensor::{Shape, SharedTensor};
pub use self::tensor::{SharedTensor, TensorShape};

mod backend;
mod context;
14 changes: 7 additions & 7 deletions src/memory.rs
@@ -85,11 +85,11 @@ impl<T> Memory<T> {
}
}

// /// Returns the OpenCL memory, consuming the convertee.
// pub fn into_opencl(self) -> Option<OpenCLMemory> {
// match self {
// Memory::OpenCL(opencl) => Some(opencl),
// _ => None
// }
// }
/// Returns the OpenCL memory, consuming the convertee.
pub fn into_opencl(self) -> Option<OpenCLMemory> {
match self {
Memory::OpenCL(opencl) => Some(opencl),
_ => None
}
}
}
48 changes: 48 additions & 0 deletions src/tensor/map.rs
@@ -0,0 +1,48 @@
use std::cell::Cell;

/// A "newtype" with an internal type of `Cell<u64>`. `u64Map` uses [bit manipulation][1] to manage
/// memory versions.
///
/// [1]: http://stackoverflow.com/a/141873/2561805
#[allow(non_camel_case_types)]
#[derive(Debug)]
pub struct u64Map(Cell<u64>);

impl u64Map {
/// The maximum number of bits the bit map can contain.
pub const CAPACITY: usize = 64;

/// Constructs a new `u64Map`.
pub fn new() -> u64Map {
u64Map::with(0)
}

/// Constructs a new `u64Map` with the supplied `n`.
pub fn with(n: u64) -> u64Map {
u64Map(Cell::new(n))
}

/// Returns the current value of the bit map.
pub fn get(&self) -> u64 {
self.0.get()
}

/// Sets the bit map to the provided value `v`.
pub fn set(&self, v: u64) {
self.0.set(v)
}

/// Returns `true` if no bits are set.
pub fn empty(&self) -> bool {
self.0.get() == 0
}

/// Sets the `k`-th bit.
pub fn insert(&self, k: usize) {
self.0.set(self.0.get() | (1 << k))
}

/// Returns `true` if `k` is within capacity and the `k`-th bit is set.
pub fn contains(&self, k: usize) -> bool {
k < Self::CAPACITY && (self.0.get() & (1 << k) != 0)
}

/// Returns the index of the lowest set bit (64 if the map is empty).
pub fn latest(&self) -> u32 {
self.0.get().trailing_zeros()
}
}
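A short sketch of how the bit map can track which backing memories hold an up-to-date copy. The `demo` function and the index-to-device mapping are hypothetical; only the `u64Map` API shown above is assumed:

```rust
fn demo() {
    let up_to_date = u64Map::new();
    assert!(up_to_date.empty());

    // Mark memories #0 and #2 as holding the current data.
    up_to_date.insert(0);
    up_to_date.insert(2);

    assert!(up_to_date.contains(2));
    assert!(!up_to_date.contains(1));

    // `latest` reports the index of the lowest set bit.
    assert_eq!(up_to_date.latest(), 0);

    // Indices at or beyond `CAPACITY` (64) are never contained.
    assert!(!up_to_date.contains(u64Map::CAPACITY));
}
```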