[WIP] Option for using GPU #3

Open · wants to merge 16 commits into master
4 changes: 2 additions & 2 deletions .gitignore
@@ -4,7 +4,7 @@ __pycache__/
 *$py.class
 
 # C extensions
-*.so
+# *.so
 
 # Distribution / packaging
 .Python
@@ -15,7 +15,7 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+# lib/
 lib64/
 parts/
 sdist/
50 changes: 23 additions & 27 deletions data.py
@@ -5,35 +5,31 @@

 np.random.seed(42)
 
-class DataLoader():
-
-    @staticmethod
-    def load_dataset(flatten=False):
-        (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
-
-        # normalize x
-        X_train = X_train.astype(float) / 255.
-        X_test = X_test.astype(float) / 255.
-
-        # we reserve the last 10000 training examples for validation
-        X_train, X_val = X_train[:-10000], X_train[-10000:]
-        y_train, y_val = y_train[:-10000], y_train[-10000:]
-
-        if flatten:
-            X_train = X_train.reshape([X_train.shape[0], -1])
-            X_val = X_val.reshape([X_val.shape[0], -1])
-            X_test = X_test.reshape([X_test.shape[0], -1])
-
-        return X_train, y_train, X_val, y_val, X_test, y_test
-
-    @staticmethod
-    def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
-        assert len(inputs) == len(targets)
-        if shuffle:
-            indices = np.random.permutation(len(inputs))
-        for start_idx in trange(0, len(inputs) - batchsize + 1, batchsize):
-            if shuffle:
-                excerpt = indices[start_idx:start_idx + batchsize]
-            else:
-                excerpt = slice(start_idx, start_idx + batchsize)
-            yield inputs[excerpt], targets[excerpt]
+def load_dataset(flatten=False):
+    (X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
+
+    # normalize x
+    X_train = X_train / 255.
+    X_test = X_test / 255.
+
+    # we reserve the last 10000 training examples for validation
+    X_train, X_val = X_train[:-10000], X_train[-10000:]
+    y_train, y_val = y_train[:-10000], y_train[-10000:]
+
+    if flatten:
+        X_train = X_train.reshape([X_train.shape[0], -1])
+        X_val = X_val.reshape([X_val.shape[0], -1])
+        X_test = X_test.reshape([X_test.shape[0], -1])
+
+    return X_train, y_train, X_val, y_val, X_test, y_test
+
+def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
+    assert len(inputs) == len(targets)
+    if shuffle:
+        indices = np.random.permutation(len(inputs))
+    for start_idx in trange(0, len(inputs) - batchsize + 1, batchsize):
+        if shuffle:
+            excerpt = indices[start_idx:start_idx + batchsize]
+        else:
+            excerpt = slice(start_idx, start_idx + batchsize)
+        yield inputs[excerpt], targets[excerpt]
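For reference, a minimal sketch of how the reworked module-level helpers in data.py are typically consumed; the import path and batch size are illustrative assumptions, not part of this diff.

import numpy as np
from data import load_dataset, iterate_minibatches

# flatten=True reshapes each 28x28 MNIST image into a 784-dim vector
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(flatten=True)

for x_batch, y_batch in iterate_minibatches(X_train, y_train, batchsize=32, shuffle=True):
    # every yielded batch is full-sized: (32, 784) pixels with a (32,) label vector
    assert x_batch.shape == (32, 784) and y_batch.shape == (32,)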
91 changes: 91 additions & 0 deletions functions.py
@@ -0,0 +1,91 @@
""" FUNCTIONAL API: functions.py
* Purpose: Core functional API for performing computation on CPU/GPU.
* @author Prabhsimran Singh
* @version 2.0 17/10/18
"""
import numpy as np

from ops.cpu_ops import (
cpu_matmul,
cpu_matsum,
cpu_matprod,
cpu_sum,
cpu_prod,
cpu_maximum
)
from ops.numba_ops import (
numba_matmul,
numba_matsum,
numba_matprod,
numba_sum,
numba_prod,
numba_maximum
)

NUM_THREADS = 32

def get_cuda_execution_config(m, n):
gridBlock = (NUM_THREADS, NUM_THREADS)
gridDim = ((n // gridBlock[0]) + 1, (m // gridBlock[1]) + 1)
return gridDim, gridBlock

def matmul(a, b, method='cpu'):
# fall back to cpu if dim inconsistency (numpy handle)
if method == 'cpu' or len(a.shape) != len(b.shape) or len(a.shape) == 1 or len(b.shape) == 1:
return cpu_matmul(a, b)
elif method == 'gpu':
m, n, k = a.shape[0], a.shape[1], b.shape[1]
c = np.zeros(shape=(m, k))
gridDim, gridBlock = get_cuda_execution_config(m, k)
numba_matmul[gridDim, gridBlock](a, b, c, m, n, k)
return c

def matsum(a, b, method='cpu'):
if method == 'cpu' or len(a.shape) != len(b.shape) or len(a.shape) == 1 or len(b.shape) == 1:
return cpu_matsum(a, b)
if method == 'gpu':
m, n = a.shape[0], a.shape[1]
c = np.zeros(shape=(m, n))
gridDim, gridBlock = get_cuda_execution_config(m, n)
numba_matsum[gridDim, gridBlock](a, b, c, m, n)
return c.reshape((m, n))

def matprod(a, b, method='cpu'):
if method == 'cpu' or len(a.shape) != len(b.shape) or len(a.shape) == 1 or len(b.shape) == 1:
return cpu_matprod(a, b)
if method == 'gpu':
m, n = a.shape[0], a.shape[1]
c = np.zeros(shape=(m, n))
gridDim, gridBlock = get_cuda_execution_config(m, n)
numba_matprod[gridDim, gridBlock](a, b, c, m, n)
return c.reshape((m, n))

def add(a, value, method='cpu'):
if method == 'cpu' or len(a.shape) == 1:
return cpu_sum(a, value)
if method == 'gpu':
m, n = a.shape[0], a.shape[1]
c = np.zeros(shape=(m, n))
gridDim, gridBlock = get_cuda_execution_config(m, n)
numba_sum[gridDim, gridBlock](a, value, c, m, n)
return c.reshape((m, n))

def prod(a, value, method='cpu'):
if method == 'cpu' or len(a.shape) == 1:
return cpu_prod(a, value)
if method == 'gpu':
m, n = a.shape[0], a.shape[1]
c = np.zeros(shape=(m, n))
gridDim, gridBlock = get_cuda_execution_config(m, n)
numba_prod[gridDim, gridBlock](a, value, c, m, n)
return c.reshape((m, n))

def maximum(a, value, method='cpu'):
if method == 'cpu' or len(a.shape) == 1:
return cpu_maximum(a, value)
if method == 'gpu':
m, n = a.shape[0], a.shape[1]
c = np.zeros(shape=(m, n))
gridDim, gridBlock = get_cuda_execution_config(m, n)
numba_maximum[gridDim, gridBlock](a, value, c, m, n)
return c
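A usage sketch for the functional API above. The 'gpu' path assumes a CUDA-capable device and the numba kernels from ops/numba_ops.py, which are not part of this excerpt; the shapes and tolerance are illustrative only.

import numpy as np
import functions as F

a = np.random.randn(128, 256)
b = np.random.randn(256, 64)

c_cpu = F.matmul(a, b, method='cpu')   # NumPy fallback path
c_gpu = F.matmul(a, b, method='gpu')   # launches numba_matmul over a 2D grid
print(np.allclose(c_cpu, c_gpu, atol=1e-6))

# execution config for the (128, 64) output: one extra block covers the remainder
gridDim, gridBlock = F.get_cuda_execution_config(128, 64)
print(gridDim, gridBlock)              # (3, 5) (32, 32) with NUM_THREADS = 32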
76 changes: 45 additions & 31 deletions layers.py
@@ -1,6 +1,8 @@
from __future__ import print_function
import numpy as np

import functions as F

np.random.seed(42)

class Layer:
@@ -34,23 +36,26 @@ def backward(self, inputs, gradients, **kwargs):
"""
pass

class Dense(Layer):
class Dense():
""" Dense layer.
A dense layer is a layer which performs a learned affine transformation:
f(x) = <W*x> + b
input shape: [batch, input_units]
output shape: [batch, output units]
"""
def __init__(self, input_units, output_units):

def __init__(self, input_units, output_units, method='cpu'):
self.type = 'dense'
self.method = method

# initialize weights with glorot/xavier uniform initialization
self.weights = np.random.randn(input_units, output_units) * np.sqrt(6. / (input_units + output_units))
# initialize weights with small random numbers. We use xavier initialization
self.weights = F.prod(np.random.randn(input_units, output_units), np.sqrt(2. / (input_units + output_units)), method=self.method)
self.biases = np.zeros(output_units)

def _init_g2(self):
self.g2_weights = np.zeros_like(self.weights)
self.g2_biases = np.zeros_like(self.biases)

def forward(self, inputs):
""" Forward pass of the dense layer.
Perform an affine transformation:
@@ -59,59 +64,68 @@ def forward(self, inputs):
         input shape: [batch, input_units]
         output shape: [batch, output units]
         """
-        return np.dot(inputs, self.weights) + self.biases
+
+        Wx = F.matmul(inputs, self.weights, method=self.method)
+        Z = F.matsum(Wx, self.biases, method=self.method)
+        return Z
 
     def backward(self, inputs, gradients, **kwargs):
         """ Backward pass of the layer.
         Performs a backpropagation step through the layer, with respect to the given input.
         To compute loss gradients w.r.t input, you need to apply chain rule (backprop):
         dL / dx = (dL / dZ) * (dZ / dx)
         """
         lr = kwargs.get('lr', 0.001)
         gamma = kwargs.get('gamma', 0.9)
         epsilon = kwargs.get('epsilon', 1e-7)
         optim = kwargs.get('optim', 'rmsprop')
 
         # dL / dx = dL / dZ * dZ / dx = gradients * W
-        grad_input = np.dot(gradients, self.weights.T)
+        grad_input = F.matmul(gradients, self.weights.T, method=self.method)
         # m -> batch size
         m = inputs.shape[0]
 
         # compute gradient w.r.t. weights and biases
         # dL / dW = dL / dZ * dZ / dW = gradients * inputs
-        grad_weights = np.dot(inputs.T, gradients) / m
+        grad_weights = F.prod(F.matmul(inputs.T, gradients, method=self.method), 1. / m, method=self.method)
         # dL / db = dL / dZ * dZ / db = gradients * 1
-        grad_biases = gradients.sum(axis=0) / m
-
+        grad_biases = F.prod(gradients.sum(axis=0), 1. / m, method=self.method)
         assert grad_weights.shape == self.weights.shape and grad_biases.shape == self.biases.shape
 
-        update_weights = lr * grad_weights
-        update_biases = lr * grad_biases
+        update_weights = F.prod(grad_weights, lr, method=self.method)
+        update_biases = F.prod(grad_biases, lr, method=self.method)
 
         if optim == 'rmsprop':
             if not hasattr(self, 'g2_weights'):
                 self._init_g2()
-            self.g2_weights = (self.g2_weights * gamma) + np.square(grad_weights) * (1 - gamma)
-            self.g2_biases = (self.g2_biases * gamma) + np.square(grad_biases) * (1 - gamma)
-
-            self.weights -= update_weights / (np.sqrt(self.g2_weights) + epsilon)
-            self.biases -= update_biases / (np.sqrt(self.g2_biases) + epsilon)
+            self.g2_weights = F.matsum(F.prod(self.g2_weights, gamma, method=self.method), F.prod(np.square(grad_weights), (1 - gamma), method=self.method), method=self.method)
+            self.g2_biases = F.matsum(F.prod(self.g2_biases, gamma, method=self.method), F.prod(np.square(grad_biases), (1 - gamma), method=self.method), method=self.method)
+            self.weights = F.matsum(self.weights, -F.matprod(update_weights, 1. / np.sqrt(F.add(self.g2_weights, epsilon, method=self.method)), method=self.method), method=self.method)
+            self.biases = F.matsum(self.biases, -F.matprod(update_biases, 1. / np.sqrt(F.add(self.g2_biases, epsilon, method=self.method)), method=self.method), method=self.method)
         elif optim == 'gd':
-            self.weights -= update_weights
-            self.biases -= update_biases
+            self.weights = F.matsum(self.weights, -update_weights, method=self.method)
+            self.biases = F.matsum(self.biases, -update_biases, method=self.method)
 
         # propagate back the gradients of Loss wrt to layer inputs
         # dL / dx
         return grad_input
 
-class ReLU(Layer):
-    """ReLU layer.
-    Simply applies elementwise rectified linear unit to all inputs.
+class ReLU():
+    """ ReLU layer.
+
+    Applies elementwise rectified linear unit to all inputs:
+        f(x) = max(0, x)
 
     input shape: [batch, input_units]
     output shape: [batch, input_units]
     """
 
-    def __init__(self):
+    def __init__(self, method='cpu'):
         self.type = 'relu'
+        self.method = method
 
     def forward(self, inputs):
         """Apply elementwise ReLU to [batch, input_units] matrix"""
-        return np.maximum(0, inputs)
+        return F.maximum(inputs, 0., method=self.method)
 
     def backward(self, inputs, gradients, **kwargs):
         """Compute gradient of loss w.r.t. ReLU input"""
-        grad_relu = inputs > 0
-        return gradients * grad_relu
+        grad_relu = inputs > 0.
+        return F.matprod(gradients, grad_relu, method=self.method)
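To round out the picture, a minimal end-to-end sketch of the Dense and ReLU layers in this diff; the layer sizes, batch, and stand-in loss gradient are assumptions for illustration, and the real training loop lives elsewhere in the repository.

import numpy as np
from layers import Dense, ReLU

method = 'cpu'   # switch to 'gpu' when a CUDA device is available
network = [Dense(784, 128, method=method), ReLU(method=method), Dense(128, 10, method=method)]

# forward pass, keeping each layer's input for use in backward
x = np.random.randn(32, 784)
activations = [x]
for layer in network:
    activations.append(layer.forward(activations[-1]))

# backward pass with a stand-in gradient of the loss w.r.t. the final output
grad = np.random.randn(*activations[-1].shape)
for i in range(len(network) - 1, -1, -1):
    grad = network[i].backward(activations[i], grad, lr=0.001, optim='rmsprop')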