Skip to content

Commit

Permalink
add cuda ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
danielsalim authored Mar 17, 2023
1 parent 5ad849e commit 09fa4d1
Showing 1 changed file with 277 additions and 0 deletions.
277 changes: 277 additions & 0 deletions src/cuda/cuda_colab.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "GY5LBx4UuxZl"
},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PExlVrs4uv4m"
},
"outputs": [],
"source": [
"# https://www.geeksforgeeks.org/how-to-run-cuda-c-c-on-jupyter-notebook-in-google-colaboratory/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "M-AIy-7PsXXp"
},
"outputs": [],
"source": [
"!apt-get --purge remove cuda nvidia* libnvidia-*\n",
"!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge\n",
"!apt-get remove cuda-*\n",
"!apt autoremove\n",
"!apt-get update"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "ZNBZ95nauZ3u"
},
"outputs": [],
"source": [
"!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb\n",
"!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb\n",
"!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub\n",
"!apt-get update\n",
"!apt-get install cuda-9.2"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KhW3T1YGu6Ew"
},
"source": [
"# Kode CUDA"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Co9T1Xpo3AnO",
"outputId": "b97898d0-db3c-42a8-ffb2-288b9773276d"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Overwriting cuda.cu\n"
]
}
],
"source": [
"#include <stdio.h>\n",
"#include <stdlib.h>\n",
"#include <math.h>\n",
"#include <cuda_runtime.h>\n",
"#include <cuComplex.h>\n",
"#define MAX_N 512\n",
"#define BLOCK_SIZE 16\n",
"\n",
"struct Matrix {\n",
" int size;\n",
" double mat[MAX_N][MAX_N];\n",
"};\n",
"\n",
"struct FreqMatrix {\n",
" int size;\n",
" cuDoubleComplex mat[MAX_N][MAX_N];\n",
"};\n",
"\n",
"__device__ cuDoubleComplex cuCexp(cuDoubleComplex x)\n",
"{\n",
" double factor = exp(x.x);\n",
" return make_cuDoubleComplex(factor * cos(x.y), factor * sin(x.y));\n",
"}\n",
"\n",
"__global__ void dft_kernel(struct Matrix *mat, struct FreqMatrix *freq_domain)\n",
"{\n",
" // Implement shared memory\n",
" __shared__ double shared_mat[BLOCK_SIZE][BLOCK_SIZE];\n",
"\n",
" int k = blockIdx.x;\n",
" int l = threadIdx.x;\n",
"\n",
" cuDoubleComplex element = make_cuDoubleComplex(0.0, 0.0);\n",
"\n",
" for (int i = 0; i < mat->size; i += BLOCK_SIZE) {\n",
" for (int j = 0; j < mat->size; j += BLOCK_SIZE) {\n",
" // Load a block of input matrix into shared memory\n",
" shared_mat[l][k] = mat->mat[i + l][j + k];\n",
"\n",
" __syncthreads();\n",
"\n",
" for (int m = 0; m < BLOCK_SIZE; m++) {\n",
" for (int n = 0; n < BLOCK_SIZE; n++) {\n",
" cuDoubleComplex arg = make_cuDoubleComplex((i + m) * k / (double) mat->size + (j + n) * l / (double) mat->size, 0.0);\n",
" cuDoubleComplex exponent = cuCexp(make_cuDoubleComplex(0.0, -2.0 * M_PI * arg.x));\n",
" cuDoubleComplex value = make_cuDoubleComplex(shared_mat[m][k], 0.0);\n",
" element = cuCadd(element, cuCmul(value, exponent));\n",
" }\n",
" }\n",
"\n",
" __syncthreads();\n",
" }\n",
" }\n",
"\n",
" element = cuCdiv(element, make_cuDoubleComplex(mat->size*mat->size, 0.0));\n",
" freq_domain->mat[k][l] = element;\n",
"}\n",
"\n",
"void readMatrix(struct Matrix *m)\n",
"{\n",
" scanf(\"%d\", &(m->size));\n",
" for (int i = 0; i < m->size; i++)\n",
" for (int j = 0; j < m->size; j++)\n",
" scanf(\"%lf\", &(m->mat[i][j]));\n",
"}\n",
"\n",
"int main(void)\n",
"{\n",
" struct Matrix source;\n",
" struct FreqMatrix freq_domain;\n",
" readMatrix(&source);\n",
" freq_domain.size = source.size;\n",
"\n",
" // Allocate device memory\n",
" struct Matrix *d_source;\n",
" cudaMalloc(&d_source, sizeof(struct Matrix));\n",
" cudaMemcpy(d_source, &source, sizeof(struct Matrix), cudaMemcpyHostToDevice);\n",
"\n",
" struct FreqMatrix *d_freq_domain;\n",
" cudaMalloc(&d_freq_domain, sizeof(struct FreqMatrix));\n",
" cudaMemcpy(d_freq_domain, &freq_domain, sizeof(struct FreqMatrix), cudaMemcpyHostToDevice);\n",
"\n",
" // Launch kernel\n",
" dft_kernel<<<source.size, source.size>>>(d_source, d_freq_domain);\n",
"\n",
" // Copy results back to host\n",
" cudaMemcpy(&freq_domain, d_freq_domain, sizeof(struct FreqMatrix), cudaMemcpyDeviceToHost);\n",
"\n",
" // Free device memory\n",
" cudaFree(d_source);\n",
" cudaFree(d_freq_domain);\n",
"\n",
" cuDoubleComplex sum = make_cuDoubleComplex(0.0, 0.0);\n",
" for (int k = 0; k < source.size; k++) {\n",
" for (int l = 0; l < source.size; l++) {\n",
" cuDoubleComplex el = freq_domain.mat[k][l];\n",
" printf(\"(%lf, %lf) \", el.x, el.y);\n",
" sum = cuCadd(sum, el);\n",
" }\n",
" printf(\"\\n\");\n",
" }\n",
" return 0;\n",
"};"
]
},
{
"cell_type": "markdown",
"source": [
"# Write CUDA"
],
"metadata": {
"id": "9dCGWHXiPUsX"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "h6eFuOC1qTid"
},
"outputs": [],
"source": [
"!nvcc cuda.cu -o cuda"
]
},
{
"cell_type": "markdown",
"source": [
"# Test CUDA with Testcase"
],
"metadata": {
"id": "QzqypJcXPKap"
}
},
{
"cell_type": "code",
"source": [
"!time ./cuda < 512.txt > output.txt"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-RHIFjITEX7r",
"outputId": "455d4455-e1e4-48d8-ed63-1bdb314ab965"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"real\t0m2.798s\n",
"user\t0m1.109s\n",
"sys\t0m0.743s\n"
]
}
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [
"F-XhfXxnh1PH",
"VRjJkOLWh9Zh",
"0OniSKRHisBx",
"jTEFIvG_i4dZ",
"2pXi7bvNkE-S",
"nhR97Rz4x9A0"
],
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

0 comments on commit 09fa4d1

Please sign in to comment.