generated from Sister19/kit-sister-2023
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5ad849e
commit 09fa4d1
Showing
1 changed file
with
277 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,277 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "GY5LBx4UuxZl" | ||
}, | ||
"source": [ | ||
"# Setup" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "PExlVrs4uv4m" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# https://www.geeksforgeeks.org/how-to-run-cuda-c-c-on-jupyter-notebook-in-google-colaboratory/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "M-AIy-7PsXXp" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!apt-get --purge remove cuda nvidia* libnvidia-*\n", | ||
"!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge\n", | ||
"!apt-get remove cuda-*\n", | ||
"!apt autoremove\n", | ||
"!apt-get update" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "ZNBZ95nauZ3u" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb\n", | ||
"!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb\n", | ||
"!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub\n", | ||
"!apt-get update\n", | ||
"!apt-get install cuda-9.2" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "KhW3T1YGu6Ew" | ||
}, | ||
"source": [ | ||
"# Kode CUDA" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "Co9T1Xpo3AnO", | ||
"outputId": "b97898d0-db3c-42a8-ffb2-288b9773276d" | ||
}, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"Overwriting cuda.cu\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"#include <stdio.h>\n", | ||
"#include <stdlib.h>\n", | ||
"#include <math.h>\n", | ||
"#include <cuda_runtime.h>\n", | ||
"#include <cuComplex.h>\n", | ||
"#define MAX_N 512\n", | ||
"#define BLOCK_SIZE 16\n", | ||
"\n", | ||
"struct Matrix {\n", | ||
" int size;\n", | ||
" double mat[MAX_N][MAX_N];\n", | ||
"};\n", | ||
"\n", | ||
"struct FreqMatrix {\n", | ||
" int size;\n", | ||
" cuDoubleComplex mat[MAX_N][MAX_N];\n", | ||
"};\n", | ||
"\n", | ||
"__device__ cuDoubleComplex cuCexp(cuDoubleComplex x)\n", | ||
"{\n", | ||
" double factor = exp(x.x);\n", | ||
" return make_cuDoubleComplex(factor * cos(x.y), factor * sin(x.y));\n", | ||
"}\n", | ||
"\n", | ||
"__global__ void dft_kernel(struct Matrix *mat, struct FreqMatrix *freq_domain)\n", | ||
"{\n", | ||
" // Implement shared memory\n", | ||
" __shared__ double shared_mat[BLOCK_SIZE][BLOCK_SIZE];\n", | ||
"\n", | ||
" int k = blockIdx.x;\n", | ||
" int l = threadIdx.x;\n", | ||
"\n", | ||
" cuDoubleComplex element = make_cuDoubleComplex(0.0, 0.0);\n", | ||
"\n", | ||
" for (int i = 0; i < mat->size; i += BLOCK_SIZE) {\n", | ||
" for (int j = 0; j < mat->size; j += BLOCK_SIZE) {\n", | ||
" // Load a block of input matrix into shared memory\n", | ||
" shared_mat[l][k] = mat->mat[i + l][j + k];\n", | ||
"\n", | ||
" __syncthreads();\n", | ||
"\n", | ||
" for (int m = 0; m < BLOCK_SIZE; m++) {\n", | ||
" for (int n = 0; n < BLOCK_SIZE; n++) {\n", | ||
" cuDoubleComplex arg = make_cuDoubleComplex((i + m) * k / (double) mat->size + (j + n) * l / (double) mat->size, 0.0);\n", | ||
" cuDoubleComplex exponent = cuCexp(make_cuDoubleComplex(0.0, -2.0 * M_PI * arg.x));\n", | ||
" cuDoubleComplex value = make_cuDoubleComplex(shared_mat[m][k], 0.0);\n", | ||
" element = cuCadd(element, cuCmul(value, exponent));\n", | ||
" }\n", | ||
" }\n", | ||
"\n", | ||
" __syncthreads();\n", | ||
" }\n", | ||
" }\n", | ||
"\n", | ||
" element = cuCdiv(element, make_cuDoubleComplex(mat->size*mat->size, 0.0));\n", | ||
" freq_domain->mat[k][l] = element;\n", | ||
"}\n", | ||
"\n", | ||
"void readMatrix(struct Matrix *m)\n", | ||
"{\n", | ||
" scanf(\"%d\", &(m->size));\n", | ||
" for (int i = 0; i < m->size; i++)\n", | ||
" for (int j = 0; j < m->size; j++)\n", | ||
" scanf(\"%lf\", &(m->mat[i][j]));\n", | ||
"}\n", | ||
"\n", | ||
"int main(void)\n", | ||
"{\n", | ||
" struct Matrix source;\n", | ||
" struct FreqMatrix freq_domain;\n", | ||
" readMatrix(&source);\n", | ||
" freq_domain.size = source.size;\n", | ||
"\n", | ||
" // Allocate device memory\n", | ||
" struct Matrix *d_source;\n", | ||
" cudaMalloc(&d_source, sizeof(struct Matrix));\n", | ||
" cudaMemcpy(d_source, &source, sizeof(struct Matrix), cudaMemcpyHostToDevice);\n", | ||
"\n", | ||
" struct FreqMatrix *d_freq_domain;\n", | ||
" cudaMalloc(&d_freq_domain, sizeof(struct FreqMatrix));\n", | ||
" cudaMemcpy(d_freq_domain, &freq_domain, sizeof(struct FreqMatrix), cudaMemcpyHostToDevice);\n", | ||
"\n", | ||
" // Launch kernel\n", | ||
" dft_kernel<<<source.size, source.size>>>(d_source, d_freq_domain);\n", | ||
"\n", | ||
" // Copy results back to host\n", | ||
" cudaMemcpy(&freq_domain, d_freq_domain, sizeof(struct FreqMatrix), cudaMemcpyDeviceToHost);\n", | ||
"\n", | ||
" // Free device memory\n", | ||
" cudaFree(d_source);\n", | ||
" cudaFree(d_freq_domain);\n", | ||
"\n", | ||
" cuDoubleComplex sum = make_cuDoubleComplex(0.0, 0.0);\n", | ||
" for (int k = 0; k < source.size; k++) {\n", | ||
" for (int l = 0; l < source.size; l++) {\n", | ||
" cuDoubleComplex el = freq_domain.mat[k][l];\n", | ||
" printf(\"(%lf, %lf) \", el.x, el.y);\n", | ||
" sum = cuCadd(sum, el);\n", | ||
" }\n", | ||
" printf(\"\\n\");\n", | ||
" }\n", | ||
" return 0;\n", | ||
"};" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# Write CUDA" | ||
], | ||
"metadata": { | ||
"id": "9dCGWHXiPUsX" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "h6eFuOC1qTid" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"!nvcc cuda.cu -o cuda" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"source": [ | ||
"# Test CUDA with Testcase" | ||
], | ||
"metadata": { | ||
"id": "QzqypJcXPKap" | ||
} | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"source": [ | ||
"!time ./cuda < 512.txt > output.txt" | ||
], | ||
"metadata": { | ||
"colab": { | ||
"base_uri": "https://localhost:8080/" | ||
}, | ||
"id": "-RHIFjITEX7r", | ||
"outputId": "455d4455-e1e4-48d8-ed63-1bdb314ab965" | ||
}, | ||
"execution_count": null, | ||
"outputs": [ | ||
{ | ||
"output_type": "stream", | ||
"name": "stdout", | ||
"text": [ | ||
"\n", | ||
"real\t0m2.798s\n", | ||
"user\t0m1.109s\n", | ||
"sys\t0m0.743s\n" | ||
] | ||
} | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"accelerator": "GPU", | ||
"colab": { | ||
"collapsed_sections": [ | ||
"F-XhfXxnh1PH", | ||
"VRjJkOLWh9Zh", | ||
"0OniSKRHisBx", | ||
"jTEFIvG_i4dZ", | ||
"2pXi7bvNkE-S", | ||
"nhR97Rz4x9A0" | ||
], | ||
"provenance": [], | ||
"toc_visible": true | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |