Install on Linux
lspci | grep -i nvidia
sudo apt update # 更新 apt
sudo apt install gcc g++ make # 安装 gcc g++ make
sudo apt install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev freeglut3-dev # 安装依赖库
# 下载
# <https://developer.nvidia.com/cuda-toolkit>
nvcc
nvcc -std=c++11 -lcurand -lcublas
Install with Anaconda
conda install -c anaconda cudatoolkit
numba -s
#include <iostream>
#include <cuda.h>
using namespace std;
// Element-wise in-place addition kernel: a[k] += b[k].
//
// Launch layout: the element index is taken from blockIdx.x alone, so each
// block handles exactly one element (e.g. add<<<N,1>>>(a, b)). With more than
// one thread per block, every thread of that block would update the same
// element.
//
// Preconditions: no bounds check is performed, so the grid's x-dimension must
// not exceed the number of elements reachable through a and b.
//
//   a  in/out buffer, receives the sums
//   b  read-only addend buffer
__global__ void add(int *a, const int *b){
    const int slot = blockIdx.x;
    a[slot] = a[slot] + b[slot];
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
//   status  result code of the call being checked
//   what    short label of the call, used in the diagnostic
static bool cudaSucceeded(cudaError_t status, const char *what){
    if(status == cudaSuccess) return true;
    std::cerr << "CUDA error in " << what << ": "
              << cudaGetErrorString(status) << std::endl;
    return false;
}

// Demo driver: fills two N-element device buffers with a[i] = i and
// b[i] = 2*i, runs the `add` kernel with one block per element, copies the
// result back and prints one value per line.
//
// Every CUDA runtime call is checked; after the first failure the remaining
// steps are skipped, the buffers are still released, and the process exits
// with status 1. Returns 0 on success.
int main(){
    const int N = 10; // number of elements
    const size_t bytes = N * sizeof(int);
    int *a = nullptr, *b = nullptr; // DEVICE buffers; stay null if allocation fails
    int i;

    // malloc HOST memory for temp (staging buffer for both directions)
    int *temp = new int [N];

    // malloc DEVICE memory for a, b
    bool ok = cudaSucceeded(cudaMalloc(&a, bytes), "cudaMalloc(a)")
           && cudaSucceeded(cudaMalloc(&b, bytes), "cudaMalloc(b)");

    // set a's values: a[i] = i
    if(ok){
        for(i=0;i<N;i++) temp[i] = i;
        ok = cudaSucceeded(cudaMemcpy(a, temp, bytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(host -> a)");
    }

    // set b's values: b[i] = 2*i
    if(ok){
        for(i=0;i<N;i++) temp[i] = 2*i;
        ok = cudaSucceeded(cudaMemcpy(b, temp, bytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(host -> b)");
    }

    // calculate a[i] += b[i] in GPU
    if(ok){
        add<<<N,1>>>(a, b);
        // A launch returns no status itself: cudaGetLastError() reports a bad
        // launch configuration, the synchronize reports faults raised while
        // the kernel was running.
        ok = cudaSucceeded(cudaGetLastError(), "add kernel launch")
          && cudaSucceeded(cudaDeviceSynchronize(), "add kernel execution");
    }

    // show a's values
    if(ok){
        ok = cudaSucceeded(cudaMemcpy(temp, a, bytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(a -> host)");
    }
    if(ok){
        for(i=0;i<N;i++){
            std::cout << temp[i] << std::endl;
        }
    }

    // free HOST & DEVICE memory (cudaFree accepts a null pointer as a no-op)
    delete [] temp;
    cudaFree(a);
    cudaFree(b);

    return ok ? 0 : 1;
}