Install on Linux

lspci | grep -i nvidia

sudo apt update # refresh the apt package index
sudo apt install gcc g++ make # install gcc, g++ and make
sudo apt install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev freeglut3-dev # install the dependency libraries

# Download the CUDA Toolkit from:
# <https://developer.nvidia.com/cuda-toolkit>

nvcc
nvcc -std=c++11 -lcurand -lcublas

Install with Anaconda

conda install -c anaconda cudatoolkit
numba -s
#include <iostream>
#include <cuda.h>

using namespace std;

// Element-wise in-place addition on the device: a[i] += b[i].
//
// Launch layout: 1D grid of 1D blocks, one thread per element. Each thread
// handles the element at its flat global index, so any <<<blocks, threads>>>
// split works (including the one-thread-per-block launch <<<n, 1>>>).
//
// Preconditions:
//   - a and b must be device-accessible pointers to at least
//     gridDim.x * blockDim.x ints each.
//   - The kernel has no bounds guard (the element count is not a parameter),
//     so the total number of launched threads must equal the number of
//     elements to process.
// Uses no shared memory.
__global__ void add(int *a, const int *b){
    // Flat global index; widened to size_t so blockIdx.x * blockDim.x cannot
    // wrap for very large grids. Indexing by blockIdx.x alone would make every
    // thread of a block update the same element when blockDim.x > 1.
    const size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    a[i] += b[i];
}

// Prints "<what> failed: <CUDA error string>" to stderr when `status` is not
// cudaSuccess. Returns true on success, false on failure.
static bool cudaSucceeded(cudaError_t status, const char *what){
    if(status == cudaSuccess) return true;
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Demo driver: fills two device arrays with a[i] = i and b[i] = 2*i, runs the
// add kernel (a[i] += b[i]) and prints the resulting a, one value per line.
// Every CUDA runtime call is checked; on the first failure the remaining
// steps are skipped, resources are still released, and the exit code is 1.
// Returns 0 on success.
int main(){
    const int N = 10; // number of elements
    const size_t bytes = N*sizeof(int);
    int *a = NULL, *b = NULL, i; // a, b: DEVICE pointers (NULL until allocated)
    // HOST staging buffer used for both uploads and the final download
    int *temp = new int [N];

    // allocate DEVICE memory for a, b
    bool ok = cudaSucceeded(cudaMalloc(&a, bytes), "cudaMalloc(a)")
           && cudaSucceeded(cudaMalloc(&b, bytes), "cudaMalloc(b)");

    if(ok){
        // set a's values: a[i] = i
        for(i=0;i<N;i++) temp[i] = i;
        ok = cudaSucceeded(cudaMemcpy(a, temp, bytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(a <- host)");
    }
    if(ok){
        // set b's values: b[i] = 2*i
        for(i=0;i<N;i++) temp[i] = 2*i;
        ok = cudaSucceeded(cudaMemcpy(b, temp, bytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(b <- host)");
    }
    if(ok){
        // calculate a[i] += b[i] in GPU: N blocks of one thread, one per element
        add<<<N,1>>>(a, b);
        // a kernel launch returns nothing; launch errors are read back here
        ok = cudaSucceeded(cudaGetLastError(), "add kernel launch");
    }
    if(ok){
        // blocking copy: waits for the kernel, so execution errors surface here
        ok = cudaSucceeded(cudaMemcpy(temp, a, bytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(host <- a)");
    }
    if(ok){
        // show a's values
        for(i=0;i<N;i++){
            cout << temp[i] << endl;
        }
    }

    // free HOST & DEVICE memory (cudaFree on a NULL pointer is a no-op)
    delete [] temp;
    ok = cudaSucceeded(cudaFree(a), "cudaFree(a)") && ok;
    ok = cudaSucceeded(cudaFree(b), "cudaFree(b)") && ok;
    return ok ? 0 : 1;
}