// Two-dimensional FFT in CUDA using cufftExecC2C
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <iostream>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cufft.h>
#include <opencv.hpp>

#define NX 3     // n[0]: slowest-varying FFT dimension
#define NY 5     // n[1]: fastest-varying (contiguous) FFT dimension
#define BATCH 1  // number of transforms executed per plan
#define NRANK 2  // dimensionality of the transform

using namespace cv;
using std::cout;
using std::endl;

// Abort with a readable message if a CUDA runtime call fails.
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

// Abort with the numeric cufftResult code if a cuFFT call fails.
#define CUFFT_CHECK(call)                                                     \
    do {                                                                      \
        cufftResult status_ = (call);                                         \
        if (status_ != CUFFT_SUCCESS) {                                       \
            fprintf(stderr, "cuFFT error %s:%d: code %d\n", __FILE__,         \
                    __LINE__, (int)status_);                                  \
            exit(EXIT_FAILURE);                                               \
        }                                                                     \
    } while (0)

/*
 * Multiplies every complex element of idata by a real scale factor and
 * writes the result to odata.
 *
 * Launch layout: 1-D grid of 1-D blocks; threads whose flat index is
 * >= size do nothing, so the grid may overshoot the data.
 * idata and odata may point to the same buffer (main() calls it in place),
 * which is why the pointers are not marked __restrict__.
 *
 *   idata  input array of `size` complex values (device memory)
 *   odata  output array of `size` complex values (device memory)
 *   size   number of complex elements to process
 *   scale  real factor applied to both the real and imaginary parts
 */
static __global__ void cufftComplexScale(cufftComplex *idata, cufftComplex *odata, const int size, float scale)
{
    const int threadID = blockIdx.x * blockDim.x + threadIdx.x;
    if (threadID < size)
    {
        odata[threadID].x = idata[threadID].x * scale;
        odata[threadID].y = idata[threadID].y * scale;
    }
}

/*
 * Round-trip demo: fills an NX x NY complex array with its own linear index,
 * runs a forward then an inverse 2-D C2C FFT in place on the GPU, rescales
 * by 1/(NX*NY), and prints the recovered values (real and imaginary part,
 * one element per line).
 *
 * Returns 0 on success; exits with EXIT_FAILURE on any allocation, CUDA or
 * cuFFT error.
 */
int main()
{
    const int count = NX * NY;
    const size_t bytes = sizeof(cufftComplex) * count;
    int n[NRANK] = { NX, NY };

    float2 *Host_data = (float2*)malloc(sizeof(float2) * count);
    if (Host_data == NULL)
    {
        fprintf(stderr, "Host allocation of %d complex values failed\n", count);
        return EXIT_FAILURE;
    }

    cufftComplex *Device_data = NULL;
    CUDA_CHECK(cudaMalloc((void**)&Device_data, bytes));

    // Row-major layout matching the plan dimensions {NX, NY}: NX rows of NY
    // contiguous elements. Each element's real part is its linear index.
    for (int i = 0; i < NX; i++)
    {
        for (int j = 0; j < NY; j++)
        {
            const int idx = i * NY + j;
            Host_data[idx].x = (float)idx;
            Host_data[idx].y = 0.0f;
        }
    }

    CUDA_CHECK(cudaMemcpy(Device_data, Host_data, bytes, cudaMemcpyHostToDevice));

    // NULL inembed/onembed selects the basic contiguous data layout; the
    // stride and distance arguments are then ignored by cuFFT.
    cufftHandle plan;
    CUFFT_CHECK(cufftPlanMany(&plan, NRANK, n, NULL, 1, 0, NULL, 1, 0, CUFFT_C2C, BATCH));

    CUFFT_CHECK(cufftExecC2C(plan, Device_data, Device_data, CUFFT_FORWARD));
    CUFFT_CHECK(cufftExecC2C(plan, Device_data, Device_data, CUFFT_INVERSE));

    // cuFFT transforms are unnormalized, so forward + inverse multiplies the
    // data by NX*NY; divide it back out.
    const int threadsPerBlock = 256;
    const int blocks = (count + threadsPerBlock - 1) / threadsPerBlock;  // ceil-div
    dim3 dimBlock(threadsPerBlock);
    dim3 dimGrid(blocks);
    cufftComplexScale<<<dimGrid, dimBlock>>>(Device_data, Device_data, count, 1.0f / (NX * NY));
    CUDA_CHECK(cudaGetLastError());       // launch-configuration errors
    CUDA_CHECK(cudaDeviceSynchronize());  // asynchronous execution errors

    CUDA_CHECK(cudaMemcpy(Host_data, Device_data, bytes, cudaMemcpyDeviceToHost));

    CUFFT_CHECK(cufftDestroy(plan));
    CUDA_CHECK(cudaFree(Device_data));

    for (int i = 0; i < NX; i++)
    {
        for (int j = 0; j < NY; j++)
        {
            printf("%f %f\n", Host_data[i * NY + j].x, Host_data[i * NY + j].y);
        }
    }

    free(Host_data);

#ifdef _WIN32
    // Keep the console window open when launched from an IDE on Windows.
    system("pause");
#endif
    return 0;
}