从 FPGA 上的 OpenCL FFT 内核获取 nan 值

发布于 2025-01-15 18:07:04 字数 6100 浏览 2 评论 0原文

我试图自己为 Intel FPGA 编写主机(Host)程序来使用 Intel 的 FFT1D 内核。英特尔 FFT1D 的链接可以在这里找到。

我还在下面给出了我的主机程序。我保存了一个包含一些数据的文件,我的任务是读取这些数据,计算其 FFT 并打印其中的一部分结果。这是一个 4K 点 FFT。

 #include <stdio.h>
    #include <stdlib.h>
#include "CL/opencl.h"
#include "AOCLUtils/aocl_utils.h"
#include <string.h>
#include "fft_config.h"


#define N (1<<LOGN) //Please check the FFT Sample Code for Ref (2 to the power 12 gives 4K points)
#define DATA_FILE "complex_input.data"

using namespace aocl_utils;


// Global OpenCL state shared by init(), main() and cleanup().
cl_platform_id platform = NULL;
cl_device_id device = NULL;
// queue0 carries the "fft1d" kernel launch and the result read-back.
cl_command_queue queue0 = NULL;
// queue1 carries the input upload and the "fetch" kernel launch.
cl_command_queue queue1 = NULL;
cl_context context = NULL;
cl_program program = NULL;
// kernel0 is created from "fft1d", kernel1 from "fetch" (see init()).
cl_kernel kernel0, kernel1;
// Device buffers: d_inData is argument 0 of kernel1, d_outData is argument 0 of kernel0.
cl_mem d_inData, d_outData;
// Status code reused by every OpenCL call in this file.
cl_int err = 0;

// One complex sample; main() prints it as "x + y j", i.e. x = real part, y = imaginary part.
typedef struct {
  float x;
  float y;
} float2;
// Earlier variant with statically sized arrays, kept for reference:
//float2 h_outData[N], h_inData[N];
// Host-side input/output buffers of N samples each, allocated during static
// initialisation. NOTE(review): the alignedMalloc results are not checked here.
float2 *h_inData = (float2 *)alignedMalloc(sizeof(float2)*N);
float2 *h_outData = (float2 *)alignedMalloc(sizeof(float2)*N);

void init();    // Finds the platform and device, then creates the context, both command queues, the program and the two kernels.
void cleanup(); // Releases the kernels, program, command queues, device buffers and context.
void read_data();   // Reads the complex samples from DATA_FILE into h_inData[].
// Passed as argument 1 of kernel0; presumably the number of FFT iterations - TODO confirm against the kernel source.
int temp_value = 1;

int main()
{
 // h_inData = (float2 *)alignedMalloc(sizeof(float2)*N);
  //h_outData = (float2 *)alignedMalloc(sizeof(float2)*N);

  int inverse = false;
  int temp =1;
  init();
  read_data();

  d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2)*N, NULL, &err);
  checkError(err,"Failed to allocate Buffer for input array\n");

  d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2)*N, NULL, &err);
  checkError(err, "Failed to allocate the Buffer for output\n");


  //WE FINISH THE FETCH KERNEL
  err = clEnqueueWriteBuffer(queue1,d_inData, CL_TRUE, 0, sizeof(float2)*N, h_inData, 0, NULL, NULL);
  checkError(err,"Failed to Write the input Buffer\n");

  err = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void *)&d_inData);
  checkError(err, "Failed to set KerArg for Kernel1 - 0\n");

  err = clSetKernelArg(kernel0, 0, sizeof(cl_mem), (void *)&d_outData);
  checkError(err, "Failed to set KerArg for Kernel0 - 0\n");

  err = clSetKernelArg(kernel0, 1, sizeof(cl_int), (void *)&temp_value);
  checkError(err, "Failed to set KerArg for Kernel0 - 1\n");

  err = clSetKernelArg(kernel0, 2, sizeof(cl_int), (void *)&inverse);
  checkError(err, "Failed to set KerArg for Kernel0 - 2\n");

  printf("FFT Initialization Complete!\n\n");

  err = clEnqueueTask(queue0, kernel0, 0, NULL, NULL);
  checkError(err, "Failed to Launch the Kernel for FFT\n");

  size_t local_work_size = N/8;
  size_t global_work_size = local_work_size * 1; //Coz the number of Iterations is just 1

  err = clEnqueueNDRangeKernel(queue1, kernel1, 1, NULL, &local_work_size, &global_work_size, 0, NULL, NULL);
  checkError(err, "Failed to launch the Fetch Kernel\n");

  err = clFinish(queue0);
  checkError(err, "Failed to finish FFT\n");
  err = clFinish(queue1);
  checkError(err, "Failed to finish Fetch kernel\n");

  err = clEnqueueReadBuffer(queue0, d_outData, CL_TRUE, 0, sizeof(float2)*N, h_outData, 0, NULL, NULL);
  checkError(err, "Failed to Read back the Buffer output\n");


  printf("FFT is Complete!\n\n"); 
  printf("Printing some of the values, just to make sure they are non-zero\n\n");
  for(int ii=100;ii<125;ii++)
  {
        printf("%f + %f j -> %f + %f j\n",h_inData[ii].x,h_inData[ii].y,h_outData[ii].x,h_outData[ii].y);
  }
  printf("\n\n");
  cleanup();

  return 0;
}



void read_data()
{
  size_t sourceSize;
  float* temp;

  FILE *fp = fopen(DATA_FILE,"r");
  if(fp==NULL)
  {
    printf("Could not find the Random Data File! Exiting!\n");
    exit(1);
  }
  fseek(fp,0,SEEK_END);
  sourceSize=ftell(fp);
  rewind(fp);

  temp = (float *)alignedMalloc(sourceSize);
  fread(temp, sizeof(float),sourceSize,fp);
  fclose(fp);

  for(int i=0;i<N;i++)
  {
    h_inData[i].x = temp[2*i];
    h_inData[i].y = temp[(2*i)+1];
  }
}


void init()
{
  platform = findPlatform("Intel(R) FPGA SDK for OpenCL(TM)");
  if(platform == NULL)
  {
    printf("Could not find the Platform\n");
    exit(1);
  }


  scoped_array<cl_device_id> devices;
  cl_uint num_devices;
  devices.reset(getDevices(platform, CL_DEVICE_TYPE_ACCELERATOR, &num_devices));
  device = devices[0];

  context = clCreateContext(NULL, 1, &device, &oclContextCallback, NULL, &err);
  checkError(err, "Failed to create Context\n");

  queue0 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
  checkError(err, "Failed to create Command Queue0\n");
  queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
  checkError(err, "Failed to create Command Queue1\n");


  program = createProgramFromBinary(context, "bin/fft1d.aocx", &device, 1);

  err = clBuildProgram(program, 1, &device, "", NULL, NULL);
  checkError(err, "Failed to Build Program\n");


  kernel0 = clCreateKernel(program, "fft1d", &err);
  checkError(err,"Could not Create Kernel0\n");

  kernel1 = clCreateKernel(program, "fetch", &err);
  checkError(err, "Could not Create Kernel1\n");

  printf("Finished with the Initial Setup!\n");
}

/*
 * Releases every OpenCL object held in the file-level globals, skipping any
 * handle that is still NULL: kernels first, then the program, the command
 * queues, the device buffers and finally the context.
 */
void cleanup()
{
  if(kernel0 != NULL)
  {
    clReleaseKernel(kernel0);
  }
  if(kernel1 != NULL)
  {
    clReleaseKernel(kernel1);
  }

  if(program != NULL)
  {
    clReleaseProgram(program);
  }

  if(queue0 != NULL)
  {
    clReleaseCommandQueue(queue0);
  }
  if(queue1 != NULL)
  {
    clReleaseCommandQueue(queue1);
  }

  if(d_inData != NULL)
  {
    clReleaseMemObject(d_inData);
  }
  if(d_outData != NULL)
  {
    clReleaseMemObject(d_outData);
  }

  // The context goes last, after the objects created from it.
  if(context != NULL)
  {
    clReleaseContext(context);
  }
}

我检查了文件中的数据是否可以正常读取,并且它是正确的且符合预期。

请告诉我哪里出了问题!

I am trying to use Intel's FFT1D kernel by writing my own host program for an Intel FPGA. A link to Intel's FFT1D example can be found here.

My host program is given below. I have a saved file containing some data; my task is to read that data, compute its FFT and print some of the results. It is a 4K-point FFT.

 #include <stdio.h>
    #include <stdlib.h>
#include "CL/opencl.h"
#include "AOCLUtils/aocl_utils.h"
#include <string.h>
#include "fft_config.h"


#define N (1<<LOGN) //Please check the FFT Sample Code for Ref (2 to the power 12 gives 4K points)
#define DATA_FILE "complex_input.data"

using namespace aocl_utils;


// Global OpenCL state shared by init(), main() and cleanup().
cl_platform_id platform = NULL;
cl_device_id device = NULL;
// queue0 carries the "fft1d" kernel launch and the result read-back.
cl_command_queue queue0 = NULL;
// queue1 carries the input upload and the "fetch" kernel launch.
cl_command_queue queue1 = NULL;
cl_context context = NULL;
cl_program program = NULL;
// kernel0 is created from "fft1d", kernel1 from "fetch" (see init()).
cl_kernel kernel0, kernel1;
// Device buffers: d_inData is argument 0 of kernel1, d_outData is argument 0 of kernel0.
cl_mem d_inData, d_outData;
// Status code reused by every OpenCL call in this file.
cl_int err = 0;

// One complex sample; main() prints it as "x + y j", i.e. x = real part, y = imaginary part.
typedef struct {
  float x;
  float y;
} float2;
// Earlier variant with statically sized arrays, kept for reference:
//float2 h_outData[N], h_inData[N];
// Host-side input/output buffers of N samples each, allocated during static
// initialisation. NOTE(review): the alignedMalloc results are not checked here.
float2 *h_inData = (float2 *)alignedMalloc(sizeof(float2)*N);
float2 *h_outData = (float2 *)alignedMalloc(sizeof(float2)*N);

void init();    // Finds the platform and device, then creates the context, both command queues, the program and the two kernels.
void cleanup(); // Releases the kernels, program, command queues, device buffers and context.
void read_data();   // Reads the complex samples from DATA_FILE into h_inData[].
// Passed as argument 1 of kernel0; presumably the number of FFT iterations - TODO confirm against the kernel source.
int temp_value = 1;

int main()
{
 // h_inData = (float2 *)alignedMalloc(sizeof(float2)*N);
  //h_outData = (float2 *)alignedMalloc(sizeof(float2)*N);

  int inverse = false;
  int temp =1;
  init();
  read_data();

  d_inData = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float2)*N, NULL, &err);
  checkError(err,"Failed to allocate Buffer for input array\n");

  d_outData = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_CHANNEL_2_INTELFPGA, sizeof(float2)*N, NULL, &err);
  checkError(err, "Failed to allocate the Buffer for output\n");


  //WE FINISH THE FETCH KERNEL
  err = clEnqueueWriteBuffer(queue1,d_inData, CL_TRUE, 0, sizeof(float2)*N, h_inData, 0, NULL, NULL);
  checkError(err,"Failed to Write the input Buffer\n");

  err = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void *)&d_inData);
  checkError(err, "Failed to set KerArg for Kernel1 - 0\n");

  err = clSetKernelArg(kernel0, 0, sizeof(cl_mem), (void *)&d_outData);
  checkError(err, "Failed to set KerArg for Kernel0 - 0\n");

  err = clSetKernelArg(kernel0, 1, sizeof(cl_int), (void *)&temp_value);
  checkError(err, "Failed to set KerArg for Kernel0 - 1\n");

  err = clSetKernelArg(kernel0, 2, sizeof(cl_int), (void *)&inverse);
  checkError(err, "Failed to set KerArg for Kernel0 - 2\n");

  printf("FFT Initialization Complete!\n\n");

  err = clEnqueueTask(queue0, kernel0, 0, NULL, NULL);
  checkError(err, "Failed to Launch the Kernel for FFT\n");

  size_t local_work_size = N/8;
  size_t global_work_size = local_work_size * 1; //Coz the number of Iterations is just 1

  err = clEnqueueNDRangeKernel(queue1, kernel1, 1, NULL, &local_work_size, &global_work_size, 0, NULL, NULL);
  checkError(err, "Failed to launch the Fetch Kernel\n");

  err = clFinish(queue0);
  checkError(err, "Failed to finish FFT\n");
  err = clFinish(queue1);
  checkError(err, "Failed to finish Fetch kernel\n");

  err = clEnqueueReadBuffer(queue0, d_outData, CL_TRUE, 0, sizeof(float2)*N, h_outData, 0, NULL, NULL);
  checkError(err, "Failed to Read back the Buffer output\n");


  printf("FFT is Complete!\n\n"); 
  printf("Printing some of the values, just to make sure they are non-zero\n\n");
  for(int ii=100;ii<125;ii++)
  {
        printf("%f + %f j -> %f + %f j\n",h_inData[ii].x,h_inData[ii].y,h_outData[ii].x,h_outData[ii].y);
  }
  printf("\n\n");
  cleanup();

  return 0;
}



void read_data()
{
  size_t sourceSize;
  float* temp;

  FILE *fp = fopen(DATA_FILE,"r");
  if(fp==NULL)
  {
    printf("Could not find the Random Data File! Exiting!\n");
    exit(1);
  }
  fseek(fp,0,SEEK_END);
  sourceSize=ftell(fp);
  rewind(fp);

  temp = (float *)alignedMalloc(sourceSize);
  fread(temp, sizeof(float),sourceSize,fp);
  fclose(fp);

  for(int i=0;i<N;i++)
  {
    h_inData[i].x = temp[2*i];
    h_inData[i].y = temp[(2*i)+1];
  }
}


void init()
{
  platform = findPlatform("Intel(R) FPGA SDK for OpenCL(TM)");
  if(platform == NULL)
  {
    printf("Could not find the Platform\n");
    exit(1);
  }


  scoped_array<cl_device_id> devices;
  cl_uint num_devices;
  devices.reset(getDevices(platform, CL_DEVICE_TYPE_ACCELERATOR, &num_devices));
  device = devices[0];

  context = clCreateContext(NULL, 1, &device, &oclContextCallback, NULL, &err);
  checkError(err, "Failed to create Context\n");

  queue0 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
  checkError(err, "Failed to create Command Queue0\n");
  queue1 = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err);
  checkError(err, "Failed to create Command Queue1\n");


  program = createProgramFromBinary(context, "bin/fft1d.aocx", &device, 1);

  err = clBuildProgram(program, 1, &device, "", NULL, NULL);
  checkError(err, "Failed to Build Program\n");


  kernel0 = clCreateKernel(program, "fft1d", &err);
  checkError(err,"Could not Create Kernel0\n");

  kernel1 = clCreateKernel(program, "fetch", &err);
  checkError(err, "Could not Create Kernel1\n");

  printf("Finished with the Initial Setup!\n");
}

/*
 * Releases every OpenCL object held in the file-level globals, skipping any
 * handle that is still NULL: kernels first, then the program, the command
 * queues, the device buffers and finally the context.
 */
void cleanup()
{
  if(kernel0 != NULL)
  {
    clReleaseKernel(kernel0);
  }
  if(kernel1 != NULL)
  {
    clReleaseKernel(kernel1);
  }

  if(program != NULL)
  {
    clReleaseProgram(program);
  }

  if(queue0 != NULL)
  {
    clReleaseCommandQueue(queue0);
  }
  if(queue1 != NULL)
  {
    clReleaseCommandQueue(queue1);
  }

  if(d_inData != NULL)
  {
    clReleaseMemObject(d_inData);
  }
  if(d_outData != NULL)
  {
    clReleaseMemObject(d_outData);
  }

  // The context goes last, after the objects created from it.
  if(context != NULL)
  {
    clReleaseContext(context);
  }
}

I checked whether the data from the file is being read properly, and it is correct and as expected.

Please let me know where this could be going wrong!

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

夏末染殇 2025-01-22 18:07:04

更新!

我找到了解决方案。在这里,从文件本身读取数据并不是一个好主意。我尝试在执行期间直接生成随机数据,效果很好!

Update!

I found the solution. Reading from the file itself was not a good idea here. I tried generating the random data during execution instead, and it worked just fine!

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文