I have created a GPU based indicator for MetaTrader Terminal platform, using OpenCL and MQL5.
I have tried hard that my [ MetaTrader Terminal: Strategy Tester ] optimization job must get transferred on GPU to maximum. Most of the calculations are done by the indicator. Hence, I made changes in the indicator and has completely transferred on GPU.
But the real issue arises when I try to go for optimization process in the strategy tester section.
The process I see uses both my GPU and CPU but there is no effect on the complete process.
I suspect that the process is not getting distributed to each GPU core for processing, instead all the GPU cores are working on the same process or function for execution.
Kindly, let me know what I need to do to get the single GPU work for on single function execution to give faster output.
Here is my code link attached: Complete code with Expert
The kernel of my code is :
__kernel void calSMA(
int limit,
int rates_total,
__global double *price,
__global double *ExtLineBuffer,
int InpMAPeriod
)
{
int count = 0;
int len = get_global_id(2);
for(int i=limit;i<rates_total;i++)
ExtLineBuffer[len+i] = ExtLineBuffer[len+ i-1]+(price[len+i]-price[len+i-InpMAPeriod])/InpMAPeriod;
}
__kernel void calcSMALoop(int begin, int limit, __global double *price, __global double *firstValue, int InpMAPeriod)
{
int i, len = get_global_id(2);
for(i=begin;i<limit;i++)
firstValue[len]+=price[i];
firstValue[len]/=InpMAPeriod;
}
__kernel void calcEMA(int begin, int limit, __global double *price, __global double *ExtLineBuffer, double SmoothFactor)
{
int len = get_global_id(2);
for(int i=begin;i<limit;i++)
ExtLineBuffer[len + i]=price[len + i]*SmoothFactor+ExtLineBuffer[len + i-1]*(1.0-SmoothFactor);
}
__kernel void calcSSMA(int limit, int rates_total, __global double *price, __global double *ExtLineBuffer, int InpMAPeriod)
{
int len = get_global_id(2);
for(int i=limit;i<rates_total;i++)
ExtLineBuffer[len + i]=(ExtLineBuffer[len + i-1]*(InpMAPeriod-1)+price[len + i])/InpMAPeriod;
}
__kernel void calcLWMALoop(int begin, int limit, __global double *price, __global double *firstValue, int weightsum, __global int *weightreturn)
{
weightsum = 0;
int len = get_global_id(2);
for(int i=begin;i<limit;i++)
{
weightsum+=(i-begin+1);
firstValue[len]+=(i-begin+1)*price[i];
}
firstValue[len]/=(double)weightsum;
weightreturn[0] = weightsum;
}
//__global int counter = 0;
double returnCalculation(int InpMAPeriod, double price, int j)
{
return ((InpMAPeriod-j)*price);
}
__kernel void calcLWMA(int limit, int rates_total, __global double *price, __global double *ExtLineBuffer, int InpMAPeriod, int weightsum)
{
int len = get_global_id(2);
for(int i=limit;i<rates_total;i++)
{
double sum = 0;
for(int j=0;j<InpMAPeriod;j++) sum+=returnCalculation(InpMAPeriod,price[len + i-j],j);
ExtLineBuffer[len + i]=sum/weightsum;
}
}
Please suggest me the way out for distributing the function with different values or frames in MQL5 using GPU on OpenCL.
EDITED
Its a great challenge for the challenge seekers... Even I am eager to know whether there can be anything done with OpenCL and MQL5 for optimization task. I hope I will get answers for what I am seeking.
EDITED AGAIN the MAGPU.mqh
file
#include "CHECKMA.mq5"
#define CUDA_CORE 2
int Execute_SMA(
const double &price[],
int rates_total,
int limit
)
{
int cl_mem = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE);
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel1, "Execute_SMA function error");
if(!CLSetKernelArgMem(cl_CommonKernel1,2,cl_price))
Print("Input Bufer Not Set");
//else Print("Input Buffer Set");
if(!CLSetKernelArgMem(cl_CommonKernel1,3,cl_mem))
Print("Output Bufer Not Set");
//else Print("Output Buffer Set");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer");
//else Print("Copied: ",cl_price);
if(!CLBufferWrite(cl_mem, ExtLineBuffer))
Print("Could not copy Input buffer");
//else Print("Copied: ",cl_mem);
//else Print("Input Buffer Copied");
if(!CLSetKernelArg(cl_CommonKernel1,0,limit))
Print("Could Not Set Arg 0");
//else Print("Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel1,1,rates_total))
Print("Could Not Set Arg 1");
//else Print("Set Arg 1");
//if(!CLSetKernelArg(cl_CommonKernel1,4,previous_value))
//Print("Could Not Set Arg2");
//else Print("Set Arg 2");
if(!CLSetKernelArg(cl_CommonKernel1,4,InpMAPeriod))
Print("Could Not Set Arg3: ",GetLastError());
//Print(CLGetInfoInteger(cl_ctx,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS));
if(!CLExecute(cl_CommonKernel1,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
//else Print("Executing Now!");
//if(CLExecutionStatus(cl_krn) == 0) Print("Completed");
//if(CLExecutionStatus(cl_krn) == 1) Print("CL_RUNNING");
//if(CLExecutionStatus(cl_krn) == 2) Print("CL_SUBMITTED");
//if(CLExecutionStatus(cl_krn) == 3) Print("CL_QUEUED");
//if(CLExecutionStatus(cl_krn) == -1)Print("Error Occurred:", GetLastError());
//if(!CLExecutionStatus(cl_krn))
//Print(CLExecutionStatus(cl_krn));
if(!CLBufferRead(cl_mem,ExtLineBuffer))
Print("Buffer Copy Nothing: ", GetLastError());
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(1);
}
double ExecuteLoop(
int begin,
int limit,
const double &price[]
)
{
int cl_mem = CLBufferCreate(cl_ctx,sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE);
double temp[];
ArrayResize(temp,1);
temp[0] = 0;
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel2, "ExecuteLoop function error");
if(!CLSetKernelArgMem(cl_CommonKernel2,2,cl_price))
Print("Input Bufer Not Set 2");
if(!CLSetKernelArgMem(cl_CommonKernel2,3,cl_mem))
Print("Output Bufer Not Set 2");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer 2");
if(!CLSetKernelArg(cl_CommonKernel2,0,begin))
Print("Could Not Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel2,1,limit))
Print("Could Not Set Arg 1");
if(!CLSetKernelArg(cl_CommonKernel2,4,InpMAPeriod))
Print("Could Not Set Arg3: ",GetLastError());
if(!CLExecute(cl_CommonKernel2,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
if(!CLBufferRead(cl_mem,temp))
Print("Buffer Copy Nothing: ", GetLastError());
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(temp[0]);
}
int ExecuteEMA(int begin, int limit, const double &price[], double SmoothFactor)
{
int cl_mem = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE);
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel1, "ExecuteEMA function error");
if(!CLSetKernelArgMem(cl_CommonKernel1,2,cl_price))
Print("Input Bufer Not Set");
if(!CLSetKernelArgMem(cl_CommonKernel1,3,cl_mem))
Print("Output Bufer Not Set");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer");
if(!CLBufferWrite(cl_mem, ExtLineBuffer))
Print("Could not copy Input buffer");
if(!CLSetKernelArg(cl_CommonKernel1,0,begin))
Print("Could Not Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel1,1,limit))
Print("Could Not Set Arg 1");
if(!CLSetKernelArg(cl_CommonKernel1,4,SmoothFactor))
Print("Could Not Set Arg3: ",GetLastError());
if(!CLExecute(cl_CommonKernel1,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
if(!CLBufferRead(cl_mem,ExtLineBuffer))
Print("Buffer Copy Nothing: ", GetLastError());
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(1);
}
int Execute_SSMA(
const double &price[],
int rates_total,
int limit
)
{
int cl_mem = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE);
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel1, "Execute_SSMA function error");
if(!CLSetKernelArgMem(cl_CommonKernel1,2,cl_price))
Print("Input Bufer Not Set");
if(!CLSetKernelArgMem(cl_CommonKernel1,3,cl_mem))
Print("Output Bufer Not Set");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer");
if(!CLBufferWrite(cl_mem, ExtLineBuffer))
Print("Could not copy Input buffer");
//
//else Print("Input Buffer Copied");
if(!CLSetKernelArg(cl_CommonKernel1,0,limit))
Print("Could Not Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel1,1,rates_total))
Print("Could Not Set Arg 1");
if(!CLSetKernelArg(cl_CommonKernel1,4,InpMAPeriod))
Print("Could Not Set Arg3: ",GetLastError());
if(!CLExecute(cl_CommonKernel1,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
if(!CLBufferRead(cl_mem,ExtLineBuffer))
Print("Buffer Copy Nothing: ", GetLastError());
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(1);
}
double ExecuteLWMALoop(
int begin,
int limit,
const double &price[],
int weightsumlocal
)
{
int cl_mem = CLBufferCreate(cl_ctx,sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE),
cl_weightsumlocal = CLBufferCreate(cl_ctx,sizeof(int),CL_MEM_READ_WRITE);
double temp[];
int weight[];
ArrayResize(temp,1);
ArrayResize(weight,1);
weight[0] = 0;
temp[0] = 0;
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel2, "ExecuteLWMALoop function error");
if(!CLSetKernelArgMem(cl_CommonKernel2,2,cl_price))
Print("Input Bufer Not Set 2");
if(!CLSetKernelArgMem(cl_CommonKernel2,3,cl_mem))
Print("Output Bufer Not Set 2");
if(!CLSetKernelArgMem(cl_CommonKernel2,5,cl_weightsumlocal))
Print("Output Bufer Not Set 2");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer 2");
if(!CLSetKernelArg(cl_CommonKernel2,0,begin))
Print("Could Not Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel2,1,limit))
Print("Could Not Set Arg 1");
if(!CLSetKernelArg(cl_CommonKernel2,4,weightsumlocal))
Print("Could Not Set Arg3: ",GetLastError());
if(!CLExecute(cl_CommonKernel2,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
if(!CLBufferRead(cl_mem,temp))
Print("Buffer Copy Nothing: ", GetLastError());
if(!CLBufferRead(cl_weightsumlocal,weight))
Print("Buffer Copy Nothing: ", GetLastError());
weightsum = weight[0];
CLBufferFree(cl_weightsumlocal);
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(temp[0]);
}
int Execute_LWMA(const double &price[], int rates_total, int limit, int weightsum1)
{
int cl_mem = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE),
cl_price = CLBufferCreate(cl_ctx,ArraySize(price)*sizeof(double),CL_MEM_READ_WRITE);
Check_Memory_Initialization(cl_mem, cl_price, cl_CommonKernel1, "Execute_SSMA function error");
if(!CLSetKernelArgMem(cl_CommonKernel1,2,cl_price))
Print("Input Bufer Not Set");
if(!CLSetKernelArgMem(cl_CommonKernel1,3,cl_mem))
Print("Output Bufer Not Set");
if(!CLBufferWrite(cl_price, price))
Print("Could not copy Input buffer");
if(!CLBufferWrite(cl_mem, ExtLineBuffer))
Print("Could not copy Input buffer");
//else Print("Input Buffer Copied");
if(!CLSetKernelArg(cl_CommonKernel1,0,limit))
Print("Could Not Set Arg 0");
if(!CLSetKernelArg(cl_CommonKernel1,1,rates_total))
Print("Could Not Set Arg 1");
if(!CLSetKernelArg(cl_CommonKernel1,4,InpMAPeriod))
Print("Could Not Set Arg4: ",GetLastError());
if(!CLSetKernelArg(cl_CommonKernel1,5,weightsum1))
Print("Could Not Set Arg5: ",GetLastError());
if(!CLExecute(cl_CommonKernel1,CUDA_CORE,offset,work))
Print("Kernel not executed",GetLastError());
if(!CLBufferRead(cl_mem,ExtLineBuffer))
Print("Buffer Copy Nothing: ", GetLastError());
CLBufferFree(cl_price);
CLBufferFree(cl_mem);
return(1);
}
void checkKernel(int cl_kernel, string var_name)
{
if(cl_kernel==INVALID_HANDLE )
{
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL kernel create failed: ERR_OPENCL_INVALID_HANDLE ", var_name);
return;
}
if(cl_kernel==ERR_INVALID_PARAMETER )
{
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL kernel create failed: ERR_INVALID_PARAMETER ", var_name);
return;
}
if(cl_kernel==ERR_OPENCL_TOO_LONG_KERNEL_NAME )
{
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL kernel create failed: ERR_OPENCL_TOO_LONG_KERNEL_NAME ", var_name);
return;
}
if(cl_kernel==ERR_OPENCL_KERNEL_CREATE )
{
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL kernel create failed 1: ERR_OPENCL_KERNEL_CREATE ", var_name);
return;
}
}
int Check_Memory_Initialization(int cl_mem, int cl_price, int cl_ker, string name_process_call)
{
if(cl_mem==INVALID_HANDLE)
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL buffer create failed: cl_mem INVALID_HANDLE: ", name_process_call);
return(0);
}
if(cl_mem==ERR_NOT_ENOUGH_MEMORY )
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL buffer create failed: cl_mem ERR_NOT_ENOUGH_MEMORY: ", name_process_call);
return(0);
}
if(cl_mem==ERR_OPENCL_BUFFER_CREATE )
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
Print("OpenCL buffer create failed: cl_mem ERR_OPENCL_BUFFER_CREATE: ", name_process_call);
return(0);
}
if(cl_price==INVALID_HANDLE)
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
CLBufferFree(cl_mem);
Print("OpenCL buffer create failed: cl_price: ", name_process_call);
return(0);
}
if(cl_price==ERR_NOT_ENOUGH_MEMORY)
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
CLBufferFree(cl_mem);
Print("OpenCL buffer create failed: cl_price ERR_NOT_ENOUGH_MEMORY: ", name_process_call);
return(0);
}
if(cl_price==ERR_OPENCL_BUFFER_CREATE)
{
CLKernelFree(cl_ker);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
CLBufferFree(cl_mem);
Print("OpenCL buffer create failed: cl_price ERR_OPENCL_BUFFER_CREATE: ", name_process_call);
return(0);
}
return(1);
}
MAIN INDICATOR FILE CHECKMA.mq5
file
#resource "program_MA_GPU.cl" as string cl_program
#include "MAGPU.mqh"
#property indicator_chart_window
#property indicator_buffers 1
#property indicator_plots 1
#property indicator_type1 DRAW_LINE
#property indicator_color1 Red
input int InpMAPeriod=13; // Period
input int InpMAShift=0; // Shift
input ENUM_MA_METHOD InpMAMethod=MODE_SMA; // Method
//--- indicator buffers
double ExtLineBuffer[];
int offset[CUDA_CORE], work[CUDA_CORE];//={0,19,38,57,76,95,114,123};
string str;
int cl_ctx, cl_prg, cl_CommonKernel1, cl_CommonKernel2;
static int weightsum;
void CalculateSimpleMA(int rates_total,int prev_calculated,int begin,const double &price[])
{
int limit;
if(prev_calculated==0)
{
limit=InpMAPeriod+begin;
ArrayFill(ExtLineBuffer,0,limit-1,0.0);
ExtLineBuffer[limit-1]=ExecuteLoop(begin,limit,price);
}
else limit=prev_calculated-ArraySize(price)+InpMAPeriod+17;
Execute_SMA(price,rates_total,limit);
}
void CalculateEMA(int rates_total,int prev_calculated,int begin,const double &price[])
{
int limit;
double SmoothFactor=2.0/(1.0+InpMAPeriod);
if(prev_calculated==0)
{
limit=InpMAPeriod+begin;
ExtLineBuffer[begin]=price[begin];
ExecuteEMA(begin+1,limit,price,SmoothFactor);
}
else limit=prev_calculated;
ExecuteEMA(begin+99900,limit,price,SmoothFactor);
}
void CalculateLWMA(int rates_total,int prev_calculated,int begin,const double &price[])
{
int limit;
if(prev_calculated==0)
{
weightsum=0;
limit=InpMAPeriod+begin;
//--- set empty value for first limit bars
ArrayFill(ExtLineBuffer,0,limit,0.0);
//--- calculate first visible value
ExtLineBuffer[limit-1]=ExecuteLWMALoop(begin,limit,price,weightsum);
}
else limit=prev_calculated-ArraySize(price)+InpMAPeriod+17;
//--- main loop
Execute_LWMA(price,rates_total,limit,weightsum);
}
void CalculateSmoothedMA(int rates_total,int prev_calculated,int begin,const double &price[])
{
int limit;
//--- first calculation or number of bars was changed
if(prev_calculated==0)
{
limit=InpMAPeriod+begin;
//--- set empty value for first limit bars
ArrayFill(ExtLineBuffer,0,limit-1,0.0);
ExtLineBuffer[limit-1]=ExecuteLoop(begin,limit,price);
}
else limit=prev_calculated-ArraySize(price)+InpMAPeriod+17;
Execute_SSMA(price,rates_total,limit);
//---
}
void OnInit()
{
//--- indicator buffers mapping
SetIndexBuffer(0,ExtLineBuffer,INDICATOR_DATA);
//--- set accuracy
IndicatorSetInteger(INDICATOR_DIGITS,_Digits+1);
//--- sets first bar from what index will be drawn
PlotIndexSetInteger(0,PLOT_DRAW_BEGIN,InpMAPeriod);
//---- line shifts when drawing
PlotIndexSetInteger(0,PLOT_SHIFT,InpMAShift);
//--- name for DataWindow
//---- sets drawing line empty value--
PlotIndexSetDouble(0,PLOT_EMPTY_VALUE,0.0);
//---- initialization done
cl_ctx = CLContextCreate(CL_USE_GPU_ONLY);
cl_prg=CLProgramCreate(cl_ctx,cl_program,str);
if(cl_ctx==INVALID_HANDLE)
{
Print("OpenCL not found: ", GetLastError() );
return;
}
if(cl_prg==INVALID_HANDLE)
{
CLContextFree(cl_ctx);
Print("OpenCL program create failed: ", str);
return;
}
if(cl_prg==ERR_INVALID_PARAMETER )
{
CLContextFree(cl_ctx);
Print("OpenCL program create failed: ", str);
return;
}
if(cl_prg==ERR_NOT_ENOUGH_MEMORY )
{
CLContextFree(cl_ctx);
Print("OpenCL program create failed: ", str);
return;
}
if(cl_prg==ERR_OPENCL_PROGRAM_CREATE )
{
CLContextFree(cl_ctx);
Print("OpenCL program create failed: ", str);
return;
}
int c = 1;
ArrayFill(work,0,CUDA_CORE,c);
//ArrayInitialize(offset,0);
int enter = -c;
for (int i =0; i < CUDA_CORE; i++)
{
offset[i] = enter + c;
enter = offset[i];
}
switch(InpMAMethod)
{
case MODE_SMA : cl_CommonKernel1 = CLKernelCreate(cl_prg,"calSMA");
checkKernel(cl_CommonKernel1,"cl_CommonKernel1 SMA");
cl_CommonKernel2 = CLKernelCreate(cl_prg,"calcSMALoop");
checkKernel(cl_CommonKernel2,"cl_CommonKernel2 SMA");
break;
case MODE_EMA : cl_CommonKernel1 = CLKernelCreate(cl_prg,"calcEMA");
checkKernel(cl_CommonKernel1,"cl_CommonKernel1 EMA");
break;
case MODE_LWMA : cl_CommonKernel1 = CLKernelCreate(cl_prg,"calcLWMA");
checkKernel(cl_CommonKernel1,"cl_CommonKernel1 LWMA");
cl_CommonKernel2 = CLKernelCreate(cl_prg,"calcLWMALoop");
checkKernel(cl_CommonKernel2,"cl_CommonKernel2 LWMA");
break;
case MODE_SMMA : cl_CommonKernel1 = CLKernelCreate(cl_prg,"calcSSMA");
checkKernel(cl_CommonKernel1,"cl_CommonKernel1 SSMA");
cl_CommonKernel2 = CLKernelCreate(cl_prg,"calcSMALoop");
checkKernel(cl_CommonKernel2,"cl_CommonKernel2 SSMA");
break;
}
}
int OnCalculate(const int rates_total,
const int prev_calculated,
const int begin,
const double &price[])
{
if(rates_total<InpMAPeriod-1+begin)
return(0);
if(prev_calculated==0)
ArrayInitialize(ExtLineBuffer,0);
PlotIndexSetInteger(0,PLOT_DRAW_BEGIN,InpMAPeriod-1+begin);
switch(InpMAMethod)
{
case MODE_EMA: CalculateEMA(rates_total,prev_calculated,begin,price); break;
case MODE_LWMA: CalculateLWMA(rates_total,prev_calculated,begin,price); break;
case MODE_SMMA: CalculateSmoothedMA(rates_total,prev_calculated,begin,price); break;
case MODE_SMA: CalculateSimpleMA(rates_total,prev_calculated,begin,price); break;
}
//--- return value of prev_calculated for next call
return(rates_total);
}
void OnDeinit(const int reason)
{
CLKernelFree(cl_CommonKernel1);
CLKernelFree(cl_CommonKernel2);
CLProgramFree(cl_prg);
CLContextFree(cl_ctx);
}
OpenCL device #0: GPU NVIDIA Corporation GeForce GT 730 with OpenCL 1.2 (2 units, 901 MHz, 4096 Mb, version 391.35)
OpenCL device #1: GPU NVIDIA Corporation GeForce GT 730 with OpenCL 1.2 (2 units, 901 MHz, 4096 Mb, version 391.35)
OpenCL device #2: CPU Intel(R) Corporation Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz with OpenCL 2.1 (8 units, 4000 MHz, 16300 Mb, version 7.0.0.2567)
Isn't this meaning I have 2 work item per GPU device, right? – Gallicizeclinfo
command: gist.github.com/JafferWilson/668983f777fa41aa1a92ddf5e287e88f – Gallicize