Does vfmaq_f32 really give more accurate results? My guess is that the accuracy of vfmaq_f32 depends on how many extra bits of intermediate precision the floating-point unit keeps on a given architecture. On macOS arm64, all of the results printed by the code below are identical. Can higher- or lower-precision results be obtained on other architectures? Or are there compiler options that control the accuracy of the results?
#include <arm_neon.h>
#include <cstdio>
#include <iostream>
using namespace std;
int main(){
float a = 12.3839467819;
float b = 21.437678904;
float c = 4171.42144;
printf("%.17f\n",a);
printf("%.17f\n",b);
printf("%.17f\n",c);
printf("%.17f\n",a+b*c);
float32x4_t a_reg = vdupq_n_f32(a);
float32x4_t b_reg = vdupq_n_f32(b);
float32x4_t c_reg = vdupq_n_f32(c);
float32x4_t res_reg = vfmaq_f32(a_reg, b_reg, c_reg);
float res[4] = {0.f};
vst1q_f32(res,res_reg);
printf("%.17f\n",res[0]);
res_reg = vmlaq_f32(a_reg, b_reg, c_reg);
vst1q_f32(res,res_reg);
printf("%.17f\n",res[0]);
res_reg = vmulq_f32(b_reg, c_reg);
res_reg = vaddq_f32(res_reg, a_reg);
vst1q_f32(res,res_reg);
printf("%.17f\n",res[0]);
return 0;
}
With `#include <iostream>` and `using namespace std`, this is definitely not C source code (or it contains a fundamental syntax error). By the way, as you are new, it couldn't hurt to take the tour and read How to Ask. – Meagre