As mentioned in the "Shared Memory Array Default Value" question, shared memory is uninitialized, i.e. it can contain any value.
#include <stdio.h>
#define BLOCK_SIZE 512
// Debug kernel: declares a block-local shared array of BLOCK_SIZE ints and,
// without ever writing to it, has a single thread (thread 0 of block 0)
// print every element. The `input`, `output` and `len` parameters are not
// used by this body.
__global__ void scan(float *input, float *output, int len) {
    __shared__ int data[BLOCK_SIZE];

    // DEBUG: exactly one thread in the whole grid does the reporting;
    // every other thread leaves immediately.
    const bool isReporter = (blockIdx.x == 0) && (threadIdx.x == 0);
    if (!isReporter) {
        return;
    }

    printf("Block Number: %d\n", blockIdx.x);

    // Dump the shared array as-is; nothing has stored to it at this point.
    int idx = 0;
    while (idx < BLOCK_SIZE) {
        printf("DATA[%d] = %d\n", idx, data[idx]);
        ++idx;
    }
}
// Host entry point: launches `scan` on a grid of 10 blocks with BLOCK_SIZE
// threads each, waits for it to finish so the device-side printf output is
// flushed, and reports any CUDA error.
// Returns 0 on success, 1 if the launch or the kernel execution failed.
int main(int argc, char ** argv) {
    dim3 block(BLOCK_SIZE, 1, 1);
    dim3 grid(10, 1, 1);

    // The two pointer arguments are passed as null. The third parameter is
    // an `int`, so pass a plain 0 instead of the pointer constant NULL.
    scan<<<grid, block>>>(NULL, NULL, 0);

    // A kernel launch returns nothing; configuration errors must be
    // fetched explicitly.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "scan launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Synchronizing waits for the kernel and surfaces errors raised while
    // it was running.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "scan execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}
But why does this not seem to hold for this code, where I consistently get zeroed shared memory?
DATA[0] = 0
DATA[1] = 0
DATA[2] = 0
DATA[3] = 0
DATA[4] = 0
DATA[5] = 0
DATA[6] = 0
...
I tested in both Release and Debug modes, with -O3 -arch=sm_20, -O3 -arch=sm_30, and -arch=sm_30. The result is always the same.