OK, well, that was interesting. I thought I'd write some actual code to see what the speed was. And here it is. Compiled using C++ DevStudio 2010 Express. There's quite a bit of code here. It times 5 ways of writing the data:-
- Naively calling fwrite once per item
- Collecting the data in a buffer and making fewer, larger calls to fwrite
- Naively calling the Win32 API (WriteFile) once per item
- Collecting the data in a buffer and making fewer, larger calls to the Win32 API
- Using Win32 with double buffering of the output and asynchronous (overlapped) writes
Please check that I've not done something a bit stupid with any of the above.
The program uses QueryPerformanceCounter for timing and stops each timer only after the file has been closed, so that any data still pending in internal buffers is included in the measurement.
The results on my machine (an old WinXP SP3 box):-
- fwrite on its own is generally the fastest although the buffered version can sometimes beat it if you get the size and iterations just right.
- Naive Win32 is significantly slower
- Buffered Win32 doubles the speed but it is still easily beaten by fwrite
- Asynchronous writes were not significantly better than the buffered version. Perhaps someone could check my code and make sure I've not done something stupid as I've never really used the asynchronous IO before.
You may get different results depending on your setup.
Feel free to edit and improve the code.
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <memory.h>
#include <Windows.h>
// Number of items each benchmark in main writes (one write call per item).
const int c_iterations = 10000000;

// Capacity, in bytes, of each staging buffer used by the my_fwrite overloads.
const int c_buffer_size = 100000;

// Staging buffers. The single-buffered overloads only ever use buffer1; the
// asynchronous overload alternates between buffer1 and buffer2.
char buffer1 [c_buffer_size];
char buffer2 [c_buffer_size];

// Buffer the asynchronous overload is currently filling.
char *current_buffer = buffer1;

// Number of bytes currently held in the buffer being filled.
int write_ptr = 0;

// File offset at which the next asynchronous write is issued.
__int64 write_offset = 0;

// Overlapped-I/O state shared by every asynchronous WriteFile call.
OVERLAPPED overlapped = {0};
// Buffered stand-in for fwrite: appends size * count bytes from ptr to
// buffer1 and, when the new data would not fit, first writes the buffered
// bytes to fp with a single fwrite call.
//
//   ptr   - source bytes (not modified)
//   size  - size of one item in bytes
//   count - number of items
//   fp    - stream that receives the data once the buffer is flushed
//
// Non-positive sizes/counts are ignored. A payload larger than the whole
// buffer is written straight to fp (after flushing what is buffered, so byte
// order is preserved) instead of overrunning buffer1. Whatever is left in
// buffer1 after the last call must be flushed by the caller. fwrite errors
// are not reported because the function returns void.
void my_fwrite (void *ptr, int size, int count, FILE *fp)
{
    if (size <= 0 || count <= 0)
    {
        return;
    }

    const int
        c = size * count;

    // Written as a subtraction so the comparison cannot overflow int.
    if (c > c_buffer_size - write_ptr)
    {
        if (write_ptr > 0)
        {
            fwrite (buffer1, write_ptr, 1, fp);
            write_ptr = 0;
        }

        if (c > c_buffer_size)
        {
            // Can never fit in buffer1: bypass the buffer entirely.
            fwrite (ptr, c, 1, fp);
            return;
        }
    }

    memcpy (&buffer1 [write_ptr], ptr, c);
    write_ptr += c;
}
// Buffered stand-in for WriteFile: appends size * count bytes from ptr to
// buffer1 and, when the new data would not fit, first writes the buffered
// bytes to fp with a single synchronous WriteFile call.
//
//   ptr   - source bytes (not modified)
//   size  - size of one item in bytes
//   count - number of items
//   fp    - Win32 file handle that receives the data once the buffer is flushed
//
// Non-positive sizes/counts are ignored. A payload larger than the whole
// buffer is written straight to fp (after flushing what is buffered, so byte
// order is preserved) instead of overrunning buffer1. Whatever is left in
// buffer1 after the last call must be flushed by the caller. WriteFile errors
// are not reported because the function returns void.
void my_fwrite (void *ptr, int size, int count, HANDLE fp)
{
    if (size <= 0 || count <= 0)
    {
        return;
    }

    const int
        c = size * count;

    // Written as a subtraction so the comparison cannot overflow int.
    if (c > c_buffer_size - write_ptr)
    {
        DWORD
            written;

        if (write_ptr > 0)
        {
            WriteFile (fp, buffer1, write_ptr, &written, 0);
            write_ptr = 0;
        }

        if (c > c_buffer_size)
        {
            // Can never fit in buffer1: bypass the buffer entirely.
            WriteFile (fp, ptr, c, &written, 0);
            return;
        }
    }

    memcpy (&buffer1 [write_ptr], ptr, c);
    write_ptr += c;
}
// write to a double buffer, when buffer full, write the buffer to the file using
// asynchronous WriteFile (waiting for previous write to complete)
void my_fwrite (void *ptr, int size, int count, HANDLE fp, HANDLE wait)
{
const int
c = size * count;
if (write_ptr + c > c_buffer_size)
{
WaitForSingleObject (wait, INFINITE);
overlapped.Offset = write_offset & 0xffffffff;
overlapped.OffsetHigh = write_offset >> 32;
overlapped.hEvent = wait;
WriteFile (fp, current_buffer, write_ptr, 0, &overlapped);
write_offset += write_ptr;
write_ptr = 0;
current_buffer = current_buffer == buffer1 ? buffer2 : buffer1;
}
memcpy (current_buffer + write_ptr, ptr, c);
write_ptr += c;
}
int main ()
{
// do lots of little writes
FILE
*f1 = fopen ("f1.bin", "wb");
LARGE_INTEGER
f1_start,
f1_end;
QueryPerformanceCounter (&f1_start);
for (int i = 0 ; i < c_iterations ; ++i)
{
fwrite (&i, sizeof i, 1, f1);
}
fclose (f1);
QueryPerformanceCounter (&f1_end);
// do a few big writes
FILE
*f2 = fopen ("f2.bin", "wb");
LARGE_INTEGER
f2_start,
f2_end;
QueryPerformanceCounter (&f2_start);
for (int i = 0 ; i < c_iterations ; ++i)
{
my_fwrite (&i, sizeof i, 1, f2);
}
if (write_ptr)
{
fwrite (buffer1, write_ptr, 1, f2);
write_ptr = 0;
}
fclose (f2);
QueryPerformanceCounter (&f2_end);
// use Win32 API, without buffer
HANDLE
f3 = CreateFile (TEXT ("f3.bin"), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);
LARGE_INTEGER
f3_start,
f3_end;
QueryPerformanceCounter (&f3_start);
for (int i = 0 ; i < c_iterations ; ++i)
{
DWORD
written;
WriteFile (f3, &i, sizeof i, &written, 0);
}
CloseHandle (f3);
QueryPerformanceCounter (&f3_end);
// use Win32 API, with buffer
HANDLE
f4 = CreateFile (TEXT ("f4.bin"), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, FILE_FLAG_WRITE_THROUGH, 0);
LARGE_INTEGER
f4_start,
f4_end;
QueryPerformanceCounter (&f4_start);
for (int i = 0 ; i < c_iterations ; ++i)
{
my_fwrite (&i, sizeof i, 1, f4);
}
if (write_ptr)
{
DWORD
written;
WriteFile (f4, buffer1, write_ptr, &written, 0);
write_ptr = 0;
}
CloseHandle (f4);
QueryPerformanceCounter (&f4_end);
// use Win32 API, with double buffering
HANDLE
f5 = CreateFile (TEXT ("f5.bin"), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, FILE_FLAG_OVERLAPPED | FILE_FLAG_WRITE_THROUGH, 0),
wait = CreateEvent (0, false, true, 0);
LARGE_INTEGER
f5_start,
f5_end;
QueryPerformanceCounter (&f5_start);
for (int i = 0 ; i < c_iterations ; ++i)
{
my_fwrite (&i, sizeof i, 1, f5, wait);
}
if (write_ptr)
{
WaitForSingleObject (wait, INFINITE);
overlapped.Offset = write_offset & 0xffffffff;
overlapped.OffsetHigh = write_offset >> 32;
overlapped.hEvent = wait;
WriteFile (f5, current_buffer, write_ptr, 0, &overlapped);
WaitForSingleObject (wait, INFINITE);
write_ptr = 0;
}
CloseHandle (f5);
QueryPerformanceCounter (&f5_end);
CloseHandle (wait);
LARGE_INTEGER
freq;
QueryPerformanceFrequency (&freq);
printf (" fwrites without buffering = %dms\n", (1000 * (f1_end.QuadPart - f1_start.QuadPart)) / freq.QuadPart);
printf (" fwrites with buffering = %dms\n", (1000 * (f2_end.QuadPart - f2_start.QuadPart)) / freq.QuadPart);
printf (" Win32 without buffering = %dms\n", (1000 * (f3_end.QuadPart - f3_start.QuadPart)) / freq.QuadPart);
printf (" Win32 with buffering = %dms\n", (1000 * (f4_end.QuadPart - f4_start.QuadPart)) / freq.QuadPart);
printf ("Win32 with double buffering = %dms\n", (1000 * (f5_end.QuadPart - f5_start.QuadPart)) / freq.QuadPart);
}
`mycmd > file`. Is there a point that I'm missing? – Calderon
`fwrite` causes a system call? – Fibril
`>` only causes the shell to open the file and pass the file descriptor to the process. It doesn't cause all I/O to pass through the shell, thank heavens. – Fibril