From information on ensuring data is on disk (http://winntfs.com/2012/11/29/windows-write-caching-part-2-an-overview-for-application-developers/), it appears that on Windows platforms you need to rely on FlushFileBuffers, its "fsync" equivalent, to get the best guarantee that buffers are actually flushed from the disk device's caches onto the storage medium itself, even in the case of e.g. a power outage. If this information is correct, the combination of FILE_FLAG_NO_BUFFERING with FILE_FLAG_WRITE_THROUGH does not ensure that the device cache is flushed, but merely has an effect on the file system cache.
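To make the distinction concrete, here is a minimal sketch of the two approaches (the FlushSketch class and file name are illustrative only, not part of the benchmark below); as far as I can tell, FileStream.Flush(true) invokes FlushFileBuffers under the hood:

using System.IO;
using System.Runtime.InteropServices;
using Microsoft.Win32.SafeHandles;

static class FlushSketch
{
    // FILE_FLAG_NO_BUFFERING has no FileOptions member; 0x20000000 is its Win32 value.
    const FileOptions NoBuffering = (FileOptions)0x20000000;

    [DllImport("kernel32.dll", SetLastError = true)]
    static extern bool FlushFileBuffers(SafeFileHandle hFile);

    static void Demo()
    {
        // Write-through + unbuffered handle: writes bypass the file system cache,
        // but per the article above this alone need not flush the device cache.
        // (FILE_FLAG_NO_BUFFERING additionally requires sector-aligned writes,
        // elided here.)
        using (var f = new FileStream("file.dat", FileMode.Open, FileAccess.Write,
                                      FileShare.None, 4096,
                                      FileOptions.WriteThrough | NoBuffering))
        {
            // ... sector-aligned writes ...
        }

        // Explicit "fsync": FileStream.Flush(true) calls FlushFileBuffers, which
        // also asks the device to flush its own cache.
        using (var f = new FileStream("file.dat", FileMode.Open, FileAccess.Write))
        {
            f.WriteByte(1);
            f.Flush(true);
            // or equivalently, via P/Invoke:
            FlushFileBuffers(f.SafeFileHandle);
        }
    }
}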
Given that I will be working with rather large files that need to be updated "transactionally", this means performing an "fsync" at the end of each transaction commit. So I created a small app to test the performance of doing so. It essentially performs sequential writes of a batch of 8 memory-page-sized blocks of random bytes, using 8 writes followed by a flush; the batch is repeated in a loop, and after every so many written pages it records the performance. Additionally it has two configurable options: whether to perform an "fsync" on a flush, and whether to write a byte to the last position of the file before beginning the page writes.
// Code updated to reflect new results as discussed in answer below.
// 26/Aug/2013: Code updated again to reflect results as discussed in follow-up question.
// 28/Aug/2013: Increased file stream buffer to ensure 8 page flushes.
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.MemoryMappedFiles;
using System.Runtime.InteropServices;
using Microsoft.Win32.SafeHandles;

class Program
{
    static void Main(string[] args)
    {
        BenchSequentialWrites(reuseExistingFile: false);
    }
    public static void BenchSequentialWrites(bool reuseExistingFile = false)
    {
        Tuple<string, bool, bool, bool, bool>[] scenarios = new Tuple<string, bool, bool, bool, bool>[]
        {   // output csv, fsync?, fill end?, write through?, mem map?
            Tuple.Create("timing FS-E-B-F.csv",   true,  false, false, false),
            Tuple.Create("timing NS-E-B-F.csv",   false, false, false, false),
            Tuple.Create("timing FS-LB-B-F.csv",  true,  true,  false, false),
            Tuple.Create("timing NS-LB-B-F.csv",  false, true,  false, false),
            Tuple.Create("timing FS-E-WT-F.csv",  true,  false, true,  false),
            Tuple.Create("timing NS-E-WT-F.csv",  false, false, true,  false),
            Tuple.Create("timing FS-LB-WT-F.csv", true,  true,  true,  false),
            Tuple.Create("timing NS-LB-WT-F.csv", false, true,  true,  false),
            Tuple.Create("timing FS-E-B-MM.csv",  true,  false, false, true),
            Tuple.Create("timing NS-E-B-MM.csv",  false, false, false, true),
            Tuple.Create("timing FS-LB-B-MM.csv", true,  true,  false, true),
            Tuple.Create("timing NS-LB-B-MM.csv", false, true,  false, true),
            Tuple.Create("timing FS-E-WT-MM.csv", true,  false, true,  true),
            Tuple.Create("timing NS-E-WT-MM.csv", false, false, true,  true),
            Tuple.Create("timing FS-LB-WT-MM.csv", true, true,  true,  true),
            Tuple.Create("timing NS-LB-WT-MM.csv", false, true, true,  true),
        };
        foreach (var scenario in scenarios)
        {
            Console.WriteLine("{0,-12} {1,-16} {2,-16} {3,-16} {4}", "Total pages", "Interval pages", "Total time", "Interval time", "MB/s");
            CollectGarbage();
            var timingResults = SequentialWriteTest("test.data", !reuseExistingFile, fillEnd: scenario.Item3, nPages: 200 * 1000, fSync: scenario.Item2, writeThrough: scenario.Item4, writeToMemMap: scenario.Item5);
            using (var report = File.CreateText(scenario.Item1))
            {
                report.WriteLine("Total pages,Interval pages,Total bytes,Interval bytes,Total time,Interval time,MB/s");
                foreach (var entry in timingResults)
                {
                    Console.WriteLine("{0,-12} {1,-16} {2,-16} {3,-16} {4:F2}", entry.Item1, entry.Item2, entry.Item5, entry.Item6, entry.Item7);
                    report.WriteLine("{0},{1},{2},{3},{4},{5},{6}", entry.Item1, entry.Item2, entry.Item3, entry.Item4, entry.Item5.TotalSeconds, entry.Item6.TotalSeconds, entry.Item7);
                }
            }
        }
    }
    public unsafe static IEnumerable<Tuple<long, long, long, long, TimeSpan, TimeSpan, double>> SequentialWriteTest(
        string fileName,
        bool createNewFile,
        bool fillEnd,
        long nPages,
        bool fSync = true,
        bool writeThrough = false,
        bool writeToMemMap = false,
        long pageSize = 4096)
    {
        // Create or open the file and, if requested, fill in its last byte.
        var fileMode = createNewFile ? FileMode.Create : FileMode.OpenOrCreate;
        using (var tmpFile = new FileStream(fileName, fileMode, FileAccess.ReadWrite, FileShare.ReadWrite, (int)pageSize))
        {
            Console.WriteLine("Opening temp file with mode {0}{1}", fileMode, fillEnd ? " and writing last byte." : ".");
            tmpFile.SetLength(nPages * pageSize);
            if (fillEnd)
            {
                tmpFile.Position = tmpFile.Length - 1;
                tmpFile.WriteByte(1);
                tmpFile.Position = 0;
                tmpFile.Flush(true);
            }
        }

        // Make sure any flushing / activity has completed.
        System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1));
        System.Threading.Thread.SpinWait(50); // warm up.

        var buf = new byte[pageSize];
        new Random().NextBytes(buf);
        var ms = new System.IO.MemoryStream(buf);
        var stopwatch = new System.Diagnostics.Stopwatch();
        var timings = new List<Tuple<long, long, long, long, TimeSpan, TimeSpan, double>>();
        var pageTimingInterval = 8 * 2000;
        var prevPages = 0L;
        var prevElapsed = TimeSpan.FromMilliseconds(0);

        // Open the file. FILE_FLAG_NO_BUFFERING has no FileOptions member, so
        // pass the raw Win32 flag value.
        const FileOptions NoBuffering = (FileOptions)0x20000000;
        var options = writeThrough ? (FileOptions.WriteThrough | NoBuffering) : FileOptions.None;
        using (var file = new FileStream(fileName, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite, (int)(16 * pageSize), options))
        {
            stopwatch.Start();
            if (writeToMemMap)
            {
                // Write pages through a memory map.
                using (var mmf = MemoryMappedFile.CreateFromFile(file, Guid.NewGuid().ToString(), file.Length, MemoryMappedFileAccess.ReadWrite, null, HandleInheritability.None, true))
                using (var accessor = mmf.CreateViewAccessor(0, file.Length, MemoryMappedFileAccess.ReadWrite))
                {
                    byte* base_ptr = null;
                    accessor.SafeMemoryMappedViewHandle.AcquirePointer(ref base_ptr);
                    var offset = 0L;
                    for (long i = 0; i < nPages / 8; i++)
                    {
                        using (var memStream = new UnmanagedMemoryStream(base_ptr + offset, 8 * pageSize, 8 * pageSize, FileAccess.ReadWrite))
                        {
                            for (int j = 0; j < 8; j++)
                            {
                                ms.CopyTo(memStream);
                                ms.Position = 0;
                            }
                        }
                        FlushViewOfFile((IntPtr)(base_ptr + offset), (IntPtr)(8 * pageSize));
                        offset += 8 * pageSize;
                        if (fSync)
                            FlushFileBuffers(file.SafeFileHandle);
                        if (((i + 1) * 8) % pageTimingInterval == 0)
                            timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, (i + 1) * 8, ref prevPages, pageSize));
                    }
                    accessor.SafeMemoryMappedViewHandle.ReleasePointer();
                }
            }
            else
            {
                for (long i = 0; i < nPages / 8; i++)
                {
                    for (int j = 0; j < 8; j++)
                    {
                        ms.CopyTo(file);
                        ms.Position = 0;
                    }
                    file.Flush(fSync);
                    if (((i + 1) * 8) % pageTimingInterval == 0)
                        timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, (i + 1) * 8, ref prevPages, pageSize));
                }
            }
        }
        timings.Add(Report(stopwatch.Elapsed, ref prevElapsed, nPages, ref prevPages, pageSize));
        return timings;
    }
    private static Tuple<long, long, long, long, TimeSpan, TimeSpan, double> Report(TimeSpan elapsed, ref TimeSpan prevElapsed, long curPages, ref long prevPages, long pageSize)
    {
        var intervalPages = curPages - prevPages;
        var intervalElapsed = elapsed - prevElapsed;
        var intervalPageSize = intervalPages * pageSize;
        var mbps = (intervalPageSize / (1024.0 * 1024.0)) / intervalElapsed.TotalSeconds;
        prevElapsed = elapsed;
        prevPages = curPages;
        return Tuple.Create(curPages, intervalPages, curPages * pageSize, intervalPageSize, elapsed, intervalElapsed, mbps);
    }

    private static void CollectGarbage()
    {
        GC.Collect();
        GC.WaitForPendingFinalizers();
        System.Threading.Thread.Sleep(200);
        GC.Collect();
        GC.WaitForPendingFinalizers();
        System.Threading.Thread.SpinWait(10);
    }
[DllImport("kernel32.dll", SetLastError = true)]
static extern bool FlushViewOfFile(
IntPtr lpBaseAddress, int dwNumBytesToFlush);
[DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Auto)]
static extern bool FlushFileBuffers(SafeFileHandle hFile);
}
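Note that, because the memory-mapped path uses raw pointers, the benchmark needs to be built with unsafe code enabled, e.g. (assuming the plain csc command-line compiler):

csc /unsafe Program.cs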
The performance results I am obtaining (64-bit Windows 7, slow spindle disk) are not very encouraging. It appears that "fsync" performance depends very much on the size of the file being flushed rather than on the amount of "dirty" data to be flushed, so that the file size dominates the time. The graph below shows the results for the four different option settings of the little benchmark app.

As you can see, "fsync" performance decreases exponentially as the file grows (until, at a few GB, it really grinds to a halt). Also, the disk itself does not seem to be doing a whole lot (i.e. resource monitor shows its active time at just around a few percent, and its disk queue as mostly empty most of the time).

I had obviously expected "fsync" performance to be quite a bit worse than normal buffered flushes, but I had expected it to be more or less constant and independent of file size. As it stands, it would seem that "fsync" is not usable in combination with a single large file.

Does somebody have an explanation, different experiences, or a different solution that allows ensuring data is on disk and that has more or less constant, predictable performance?
UPDATED: See the new information in the answer below.