Here is a generalized version of Andrew Hanlon's solution. It returns a TransformBlock-like block (an IPropagatorBlock<TInput, TOutput>) that supports posting messages recursively to itself, and completes automatically when there are no more messages to process.
The transform lambda has three arguments instead of the usual one. The first argument is the item being processed. The second argument is the "path" of the processed message, which is a sequence (IEnumerable<TInput>) containing its parent messages. The third argument is an Action<TInput> that posts new messages to the block, as children of the current message.
/// <summary>Creates a dataflow block that supports posting messages to itself,
/// and knows when it has completed processing all messages.</summary>
/// <typeparam name="TInput">The type of the messages accepted by the block.</typeparam>
/// <typeparam name="TOutput">The type of the results emitted by the block.</typeparam>
/// <param name="transform">Asynchronous delegate invoked for every message. It receives
/// the item, the sequence of its ancestor items (empty for items posted from outside),
/// and an action that posts a new item as a child of the current one.</param>
/// <param name="dataflowBlockOptions">Options for the internal processing block. Its
/// CancellationToken is shared by all internal blocks, and its BoundedCapacity is also
/// applied to the block that receives external messages. When null, default options
/// are used.</param>
/// <returns>A propagator block that accepts external messages and emits the results
/// of <paramref name="transform"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
    CreateRecursiveTransformBlock<TInput, TOutput>(
    Func<TInput, IEnumerable<TInput>, Action<TInput>, Task<TOutput>> transform,
    ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
    if (transform == null) throw new ArgumentNullException(nameof(transform));
    dataflowBlockOptions = dataflowBlockOptions ?? new ExecutionDataflowBlockOptions();

    // Number of messages accepted but not yet fully processed, plus one token
    // that is released when input1 completes. Reaching zero means "all done".
    int pendingCount = 1; // The initial 1 represents the completion of input1 block

    // Receives messages from outside; external messages have an empty path.
    var input1 = new TransformBlock<TInput, (TInput, IEnumerable<TInput>)>(item =>
    {
        Interlocked.Increment(ref pendingCount);
        return (item, Enumerable.Empty<TInput>());
    }, new ExecutionDataflowBlockOptions()
    {
        CancellationToken = dataflowBlockOptions.CancellationToken,
        BoundedCapacity = dataflowBlockOptions.BoundedCapacity
    });

    // Receives messages posted recursively from inside the transform.
    var input2 = new BufferBlock<(TInput, IEnumerable<TInput>)>(new DataflowBlockOptions()
    {
        CancellationToken = dataflowBlockOptions.CancellationToken
        // Unbounded capacity
    });

    // Processes the messages coming from both inputs.
    var output = new TransformBlock<(TInput, IEnumerable<TInput>), TOutput>(async entry =>
    {
        try
        {
            var (item, path) = entry;
            var postChildAction = CreatePostAction(item, path);
            return await transform(item, path, postChildAction).ConfigureAwait(false);
        }
        finally
        {
            // The last message to finish (after input1 has completed) closes input2,
            // which in turn completes the output block.
            if (Interlocked.Decrement(ref pendingCount) == 0) input2.Complete();
        }
    }, dataflowBlockOptions);

    Action<TInput> CreatePostAction(TInput parentItem, IEnumerable<TInput> parentPath)
    {
        return item =>
        {
            // Count the child BEFORE posting it. If the counter were incremented
            // after a successful Post, a parallel worker could process the child
            // and decrement the counter to zero first, completing input2 while
            // the parent is still posting, and subsequent children would be lost.
            Interlocked.Increment(ref pendingCount);

            // The Post is expected to be unsuccessful only in case of block failure
            // or cancellation, so the only action needed is to undo the increment.
            if (!input2.Post((item, parentPath.Append(parentItem))))
            {
                Interlocked.Decrement(ref pendingCount);
            }
        };
    }

    input1.LinkTo(output);
    input2.LinkTo(output);

    PropagateCompletion(input1, input2,
        condition: () => Interlocked.Decrement(ref pendingCount) == 0);
    PropagateCompletion(input2, output);
    PropagateFailure(output, input1, input2); // Ensure that all blocks are faulted

    return DataflowBlock.Encapsulate(input1, output);

    // Fire-and-forget: when block1 completes, faults block2 with block1's error, or
    // completes block2 if the optional condition holds. Does nothing on cancellation.
    async void PropagateCompletion(IDataflowBlock block1, IDataflowBlock block2,
        Func<bool> condition = null)
    {
        try
        {
            await block1.Completion.ConfigureAwait(false);
        }
        catch { } // Not swallowed: the outcome is inspected on the task below

        if (block1.Completion.Exception != null)
        {
            block2.Fault(block1.Completion.Exception.InnerException);
        }
        else
        {
            if (block1.Completion.IsCanceled) return; // On cancellation do nothing
            if (condition == null || condition()) block2.Complete();
        }
    }

    // Fire-and-forget: when block1 fails, faults block2 and block3 with the same error.
    async void PropagateFailure(IDataflowBlock block1, IDataflowBlock block2,
        IDataflowBlock block3)
    {
        try
        {
            await block1.Completion.ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            if (block1.Completion.IsCanceled) return; // On cancellation do nothing
            block2.Fault(ex); block3.Fault(ex);
        }
    }
}
/// <summary>Overload with synchronous delegate. Creates a dataflow block that supports
/// posting messages to itself, by wrapping the result of <paramref name="transform"/>
/// in a completed task and delegating to the asynchronous overload.</summary>
/// <param name="transform">Synchronous delegate invoked for every message. It receives
/// the item, the sequence of its parent items, and an action that posts a new item
/// as a child of the current one.</param>
/// <param name="dataflowBlockOptions">Options passed through unchanged to the
/// asynchronous overload; may be null.</param>
/// <returns>The propagator block created by the asynchronous overload.</returns>
/// <exception cref="ArgumentNullException"><paramref name="transform"/> is null.</exception>
public static IPropagatorBlock<TInput, TOutput>
    CreateRecursiveTransformBlock<TInput, TOutput>(
    Func<TInput, IEnumerable<TInput>, Action<TInput>, TOutput> transform,
    ExecutionDataflowBlockOptions dataflowBlockOptions = null)
{
    // Validate here: the wrapping lambda below is never null, so without this check
    // a null delegate would only fail later, when the first message is processed.
    if (transform == null) throw new ArgumentNullException(nameof(transform));

    return CreateRecursiveTransformBlock<TInput, TOutput>((item, path, postAction) =>
        Task.FromResult(transform(item, path, postAction)), dataflowBlockOptions);
}
The resulting block consists internally of three blocks: two input blocks that receive messages, and one output block that processes the messages. The first input block receives messages from outside, and the second input block receives messages from inside. The second input block has unbounded capacity, so an infinite recursion will eventually result in an OutOfMemoryException.
Usage example:
// Usage example: walks a folder tree recursively, emitting the number of files
// found in each folder.
var fileCounter = CreateRecursiveTransformBlock<string, int>(
    (folderPath, parentPaths, postChild) =>
    {
        // Queue every immediate subfolder as a child of the current folder.
        foreach (var subfolder in Directory.EnumerateDirectories(folderPath))
        {
            postChild(subfolder);
        }

        // Count once: EnumerateFiles is lazy, so each Count() call would scan
        // the directory again, and the two results could even differ.
        int fileCount = Directory.EnumerateFiles(folderPath).Count();
        Console.WriteLine($"{folderPath} has {fileCount} files"
            + $", and is {parentPaths.Count()} levels deep");
        return fileCount;
    });

// The results are not needed here; discard them so that the block can drain.
fileCounter.LinkTo(DataflowBlock.NullTarget<int>());
fileCounter.Post(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments));
// No more external messages; the block completes when the recursion is exhausted.
fileCounter.Complete();
fileCounter.Completion.Wait();
The above code prints to the console every folder in the "MyDocuments" tree (the root folder included), together with the number of files it contains and its depth.