Let me propose another option, which lazily yields both the sequence of groups and
the elements inside each group.
Demonstration in .NET Fiddle
Implementation:
/// <summary>
/// Extension methods for lazily grouping adjacent elements of a sequence.
/// </summary>
public static class EnumerableExtensions
{
    /// <summary>
    /// Groups runs of consecutive elements of <paramref name="source"/> that share the same key.
    /// Both the sequence of groups and the elements inside each group are produced lazily.
    /// </summary>
    /// <remarks>
    /// All groups read from one shared, forward-only cursor over <paramref name="source"/>.
    /// Enumerate a group while it is the current item of the outer sequence, and at most once:
    /// when the outer sequence advances, any elements of the group that were not consumed are skipped.
    /// <paramref name="keySelector"/> is invoked exactly once per source element that is reached.
    /// </remarks>
    /// <typeparam name="TSource">Type of the elements of <paramref name="source"/>.</typeparam>
    /// <typeparam name="TKey">Type of the key produced by <paramref name="keySelector"/>.</typeparam>
    /// <param name="source">Sequence whose adjacent elements are grouped.</param>
    /// <param name="keySelector">Function extracting the grouping key from an element.</param>
    /// <param name="comparer">
    /// Comparer used to decide whether two keys are equal;
    /// <see cref="EqualityComparer{T}.Default"/> is used when <c>null</c>.
    /// </param>
    /// <returns>A lazy sequence of groups, one per run of adjacent elements with equal keys.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="keySelector"/> is <c>null</c>.
    /// </exception>
    public static IEnumerable<IGrouping<TKey, TSource>> GroupAdjacent<TSource, TKey>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        IEqualityComparer<TKey>? comparer = null)
    {
        // Validate here, outside the iterator method, so that bad arguments fail at the call site
        // instead of on the first MoveNext of the returned sequence.
        if (source is null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (keySelector is null)
        {
            throw new ArgumentNullException(nameof(keySelector));
        }

        return GroupAdjacentIterator(source, keySelector, comparer ?? EqualityComparer<TKey>.Default);
    }

    /// <summary>Deferred part of <c>GroupAdjacent</c>: walks the source and yields one group per run.</summary>
    private static IEnumerable<IGrouping<TKey, TSource>> GroupAdjacentIterator<TSource, TKey>(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        IEqualityComparer<TKey> comparer)
    {
        using var iterator = new Iterator<TSource, TKey>(source.GetEnumerator(), keySelector);
        iterator.MoveNext();
        while (iterator.HasCurrent)
        {
            var key = iterator.CurrentKey;
            yield return new Grouping<TKey, TSource>(key, YieldAdjacentElements(iterator, key, comparer));

            // The consumer may have read only part of the group (or none of it);
            // skip whatever is left so the cursor rests on the first element of the next run.
            while (iterator.HasCurrentWithKey(key, comparer))
            {
                iterator.MoveNext();
            }
        }
    }

    /// <summary>Yields elements from the shared cursor for as long as their key equals <paramref name="key"/>.</summary>
    private static IEnumerable<TSource> YieldAdjacentElements<TSource, TKey>(
        Iterator<TSource, TKey> iterator,
        TKey key,
        IEqualityComparer<TKey> comparer)
    {
        while (iterator.HasCurrentWithKey(key, comparer))
        {
            yield return iterator.Current;
            iterator.MoveNext();
        }
    }

    /// <summary>A key paired with a lazily evaluated sequence of its elements.</summary>
    private sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
    {
        public Grouping(TKey key, IEnumerable<TElement> elements)
        {
            Key = key;
            Elements = elements;
        }

        public TKey Key { get; }

        public IEnumerable<TElement> Elements { get; }

        public IEnumerator<TElement> GetEnumerator() => Elements.GetEnumerator();

        IEnumerator IEnumerable.GetEnumerator() => Elements.GetEnumerator();
    }

    /// <summary>
    /// Cursor over the source enumerator that remembers whether it is positioned on an element
    /// and caches that element's key, so the key selector runs once per element.
    /// </summary>
    private sealed class Iterator<TSource, TKey> : IDisposable
    {
        private readonly IEnumerator<TSource> _enumerator;
        private readonly Func<TSource, TKey> _keySelector;

        public Iterator(IEnumerator<TSource> enumerator, Func<TSource, TKey> keySelector)
        {
            _enumerator = enumerator;
            _keySelector = keySelector;
        }

        /// <summary><c>true</c> while the cursor is positioned on an element.</summary>
        public bool HasCurrent { get; private set; }

        public TSource Current => _enumerator.Current;

        /// <summary>Key of <see cref="Current"/>; meaningful only while <see cref="HasCurrent"/> is <c>true</c>.</summary>
        public TKey CurrentKey { get; private set; } = default!;

        public void MoveNext()
        {
            // Clear the flag first: if the key selector throws, the cursor must not
            // claim to hold an element whose cached key is stale.
            HasCurrent = false;
            if (_enumerator.MoveNext())
            {
                CurrentKey = _keySelector(_enumerator.Current);
                HasCurrent = true;
            }
        }

        /// <summary>Whether the cursor is on an element whose key equals <paramref name="key"/>.</summary>
        public bool HasCurrentWithKey(TKey key, IEqualityComparer<TKey> comparer) =>
            HasCurrent && comparer.Equals(CurrentKey, key);

        public void Dispose() => _enumerator.Dispose();
    }
}
Note that it is impossible to achieve this level of laziness with the regular GroupBy
operation, since it needs to look through the whole collection before yielding the first group.
In my case in particular, migrating from GroupBy
to GroupAdjacent,
combined with lazy handling of the whole pipeline, helped to resolve memory consumption issues for large sequences.
In general, GroupAdjacent
can be used as a lazy and more efficient alternative to GroupBy,
provided that the keys of the input collection are sorted (or at least not fragmented) and that all operations in the pipeline are lazy.