June 22, 2021

Implementing a Batch in Linq

When enumerating database records with LINQ, it is important to batch the work; otherwise LINQ can generate SQL statements that are too big (regardless of whether Entity Framework or LINQ to SQL is being used). Another problem is that it can take a long time to process all the records. Not only can that require a lot of memory, but after a while you will start to wonder whether the code has crashed. It is better to see a batch finish every now and then, and thus know that the work is actually progressing.
// Source here : https://stackoverflow.com/questions/13731796/create-batches-in-linq
// Example here : https://dotnetfiddle.net/HpRgd5
public static class BatchLinq
{
    /// <summary>
    /// Fully lazy implementation of a batch: splits <paramref name="source"/> into
    /// consecutive batches of at most <paramref name="size"/> elements without
    /// buffering any of them.
    /// </summary>
    /// <remarks>
    /// Every batch reads from the one enumerator that is shared with the outer
    /// sequence, so a batch is a single-pass view that is only valid while it is the
    /// current item of the outer enumeration. Elements of a batch that the caller did
    /// not read are skipped when the outer enumeration advances, so batch boundaries
    /// always fall on multiples of <paramref name="size"/>. A batch that has been read
    /// to its end, or whose turn has passed, throws
    /// <see cref="InvalidOperationException"/> if it is enumerated again. Starting a new
    /// enumeration of a batch that was only partially read does not replay it; it
    /// resumes from the element the shared enumerator is currently positioned on.
    /// Use <see cref="BatchByList{T}"/> when batches must be stored, enumerated more
    /// than once, or enumerated out of order.
    /// </remarks>
    /// <typeparam name="T">The type being enumerated</typeparam>
    /// <param name="source">The source enumerable</param>
    /// <param name="size">Size of the batch</param>
    /// <returns>An enumeration of batches with size elements
    /// in each batch (the last batch may be shorter)</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> source, int size)
    {
        // Validation lives in this non-iterator wrapper so that bad arguments fail at
        // the call site instead of on the first MoveNext of the returned sequence.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (size <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(size), "Must be greater than zero.");
        }
        return BatchIterator(source, size);
    }

    // Iterator half of Batch: hands out one InnerBatch view per group of size elements.
    private static IEnumerable<IEnumerable<T>> BatchIterator<T>(IEnumerable<T> source, int size)
    {
        using (var enumerator = source.GetEnumerator())
        {
            while (enumerator.MoveNext())
            {
                var innerBatch = new InnerBatch<T>(enumerator, size);
                try
                {
                    yield return innerBatch.Items();

                    // Step over whatever the caller left unread so that the next
                    // batch starts exactly size elements after this one.
                    innerBatch.SkipRemaining();
                }
                finally
                {
                    // Runs both on normal advance and when the outer enumeration is
                    // abandoned; either way the batch must never touch the shared
                    // enumerator again.
                    innerBatch.Close();
                }

                if (innerBatch.SourceExhausted)
                {
                    // The source already reported its end; do not call MoveNext again.
                    yield break;
                }
            }
        }
    }

    // Single-pass view over one batch of the shared source enumerator. The position
    // within the batch is kept on the instance (not per enumeration) so that the
    // batch never advances the source past its own boundary.
    private sealed class InnerBatch<T>
    {
        private readonly IEnumerator<T> _source;
        private readonly int _size;

        // Number of source elements that belong to this batch and have been advanced
        // onto so far. Starts at 1: the outer loop is already positioned on the first.
        private int _taken = 1;
        private bool _closed;

        public InnerBatch(IEnumerator<T> source, int size)
        {
            _source = source;
            _size = size;
        }

        // True once the shared enumerator has returned false from MoveNext.
        public bool SourceExhausted { get; private set; }

        // Yields the current element of the shared enumerator and then the remaining
        // elements of this batch.
        public IEnumerable<T> Items()
        {
            if (_closed)
            {
                throw new InvalidOperationException(
                    "This batch has already been consumed. Batches produced by Batch are single-pass; " +
                    "use BatchByList when batches must be enumerated more than once or out of order.");
            }

            do
            {
                yield return _source.Current;
            }
            while (TryAdvance());

            _closed = true;
        }

        // Advances the shared enumerator past every element of this batch that has
        // not been read yet.
        public void SkipRemaining()
        {
            while (TryAdvance())
            {
            }
        }

        // Marks the batch as no longer usable without touching the enumerator.
        public void Close()
        {
            _closed = true;
        }

        // Moves the shared enumerator to the next element of this batch. Returns false
        // when the batch is closed, already holds size elements, or the source ended.
        private bool TryAdvance()
        {
            if (_closed || SourceExhausted || _taken >= _size)
            {
                return false;
            }
            if (!_source.MoveNext())
            {
                SourceExhausted = true;
                return false;
            }
            _taken++;
            return true;
        }
    }

    /// <summary>
    /// Splits <paramref name="source"/> into consecutive batches, each materialised
    /// as its own list.
    /// </summary>
    /// <remarks>
    /// This is safe to use if the batches are small, but it is not a fully lazy
    /// evaluation of the batch like <see cref="Batch{T}"/>: every batch is buffered
    /// before it is returned. Because each batch is an independent list, this is the
    /// version to use when batches are not enumerated fully, are enumerated more than
    /// once, or are kept after the outer enumeration has advanced.
    /// </remarks>
    /// <typeparam name="T">The type being enumerated</typeparam>
    /// <param name="source">The source enumerable</param>
    /// <param name="size">Size of the batch</param>
    /// <returns>An enumeration of lists with size elements in each list
    /// (the last list may be shorter)</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
    public static IEnumerable<IList<T>> BatchByList<T>(this IEnumerable<T> source, int size)
    {
        // Same eager validation as Batch, so both methods reject bad arguments alike.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (size <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(size), "Must be greater than zero.");
        }
        return BatchByListIterator(source, size);
    }

    // Iterator half of BatchByList: fills a list until it holds size items, yields
    // it, and starts a new one; a final partial list is yielded if it is non-empty.
    private static IEnumerable<IList<T>> BatchByListIterator<T>(IEnumerable<T> source, int size)
    {
        List<T> batch = new List<T>(size);

        foreach (var item in source)
        {
            batch.Add(item);

            if (batch.Count >= size)
            {
                yield return batch;
                batch = new List<T>(size);
            }
        }

        if (batch.Count > 0)
        {
            yield return batch;
        }
    }
}

No comments: