TL;DR
I know this is an old post, but I have implemented a solution that starts from one of the proposed answers, extends it, and solves some of its problems. I have also read the other answers, and compared with them I believe this solution is much better suited to the requirements formulated in the original question.
In this solution I extend Slauma's approach, which I would say is perfect for the case proposed in the original question: using Entity Framework and a TransactionScope for an expensive write operation on the db.
Slauma's solution - which, incidentally, was a draft used only to get an idea of the speed of EF with a bulk-insert strategy - had problems due to:
- the timeout of the transaction (1 minute by default, extendable via code up to a maximum of 10 minutes);
- the duplication, at the end of the transaction, of the first block of data, whose size equals the commit size in use (this problem is quite weird and is circumvented by means of a workaround).
I also extended the case study presented by Slauma with an example that includes the simultaneous insertion of several dependent entities.
The performance I was able to measure was about 10K records/min when inserting into the db a block of 200K records of approximately 1KB each. The speed was constant, there was no degradation in performance, and the test took about 20 minutes to run successfully.
The solution in detail
The method that performs the bulk-insert operation, placed in an example repository class:
/// <summary>
/// Example repository showing how to insert one "father" entity together with a large
/// list of dependent "child" entities through Entity Framework, inside a single
/// <see cref="TransactionScope"/>.
/// </summary>
abstract class SomeRepository
{
    /// <summary>
    /// Number of child entities added between two intermediate commits
    /// (see <see cref="MyDbContext.AddToContext{T}"/>).
    /// </summary>
    private const int CommitBlockSize = 100;

    /// <summary>
    /// Reference context. <see cref="ImportData{TChild, TFather}"/> only reads its
    /// <see cref="MyDbContext.ConnectionString"/> to open the contexts it works with.
    /// </summary>
    protected MyDbContext myDbContextRef;

    /// <summary>
    /// Inserts <paramref name="entityFather"/> and then every element of
    /// <paramref name="entities"/>, committing the children in blocks and recreating the
    /// context after each block. The whole operation runs in one transaction scope, which
    /// is completed only if no exception was thrown.
    /// </summary>
    /// <typeparam name="TChild">Type of the dependent entities.</typeparam>
    /// <typeparam name="TFather">Type of the entity the children refer to.</typeparam>
    /// <param name="entities">Children to insert; each one gets its
    /// <see cref="IEntityChild.EntityFatherRefId"/> set to the father's id.</param>
    /// <param name="entityFather">Father entity. Its
    /// <see cref="IEntityFather.BulkInsertResult"/> is stored as <c>false</c> when the father
    /// is first inserted and updated to <c>true</c> once all children have been added.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="entities"/> or <paramref name="entityFather"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// <see cref="myDbContextRef"/> has not been assigned.
    /// </exception>
    public void ImportData<TChild, TFather>(List<TChild> entities, TFather entityFather)
        where TChild : class, IEntityChild
        where TFather : class, IEntityFather
    {
        // Fail before any transaction is opened or any row is written.
        if (entities == null)
        {
            throw new ArgumentNullException("entities");
        }
        if (entityFather == null)
        {
            throw new ArgumentNullException("entityFather");
        }
        if (myDbContextRef == null)
        {
            throw new InvalidOperationException("myDbContextRef must be assigned before calling ImportData.");
        }

        using (var scope = MyDbContext.CreateTransactionScope())
        {
            // Not a using-variable: AddToContext may dispose the context and hand back a new one.
            MyDbContext context = null;
            try
            {
                context = new MyDbContext(myDbContextRef.ConnectionString);
                context.Configuration.AutoDetectChangesEnabled = false;

                // Insert the father first so that its id is available for the children.
                entityFather.BulkInsertResult = false;
                var fileEntity = context.Set<TFather>().Add(entityFather);
                context.SaveChanges();

                //avoids an issue with recreating context: EF duplicates the first commit block of data at the end of transaction!!
                // (count 0 with a block size of 1 forces an immediate commit and a fresh context.)
                context = MyDbContext.AddToContext<TChild>(context, null, 0, 1, true);

                int count = 0;
                foreach (var entityToInsert in entities)
                {
                    ++count;
                    entityToInsert.EntityFatherRefId = fileEntity.Id;
                    context = MyDbContext.AddToContext<TChild>(context, entityToInsert, count, CommitBlockSize, true);
                }

                // The father was inserted through a context that has since been disposed, so it is
                // registered with the current context and then marked Modified. The final SaveChanges
                // also commits the children of the last, possibly partial, block.
                entityFather.BulkInsertResult = true;
                context.Set<TFather>().Add(fileEntity);
                context.Entry<TFather>(fileEntity).State = EntityState.Modified;
                context.SaveChanges();
            }
            finally
            {
                if (context != null)
                {
                    context.Dispose();
                }
            }

            // Reached only when nothing above threw; otherwise the scope is disposed without completing.
            scope.Complete();
        }
    }
}
Interfaces used for example purposes only:
/// <summary>
/// Contract for a dependent entity that refers to its father entity by id.
/// </summary>
public interface IEntityChild
{
    //some properties ...

    /// <summary>
    /// Id of the father entity this child refers to.
    /// </summary>
    int EntityFatherRefId { get; set; }
}
/// <summary>
/// Contract for the father entity that a set of <see cref="IEntityChild"/> entities refers to.
/// </summary>
public interface IEntityFather
{
    /// <summary>
    /// Flag recording the outcome of the bulk insert of the children.
    /// </summary>
    bool BulkInsertResult { get; set; }

    /// <summary>
    /// Identifier of the father entity.
    /// </summary>
    int Id { get; set; }
}
The db context, where I implemented the various elements of the solution as static methods:
/// <summary>
/// Entity Framework context that also hosts, as static helpers, the building blocks of the
/// bulk-insert strategy: a transaction scope with an extended timeout and a batched,
/// context-recreating "add" operation.
/// </summary>
public class MyDbContext : DbContext
{
    /// <summary>Timeout applied by <see cref="CreateTransactionScope"/> when none is supplied.</summary>
    private static readonly TimeSpan DefaultScopeTimeout = TimeSpan.FromMinutes(30);

    /// <summary>
    /// Disables the database initializer for this context type once, instead of on every
    /// instance construction (contexts are recreated after every committed block).
    /// </summary>
    static MyDbContext()
    {
        System.Data.Entity.Database.SetInitializer<MyDbContext>(null);
    }

    /// <summary>
    /// Connection string of the underlying connection, captured at construction time so that
    /// an equivalent context can be created later.
    /// </summary>
    public string ConnectionString { get; set; }

    /// <summary>
    /// Creates the context and records the connection string of its database connection.
    /// </summary>
    /// <param name="nameOrConnectionString">Passed through to the <see cref="DbContext"/> constructor.</param>
    public MyDbContext(string nameOrConnectionString)
        : base(nameOrConnectionString)
    {
        ConnectionString = Database.Connection.ConnectionString;
    }

    /// <summary>
    /// Creates a TransactionScope whose timeout may exceed the configured maximum
    /// (30 minutes when <paramref name="timeout"/> is not supplied).
    /// </summary>
    /// <param name="_isolationLevel">Isolation level of the transaction.</param>
    /// <param name="timeout">Timeout of the transaction; <c>null</c> means 30 minutes.</param>
    /// <returns>A new scope created with <see cref="TransactionScopeOption.Required"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// The maximum timeout had to be raised but the private fields of
    /// <see cref="TransactionManager"/> could not be found.
    /// </exception>
    /// <remarks>
    /// It is possible to set isolation-level and timeout to different values. Pay close attention managing these 2 transactions working parameters.
    /// <para>Default TransactionScope values for isolation-level and timeout are the following:</para>
    /// <para>Default isolation-level is "Serializable"</para>
    /// <para>Default timeout ranges between 1 minute (default value if not specified a timeout) to max 10 minute (if not changed by code or updating max-timeout machine.config value)</para>
    /// <para>The process-wide maximum timeout is only ever raised by this method, never lowered, so
    /// requesting a short timeout does not shorten the ceiling for other transactions.</para>
    /// </remarks>
    public static TransactionScope CreateTransactionScope(IsolationLevel _isolationLevel = IsolationLevel.Serializable, TimeSpan? timeout = null)
    {
        TimeSpan requestedTimeout = timeout ?? DefaultScopeTimeout;
        TimeSpan currentMaximum = TransactionManager.MaximumTimeout;

        // NOTE(review): TimeSpan.Zero is treated as "no limit" for both the requested timeout and
        // the current maximum, which is how System.Transactions appears to interpret it - confirm
        // for the target runtime.
        bool maximumIsUnlimited = currentMaximum == TimeSpan.Zero;
        bool exceedsMaximum = requestedTimeout == TimeSpan.Zero || requestedTimeout > currentMaximum;
        if (!maximumIsUnlimited && exceedsMaximum)
        {
            // The ceiling has no public setter here, so the private cache is overwritten.
            SetTransactionManagerField("_cachedMaxTimeout", true);
            SetTransactionManagerField("_maximumTimeout", requestedTimeout);
        }

        var transactionOptions = new TransactionOptions();
        transactionOptions.IsolationLevel = _isolationLevel;
        transactionOptions.Timeout = requestedTimeout;
        return new TransactionScope(TransactionScopeOption.Required, transactionOptions);
    }

    /// <summary>
    /// Overwrites a private static field of <see cref="TransactionManager"/> via reflection.
    /// </summary>
    /// <param name="fieldName">Name of the private static field.</param>
    /// <param name="value">Value to store in the field.</param>
    /// <exception cref="InvalidOperationException">No such field exists on this runtime.</exception>
    private static void SetTransactionManagerField(string fieldName, object value)
    {
        FieldInfo field = typeof(TransactionManager).GetField(fieldName, BindingFlags.NonPublic | BindingFlags.Static);
        if (field == null)
        {
            // Private field names are an implementation detail; fail with a clear message
            // instead of a NullReferenceException.
            throw new InvalidOperationException(
                "TransactionManager has no private static field named '" + fieldName +
                "'; the maximum transaction timeout cannot be raised on this runtime.");
        }

        field.SetValue(null, value);
    }

    /// <summary>
    /// Adds a generic entity to a given context allowing commit on large block of data and improving performance to support db bulk-insert operations based on Entity Framework
    /// </summary>
    /// <typeparam name="T">Type of the entity to add.</typeparam>
    /// <param name="context">Context the entity is added to. It may be disposed by this call,
    /// so callers must continue with the returned context.</param>
    /// <param name="entity">Entity to add; <c>null</c> adds nothing but still lets the commit check run.</param>
    /// <param name="count">Running number of entities processed so far; a commit happens when it
    /// is a multiple of <paramref name="commitCount"/> (including 0).</param>
    /// <param name="commitCount">defines the block of data size</param>
    /// <param name="recreateContext">When <c>true</c>, the context is disposed after a commit and
    /// replaced by a new one built from the same connection string, with automatic change
    /// detection disabled.</param>
    /// <returns>The context to use for the next call: either <paramref name="context"/> itself
    /// or its freshly created replacement.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="commitCount"/> is not positive.</exception>
    public static MyDbContext AddToContext<T>(MyDbContext context, T entity, int count, int commitCount, bool recreateContext) where T : class
    {
        if (context == null)
        {
            throw new ArgumentNullException("context");
        }
        if (commitCount <= 0)
        {
            // Guards the modulo below against division by zero.
            throw new ArgumentOutOfRangeException("commitCount", commitCount, "commitCount must be greater than zero.");
        }

        if (entity != null)
        {
            context.Set<T>().Add(entity);
        }

        if (count % commitCount != 0)
        {
            return context;
        }

        context.SaveChanges();
        if (!recreateContext)
        {
            return context;
        }

        // Swap in a fresh context so the next block does not accumulate in the old one.
        var contextConnectionString = context.ConnectionString;
        context.Dispose();

        var freshContext = new MyDbContext(contextConnectionString);
        freshContext.Configuration.AutoDetectChangesEnabled = false;
        return freshContext;
    }
}