This is the solution I used when my collection grew too large to return in a single query. It takes advantage of the inherent ordering of the _id
field and lets you loop through a collection in batches of a specified size.
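The underlying idea is plain cursor-based pagination: sort by _id, remember the last _id you saw, and ask for documents greater than it. A minimal sketch of fetching one page with that pattern (lastId is a hypothetical variable holding the final _id of the previous batch):

MyModel.find({ _id: { $gt: lastId } })
  .sort({ _id: 1 })
  .limit(100)
  .exec(function(err, batch) {
    // the last _id in batch becomes lastId for the next page
  });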
Here it is as an npm module, mongoose-paging; the full code is below:
function promiseWhile(condition, action) {
  return new Promise(function(resolve, reject) {
    // Re-invoke the action until the condition fails, chaining each
    // iteration off the previous one so batches run strictly in order.
    process.nextTick(function loop() {
      if (!condition()) {
        resolve();
      } else {
        action().then(loop).catch(reject);
      }
    });
  });
}
function findPaged(query, fields, options, iterator, cb) {
  var Model = this,
      step = options.step,  // batch size
      cursor = null,        // last _id seen in the previous batch
      length = null;        // size of the most recent batch

  promiseWhile(function() {
    // keep looping until a batch comes back empty
    return (length === null || length > 0);
  }, function() {
    return new Promise(function(resolve, reject) {
      // resume after the last _id of the previous batch
      if (cursor) query['_id'] = { $gt: cursor };
      Model.find(query, fields, options).sort({ _id: 1 }).limit(step).exec(function(err, items) {
        if (err) {
          reject(err);
        } else {
          length = items.length;
          if (length > 0) {
            // advance the cursor, then hand the batch to the caller's iterator
            cursor = items[length - 1]._id;
            iterator(items, function(err) {
              if (err) {
                reject(err);
              } else {
                resolve();
              }
            });
          } else {
            resolve();
          }
        }
      });
    });
  }).then(cb).catch(cb);
}
module.exports = function(schema) {
  schema.statics.findPaged = findPaged;
};
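The promiseWhile helper at the top is just a generic async while-loop, and works on its own. A small standalone example, assuming nothing beyond the function above, that counts down from 3 one tick at a time:

var i = 3;
promiseWhile(function() {
  return i > 0;
}, function() {
  return new Promise(function(resolve) {
    console.log(i--);        // logs 3, 2, 1
    setTimeout(resolve, 0);  // stand-in for real async work
  });
}).then(function() {
  console.log('done');
});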
Attach it to your schema as a plugin:
MySchema.plugin(findPaged);
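For context, a minimal end-to-end registration might look like the following. The field names and model name are illustrative placeholders, not part of the module; only the require of mongoose-paging and the plugin call are taken from the text above:

var mongoose = require('mongoose');
var findPaged = require('mongoose-paging');

var MySchema = new mongoose.Schema({
  source: String,
  subject: String,
  message: String
});

// register the plugin before compiling the model so the static is picked up
MySchema.plugin(findPaged);

var MyModel = mongoose.model('MyModel', MySchema);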
Then query like this:
MyModel.findPaged(
  // mongoose query object, leave blank ({}) for all documents
  { source: 'email' },
  // fields to return, leave blank for all
  ['subject', 'message'],
  // number of results per batch
  { step: 100 },
  // iterator called on each batch of results
  function(results, cb) {
    console.log(results);
    // this is called repeatedly until there are no more results;
    // results is an array of at most 100 documents matching your query
    cb();       // call with no argument when your async work succeeds
    // cb(err); // or pass an error to abort the loop
  },
  // callback called when finished looping
  function(err) {
    // called once there are no more results (err is null),
    // or as soon as an error occurs (then err is set)
    if (err) throw err;
  }
);
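Because the next batch isn't fetched until you call cb, the iterator is a natural place for per-batch async work. A hypothetical sketch that archives each batch before moving on (archiveMessages is an assumed helper with an (items, callback) signature, not part of the module):

MyModel.findPaged({ source: 'email' }, null, { step: 100 }, function(results, cb) {
  // assumed helper: processes the batch, then calls back with (err)
  archiveMessages(results, function(err) {
    cb(err); // any error stops the paging loop
  });
}, function(err) {
  if (err) console.error('paging aborted:', err);
  else console.log('all batches archived');
});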