How to populate documents with unlimited nested levels using mongoose
Asked Answered
K

2

8

I'm designing a web application that manages the organizational structure of parent and child companies. There are two types of companies: (1) Main company and (2) Subsidiary company. A company can belong to only one parent company but can have several child companies. My Mongoose schema looks like this:

// Field definitions for a company document. `companyChildren` holds ObjectId
// references that point back at the same 'Company' model, so the hierarchy is
// stored as flat documents linked by reference.
var companyFields = {
    companyName: {type: String, required: true},
    estimatedAnnualEarnings: {type: Number, required: true},
    companyChildren: [{type: mongoose.Schema.Types.ObjectId, ref: 'Company'}],
    companyType: {type: String, enum: ['Main', 'Subsidiary']}
};

var companySchema = new mongoose.Schema(companyFields);

// Compile the schema and export it under the model name used by `ref` above.
module.exports = mongoose.model('Company', companySchema);

I store all my companies in one collection, and each company has an array of references to its child companies. I then want to display all companies as a tree (on the client side). I want to query all Main companies so that their children are populated, those children's children are populated, and so on, with an unlimited nesting level. How can I do that? Or maybe you know a better approach. I also need the ability to view, add, edit, and delete any company.

Now I have this:

// GET /companies — responds with every 'Main' company, with the documents
// referenced by `companyChildren` populated.
router.get('/companies', function(req, res) {
    Company.find({companyType: 'Main'})
        .populate({path: 'companyChildren'})
        .exec(function(err, list) {
            if (err) {
                console.log(err);
                // Always answer the request; without a response the client
                // would wait until it times out.
                return res.status(500).send({error: 'Failed to load companies'});
            }
            res.send(list);
        });
});

But it populates only one nested level. I appreciate any help

Kibbutznik answered 7/7, 2017 at 10:13 Comment(1)
the best solution ever using bluebird : traverse a tree in mongoDBPlucky
A
18

You can do this in latest Mongoose releases. No plugins required:

const async = require('async'),
      mongoose = require('mongoose'),
      Schema = mongoose.Schema;

// `useMongoClient` is the connection flag for Mongoose 4.11+.
const uri = 'mongodb://localhost/test',
      options = { useMongoClient: true };

mongoose.Promise = global.Promise;
mongoose.set('debug',true);

/**
 * Query middleware: asks the current query (`this`) to populate `subs`
 * before it executes, then hands control to the next middleware.
 *
 * @param {Function} next - continuation supplied by Mongoose.
 */
function autoPopulateSubs(next) {
  this.populate('subs');
  next();
}

// Self-referencing schema: `subs` holds ObjectIds of other Company documents.
const companySchema = new Schema({
  name: String,
  subs: [{ type: Schema.Types.ObjectId, ref: 'Company' }]
});

// Attach the hook to both query types so every fetched level populates the next.
companySchema
  .pre('findOne', autoPopulateSubs)
  .pre('find', autoPopulateSubs);


const Company = mongoose.model('Company', companySchema);

/** Pretty-print any value as indented JSON. */
function log(data) {
  console.log(JSON.stringify(data, undefined, 2))
}

async.series(
  [
    // 1. Connect
    (callback) => mongoose.connect(uri,options,callback),

    // 2. Clean data: empty the collection of every registered model
    (callback) =>
      async.each(mongoose.models,(model,callback) =>
        model.remove({},callback),callback),

    // 3. Create data: a chain 5 <- 4 <- 3 <- 2 <- 1, where each new company
    //    lists the previously created one in `subs`
    (callback) =>
      async.waterfall(
        [5,4,3,2,1].map( name =>
          ( name === 5 ) ?
            (callback) => Company.create({ name },callback) :
            (child,callback) =>
              Company.create({ name, subs: [child] },callback)
        ),
        callback
      ),

    // 4. Fetch the top of the chain; the pre hooks populate every level
    (callback) =>
      Company.findOne({ name: 1 })
        .exec((err,company) => {
          // Return early so the callback is never invoked a second time.
          if (err) return callback(err);
          log(company);
          callback();
        })

  ],
  (err) => {
    if (err) throw err;
    mongoose.disconnect();
  }
)

Or a more modern Promise version with async/await:

const mongoose = require('mongoose');
const { Schema } = mongoose;

mongoose.set('debug',true);
mongoose.Promise = global.Promise;

const uri = 'mongodb://localhost/test';
const options = { useMongoClient: true };

// Self-referencing schema: `subs` holds ObjectIds of other Company documents.
const companySchema = new Schema({
  name: String,
  subs: [{ type: Schema.Types.ObjectId, ref: 'Company' }]
});

/**
 * Query middleware: asks the current query (`this`) to populate `subs`,
 * then continues with the next middleware.
 *
 * @param {Function} next - continuation supplied by Mongoose.
 */
function autoPopulateSubs(next) {
  this.populate('subs');
  next();
}

// Register the hook for both single-document and multi-document queries.
['findOne', 'find'].forEach(hookName =>
  companySchema.pre(hookName, autoPopulateSubs)
);

const Company = mongoose.model('Company', companySchema);

/** Pretty-print any value as indented JSON. */
const log = data => console.log(JSON.stringify(data, undefined, 2));

/** Connects, resets the data, builds a five-level chain and prints it populated. */
async function main() {

  try {
    const conn = await mongoose.connect(uri,options);

    // Clean data
    const removals = Object.values(conn.models).map(model => model.remove({}));
    await Promise.all(removals);

    // Create data: 5 first, then each later company references the previous one
    let child;
    for (const name of [5,4,3,2,1]) {
      const fields = (name === 5) ? { name } : { name, subs: [child] };
      child = await Company.create(fields);
    }

    // Fetch and populate
    const company = await Company.findOne({ name: 1 });
    log(company);

  } catch(e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }

}

main();

Produces:

{
  "_id": "595f7a773b80d3114d236a8b",
  "name": "1",
  "__v": 0,
  "subs": [
    {
      "_id": "595f7a773b80d3114d236a8a",
      "name": "2",
      "__v": 0,
      "subs": [
        {
          "_id": "595f7a773b80d3114d236a89",
          "name": "3",
          "__v": 0,
          "subs": [
            {
              "_id": "595f7a773b80d3114d236a88",
              "name": "4",
              "__v": 0,
              "subs": [
                {
                  "_id": "595f7a773b80d3114d236a87",
                  "name": "5",
                  "__v": 0,
                  "subs": []
                }
              ]
            }
          ]
        }
      ]
    }
  ]
}

Note that the async parts are not required at all; they are only here to set up the data for the demonstration. It's the .pre() hooks that make this work: each .populate() call runs either .find() or .findOne() under the hood, which triggers the hook again and so "chains" to another .populate() call for the next level.

So this:

/**
 * Query middleware: requests population of the `subs` path on the query
 * bound as `this`, then invokes `next` to continue.
 *
 * @param {Function} next - continuation to call once population is requested.
 */
function autoPopulateSubs(next) {
  const pathToPopulate = 'subs';
  this.populate(pathToPopulate);
  next();
}

Is the part being invoked that is actually doing the work.

All done with "middleware hooks".


Data State

To make it clear, this is the data in the collection which is set up. It's just references pointing to each subsidiary in plain flat documents:

{
        "_id" : ObjectId("595f7a773b80d3114d236a87"),
        "name" : "5",
        "subs" : [ ],
        "__v" : 0
}
{
        "_id" : ObjectId("595f7a773b80d3114d236a88"),
        "name" : "4",
        "subs" : [
                ObjectId("595f7a773b80d3114d236a87")
        ],
        "__v" : 0
}
{
        "_id" : ObjectId("595f7a773b80d3114d236a89"),
        "name" : "3",
        "subs" : [
                ObjectId("595f7a773b80d3114d236a88")
        ],
        "__v" : 0
}
{
        "_id" : ObjectId("595f7a773b80d3114d236a8a"),
        "name" : "2",
        "subs" : [
                ObjectId("595f7a773b80d3114d236a89")
        ],
        "__v" : 0
}
{
        "_id" : ObjectId("595f7a773b80d3114d236a8b"),
        "name" : "1",
        "subs" : [
                ObjectId("595f7a773b80d3114d236a8a")
        ],
        "__v" : 0
}
Alesha answered 7/7, 2017 at 12:16 Comment(2)
This was helpful, thank you! In case it's helpful for others to know: I copied your code into my codebase, and modified it gradually until it worked for my data — and this revealed what was going wrong. The autoPopulate middleware works like a charm.Mecke
What if I don't want to populate on every query of the schema? E.g. some conditional check before running the pre hooks? I found this but saving the flag to the db does seem like a bad practice, as presented in the comments.Cocker
V
0

I think a simpler approach would be to track the parent since that is unique instead of tracking an array of children which could get messy. There is a nifty module called mongoose-tree built just for this:

// Plugin applied to the schema below.
var tree = require('mongoose-tree');

// Company schema without a children array; the hierarchy is left to the
// plugin (presumably via a parent reference — see the mongoose-tree docs).
var CompanySchema = new mongoose.Schema({
    companyName: {
        type: String,
        required: true
    },
    estimatedAnnualEarnings: {
        type: Number,
        required: true
    },
    companyType: {type: String, enum: ['Main', 'Subsidiary']}
})

// Register the plugin before the model is compiled from the schema.
CompanySchema.plugin(tree);
module.exports = mongoose.model('Company', CompanySchema);

Set some test data:

// Documents are created from the compiled `Company` model, not from the
// schema object. Field names and the required values follow the schema.
var comp1 = new Company({companyName: 'Company 1', estimatedAnnualEarnings: 3000000, companyType: 'Main'});
var comp2 = new Company({companyName: 'Company 2', estimatedAnnualEarnings: 2000000, companyType: 'Subsidiary'});
var comp3 = new Company({companyName: 'Company 3', estimatedAnnualEarnings: 1000000, companyType: 'Subsidiary'});

// Build the chain: Company 1 -> Company 2 -> Company 3
comp3.parent = comp2;
comp2.parent = comp1;

// Save from the top of the hierarchy downwards (presumably so the plugin can
// derive each child's position from an already-saved parent — confirm in the
// mongoose-tree docs). Stop at the first failure instead of ignoring it.
comp1.save(function(err) {
   if (err) { return console.log(err); }
   comp2.save(function(err) {
      if (err) { return console.log(err); }
      comp3.save(function(err) {
         if (err) { console.log(err); }
      });
   });
});

Then use mongoose-tree to build a function that can get either the ancestors or children:

// GET /company/:name/:action — looks up one company by `companyName` and
// responds with its ancestors or its children, depending on `:action`
// ('ancestors' | 'children').
router.get('/company/:name/:action', function(req, res) {
    var name = req.params.name;
    var action = req.params.action;
    // findOne yields a single document; the tree helpers are called on a
    // document, not on the array that find() would produce.
    Company.findOne({companyName: name}, function(err, comp){
       if (err) {
          return res.status(500).send({error: 'Failed to load company'});
       }
       if (!comp) {
          return res.status(404).send({error: 'Company not found'});
       }

       // Shared responder for both tree lookups; `companies` is an array.
       var sendCompanies = function(err, companies) {
          if (err) {
             return res.status(500).send({error: 'Failed to load related companies'});
          }
          res.send(companies);
       };

       if (action === 'ancestors'){
          comp.getAncestors(sendCompanies);
       }else if (action === 'children'){
          comp.getChildren(sendCompanies);
       }else{
          // Unknown action: answer explicitly rather than leaving the request open.
          res.status(400).send({error: 'Unknown action: ' + action});
       }
    });
});
Valenevalenka answered 7/7, 2017 at 10:47 Comment(2)
Why add a plugin? Mongoose does this out of the box.Alesha
My real point was to restructure the model to use a parent ref instead of array of children. Whether you use populate on a pre hook or a plugin doesn't really matter. Except the pre hook will always recursively populate subs which you might not want for performance reasons. The plugin allows you to explicitly populate them which is nice. It also allows you to traverse in either direction.Valenevalenka

© 2022 - 2024 — McMap. All rights reserved.