Because the embedded fields are dynamic, the best approach is to modify your schema in such a way that the translation
field becomes an array of embedded documents. An example of such a schema, mapping the current structure, follows:
"translation": [
{
"lang": "en",
"name" : "brown fox",
"description" : "the quick brown fox jumps over a lazy dog"
},
{
"lang": "it",
"name" : "brown fox ",
"description" : " the quick brown fox jumps over a lazy dog"
},
{
"lang": "fr",
"name" : "renard brun ",
"description" : " le renard brun rapide saute par-dessus un chien paresseux"
},
{
"lang": "de",
"name" : "brown fox ",
"description" : " the quick brown fox jumps over a lazy dog"
},
{
"lang": "es",
"name" : "brown fox ",
"description" : " el rápido zorro marrón salta sobre un perro perezoso"
}
]
With this schema, it's easy to create a text index on the name and description fields:
db.collection.createIndex(
    {
        "translation.name": "text",
        "translation.description": "text"
    }
)
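For example (a hypothetical query, assuming the index above has been created), a text search will then match terms from any language's name or description and can be sorted by relevance:

db.collection.find(
    { "$text": { "$search": "renard" } },
    { "score": { "$meta": "textScore" } }
).sort({ "score": { "$meta": "textScore" } })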
As for modifying the schema, you would need to use an API that allows you to update your collection in bulk, and the Bulk API does exactly that. It offers better performance because the operations are sent to the server in batches of, say, 1000, rather than one request per document.
The following demonstrates this approach. The first example uses the Bulk API, available in MongoDB versions >= 2.6 and < 3.2, to update all the documents in the collection by changing each translation field to an array:
var bulk = db.collection.initializeUnorderedBulkOp(),
    counter = 0;

db.collection.find({
    "translation": {
        "$exists": true,
        "$not": { "$type": 4 } // skip documents already converted to an array
    }
}).snapshot().forEach(function (doc) {
    // Convert the { lang: { name, description } } map into an array of
    // { lang, name, description } sub-documents
    var localization = Object.keys(doc.translation).map(function (key) {
        var obj = doc["translation"][key];
        obj["lang"] = key;
        return obj;
    });

    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "translation": localization }
    });
    counter++;

    if (counter % 1000 === 0) {
        bulk.execute(); // Execute per 1000 operations
        // re-initialize every 1000 update statements
        bulk = db.collection.initializeUnorderedBulkOp();
    }
});

// Clean up remaining operations in the queue
if (counter % 1000 !== 0) { bulk.execute(); }
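As a quick sanity check (assuming the migration above has completed), the same filter the cursor used should no longer match anything, since every translation field is now an array (BSON type 4):

db.collection.count({
    "translation": { "$exists": true, "$not": { "$type": 4 } }
}) // expected to return 0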
The next example applies to MongoDB version 3.2, which has since deprecated the Bulk API and provides a newer set of APIs using bulkWrite().
It uses the same cursor as above, but builds the array of bulk operations with the same forEach() cursor method, pushing each update document onto the array. Because write commands can accept no more than 1000 operations, you need to group your operations into batches of at most 1000 and re-initialise the array when the loop hits the 1000th iteration:
var cursor = db.collection.find({
        "translation": {
            "$exists": true,
            "$not": { "$type": 4 }
        }
    }).snapshot(),
    bulkUpdateOps = [];

cursor.forEach(function (doc) {
    var localization = Object.keys(doc.translation).map(function (key) {
        var obj = doc["translation"][key];
        obj["lang"] = key;
        return obj;
    });

    bulkUpdateOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "translation": localization } }
        }
    });

    if (bulkUpdateOps.length === 1000) {
        db.collection.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

if (bulkUpdateOps.length > 0) { db.collection.bulkWrite(bulkUpdateOps); }
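Once the documents are in the new shape, a single language's entry can be pulled out of the array with a positional projection. The query below is a hypothetical example against the sample data above; it returns only the French sub-document:

db.collection.find(
    { "translation.lang": "fr" },
    { "translation.$": 1 }
)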
{ "lang": "en", "name" : "brown fox", "description" : "the quick brown fox jumps over a lazy dog" }
– Checked