I need to build an application that does these things (in order):
on load:
01- connect to MongoDB 'db'
02- creates a collection 'cas'
03- check if a web page has updates, if yes go to step 04, if not go to step 07
04- do web scraping (using Cheerio) of the web site and get a $ variable like that $ = cheerio.load(body);
05- elaborate this object to get only informations I'm interested in and organize them in a jsons object like this one:
var jsons = [
{year: 2015, country: Germany, value: 51},
{year: 2015, country: Austria, value: 12},
{year: 2016, country: Germany, value: 84},
{year: 2016, country: Bulgaria, value: 104},
...
];
06- insert each of these elements ({year: 2015, country: Germany, value: 51}, ...) in the collection 'cas' of database 'db'
07- download the data (for example in a csv file)
08- create a web page for data visualization of these data using D3.js
09- disconnect from 'db'
If Node.js were synchronous, I could write something like this:
var url = 'http://...';
var jsons = [];
connectDb('db');
createCollection('db', 'cas');
if(checkForUpdates(url)) {
var $ = scrape(url);
jsons = elaborate($);
for(var i = 0; i < jsons.length; i++) {
saveDocumentOnDbIfNotExistsYet('db', 'cas', jsons[i]);
}
}
downloadCollectionToFile('db', 'cas', './output/casData.csv');
createBarChart('./output/casData.csv');
disconnectDb('db');
But Node.js is asynchronous so this code would not work properly. I've read that I can use Promise to get the code to run in a certain order.
I read the documentation about the Promise and some sites that showed simple tutorials. The structure of a Promise is:
// some code (A)
var promise = new Promise(function(resolve, reject) {
// some code (B)
});
promise.then(function() {
// some code (C)
});
promise.catch(function() {
// some code (D)
});
// some code (E)
If I understood correctly, in this case the execution (if Node.js were synchronous) would be equivalent to:
// some code (A)
// some code (E)
if(some code (B) not produce errors) {
// some code (C)
}
else {
// some code (D)
}
or (swap between code A and E, because they are asynchronous)
// some code (E)
// some code (A)
if(some code (B) not produce errors) {
// some code (C)
}
else {
// some code (D)
}
So now I wonder what is the right structure for my application. I thought about:
var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
var dbUrl = 'mongodb://localhost:27017/';
var dbName = 'db';
var collectionName = 'cas';
const app = express(); // run using > node app.js
// connect to db
var connect = function(url) {
return new Promise(function(resolve, reject) {
MongoClient.connect(url + dbName, function(err, db) {
if(err) {
reject(err);
}
else {
console.log('Connected');
resolve(db);
}
});
});
}
// create collection
connect.then(function(db) {
db.createCollection(collectionName, function(err, res) {
if(err) {
throw err;
}
else {
console.log('Collection', collectionName, 'created!');
}
});
});
// connection error
connect.catch(function(err) {
console.log('Error during connection...');
throw err;
});
It's right? If yes, how can I proceed with other steps? I can I improve my code?
EDIT 1
Following the example of Андрей Щербаков, I modified my code in this way:
app.js:
// my files
var db = require('./middlewares/db.js');
var url = 'mongodb://localhost:27017/';
var dbName = 'db';
var collectionName = 'cas';
const start = async function() {
const connect = await db.connectToMongoDb(url, dbName);
const cas = await connect.createYourCollection(collectionName);
const isPageHasUpdates = oneMoreFunction(); // i don't know how you gonna check it
if(isPageHasUpdates) {
await step 4;
await step 5;
await step 6;
}
await step 7
return something; // if you want
}
start()
.then(res => console.log(res)) // here you can use result of your start function if you return something or skip this then
.catch(err => console.log(err)); // do something with your error
middlewares/db.js:
var MongoClient = require('mongodb').MongoClient;
let dbInstance;
var methods = {};
methods.connectToMongoDb = function(url, dbName) {
if(dbInstance) {
return dbInstance;
}
else {
MongoClient.connect(url + dbName, function(err, db) {
if(!err) {
dbInstance = db;
return db;
}
});
}
}
methods.createYourCollection = function(collectionName) {
?.createCollection(collectionName, function(err, res) {
if(err) {
throw err;
}
});
}
module.exports = methods;
But I'm not sure I'm doing well.
How can I separate function in different files? For example I want to put all the function about db in file middlewares/db.js. But I have some problems in line ?.createCollection(collectionName, function(err, res)
.