psst! I'm using some ES6 syntax, please run the code snippets at least on Node 6, okay?
An asynchronous task can be modelled as a function that takes a callback:
// Callback convention: the caller supplies `callback` as the last argument.
function task(arg1, arg2, callback) {
// ...
// Report completion through the callback: error slot first (null here), then the result.
callback(null, result);
}
// The callback receives (err, result); this example only uses `result`.
task(arg1, arg2, (err, result) => {
handle(result);
});
but there's an alternative convention that often simplifies things:
/**
 * Promise convention: there is no callback parameter; the task returns a
 * promise for its eventual result instead.
 */
function task(arg1, arg2) {
  // ...
  return Promise.resolve(result);
}
// `then` must be given a function. Writing `.then(handle(result))` would call
// `handle` right away (with a `result` that is not in scope here) and hand its
// return value to `then` instead of a handler.
task(arg1, arg2).then(result => handle(result));
While both conventions make sense, I've seen the second approach more helpful in practice to write simple async code with good error handling.
The most important points to grasp are:
- Functions return a value; async tasks return a promise of a future value.
- When the async task completes, it marks the promise as "resolved".
- If the async task fails, instead of raising an exception, it marks the promise as "rejected" together with the error.
- The returned promise is an object too. You can do useful things with it even before it completes. The code that runs the async task doesn't have to be in the same place as the code that is interested in the result of the task.
An important thing about promises is that they are guaranteed to be async, unlike callbacks:
// callbacks
myTask(1, 2, function onTaskDone(err, result) {
  console.log("A");
});
console.log("B");
// prints AB or BA, depending on how myTask invokes the callback
// promises
const taskPromise = myTask(1, 2);
taskPromise.then(function onTaskResolved() {
  console.log("A");
});
console.log("B");
// always BA
This makes code easier to reason about, but it also means that when you actually depend on the callback being able to run synchronously (the "AB" order above), promises won't be helpful.
(Read up more on Promises!)
Starting point
Okay, let's go back to your code. First let me replace ogs
with a dummy async function so that we have some code that we can work on without networking:
var async = require('async');
/**
 * Dummy callback-style stand-in for `ogs`, so the examples run without networking.
 *
 * @param {*} param - value echoed back inside the result
 * @param {function(*, Array)} callback - called after 20ms with (null, ["ogs", param])
 */
function ogs(param, callback) {
  const value = ["ogs", param];
  const deliver = () => callback(null, value);
  setTimeout(deliver, 20);
}
// global variables (never reassigned, so declared with `const`)
const param1 = {url: 'http://www.google.com/'};
const param2 = {url: 'https://www.yahoo.com/'};
/**
 * First waterfall step: calls `ogs` with the global `param1`.
 *
 * @param {function(*, *=)} callback - receives (error) on failure or
 *   (null, data1) on success
 */
function function1(callback) {
  ogs(param1, function (error, data1) {
    // Forward failures instead of silently replacing them with `null`.
    if (error) {
      return callback(error);
    }
    callback(null, data1);
  });
}
/**
 * Second waterfall step: calls `ogs` with the global `param2` and passes the
 * previous step's data along with its own.
 *
 * @param {*} data1 - result of the previous step, forwarded unchanged
 * @param {function(*, *=, *=)} callback - receives (error) on failure or
 *   (null, data1, data2) on success
 */
function function2(data1, callback) {
  ogs(param2, function (error, data2) {
    // Forward failures instead of silently replacing them with `null`.
    if (error) {
      return callback(error);
    }
    callback(null, data1, data2);
  });
}
/**
 * Final waterfall step: prints both results, then signals completion.
 *
 * @param {*} data1 - result of the first step
 * @param {*} data2 - result of the second step
 * @param {function(*)=} callback - called with (null) once printing is done
 */
function function3(data1, data2, callback) {
  console.log(data1);
  console.log("---------------");
  console.log(data2);
  // Without this call the final handler passed to async.waterfall never runs.
  // The guard keeps direct calls without a callback working as before.
  if (typeof callback === 'function') {
    callback(null);
  }
}
// Runs the three steps in sequence via async.waterfall and reports the outcome.
(function temp() {
  async.waterfall([function1, function2, function3],
    function (err, result) {
      console.log(result);
      // Report a failure once, on stderr, instead of logging `err` twice.
      if (err) {
        console.error(err);
      }
    }
  );
})();
Let's try these promise things
An equivalent of ogs
that returns a promise instead of taking a callback can look like:
/**
 * Promise-returning stand-in for `ogs`. It takes no callback: the result is
 * delivered through the returned promise.
 *
 * @param {*} param - value echoed back inside the result
 * @returns {Promise<Array>} resolves with ["ogs", param] after 20ms
 */
function ogs(param) {
  // return a promise that resolves after 20ms
  return new Promise(resolve => {
    setTimeout(() => {
      resolve(["ogs", param]);
    }, 20);
  });
}
since ogs
now returns a promise, it's straightforward to use it inside each function
:
/**
 * Starts the async task for the global `param1`.
 * @returns {Promise} the promise handed back by `ogs`, returned as-is
 */
function function1() {
  const data1Promise = ogs(param1);
  return data1Promise;
}
/**
 * Starts the async task for the global `param2`.
 * @returns {Promise} the promise handed back by `ogs`, returned as-is
 */
function function2() {
  const data2Promise = ogs(param2);
  return data2Promise;
}
/**
 * Prints both results, separated by a dashed line.
 * @param {*} data1 - first result
 * @param {*} data2 - second result
 */
function function3(data1, data2) {
  const separator = "---------------";
  for (const line of [data1, separator, data2]) {
    console.log(line);
  }
}
If you'd like to add some logging in the middle, that's easy too:
/**
 * Same as the plain version, but logs the result on its way through.
 * @returns {Promise} resolves with the unchanged result of `ogs(param2)`
 */
function function2() {
  const logAndForward = (data2) => {
    console.log("inside function2", data2);
    return data2;
  };
  return ogs(param2).then(logAndForward);
}
Now that each of the steps is a promise-returning async-task, let's connect them together! The simplest way is to use Promise.then
directly:
// Runs function1, then function2, then hands both results to function3.
(function temp() {
  function1()
    .then(data1 => function2().then(data2 => function3(data1, data2)))
    .catch(error => {
      console.error("There was a problem:", error);
    });
})();
This will run function1
, and when it completes, it will run function2
, then pass both results to function3
.
Running things in parallel
But wait! function2
doesn't even need to wait for function1
to finish. These are two separate requests. We can start both of them at once.
// Starts both tasks at once and calls function3 when both results are in.
(function temp() {
  const pending = [function1(), function2()];
  Promise.all(pending)
    .then(results => function3(results[0], results[1]))
    .catch(error => {
      console.error("There was a problem:", error);
    });
})();
Promise.all
takes an array of promises and returns a promise that resolves with an array of results. We unpack these results from the array and pass them to function3
.
Running network requests in parallel should allow your app to run perceivably faster. Win!
Now back to your original problem:
How to get rid of globals?
We have complete control over the signature of function1
and function2
, so let's use that! Let's have these functions take the param as an argument, not look at global variables. Like this:
/**
 * Starts the async task for the given param instead of reading a global.
 * @param {*} param - passed straight to `ogs`
 * @returns {Promise} the promise handed back by `ogs`
 */
function function1(param) {
  const dataPromise = ogs(param);
  return dataPromise;
}
/**
 * Starts the async task for the given param, with an extra options object.
 * @param {*} param - passed to `ogs` as the first argument
 * @returns {Promise} the promise handed back by `ogs`
 */
function function2(param) {
  const options = {"some other options": true};
  return ogs(param, options);
}
These functions now look super similar! Maybe you could just use one (or maybe just drop them and call ogs
directly?)
After removing the globals, our code now looks like this:
// Same parallel flow, with the params kept local instead of global.
(function temp() {
  const param1 = {url: 'http://www.google.com/'};
  const param2 = {url: 'https://www.yahoo.com/'};
  const pending = [function1(param1), function2(param2)];
  Promise.all(pending)
    .then(results => function3(results[0], results[1]))
    .catch(error => {
      console.error("There was a problem:", error);
    });
})();
But I really need my functions to run in sequence!
What if function2
actually cannot start without the result of function1
?
/**
 * First step of the sequential flow.
 * @param {*} param - passed straight to `ogs`
 * @returns {Promise} the promise handed back by `ogs`
 */
function function1(param) {
  const firstResult = ogs(param);
  return firstResult;
}
/**
 * Second step of the sequential flow: needs the first step's result.
 *
 * @param {*} data1 - result of the first step, passed to `ogs` inside the options
 * @param {*} param - passed to `ogs` as the first argument
 * @returns {Promise} the promise handed back by `ogs`
 */
function function2(data1, param) {
  // Use the `param` argument; the earlier `param2` was not this function's parameter.
  return ogs(param, {"some other options": data1});
}
We can revert to the first version with nested then
but we could also try something more tidy:
// Sequential flow: function2 starts only after function1's result is available,
// yet both results are still collected with Promise.all.
(function temp() {
  const param1 = {url: 'http://www.google.com/'};
  const param2 = {url: 'https://www.yahoo.com/'};
  const first = function1(param1);
  const second = first.then(function (data1) {
    return function2(data1, param2);
  });
  Promise.all([first, second])
    .then(results => function3(results[0], results[1]))
    .catch(error => {
      console.error("There was a problem:", error);
    });
})();
How is it different from async.waterfall
?
waterfall
requires you to write the functions in such a way that they call callback
with all the information that the next step requires. The flow looks like this:
function1
-> (data1)
function2
-> (data1, data2)
function3
Imagine if you had to chain 10 calls instead of 2: basically, step 2 would need to know what steps 3, 4, 5, 6 and so on might need.
With promises, you could do the same thing by returning an array from each task, but you can do much better:
There's no longer a need to wrap ogs
with function1
and function2
, because you can just do:
Promise.all([ogs(...), ogs(...), ogs(...)]).then(allResults)
and everything is collected in an array for you.
Very relevant reading: Bluebird's Promise.all() method when one promise is dependent on another
But my API doesn't return promises!
I hope I have you on board with promises by now, but you're still stuck with this signature:
ogs(options, function (err, results) {...})
We'd like to convert it into something like:
ogsAsync(options) -> Promise
It's simple enough to do manually using the Promise constructor:
/**
 * Wraps the callback-style `ogs` in a promise.
 *
 * @param {*} options - passed straight to `ogs`
 * @returns {Promise} resolves with `results`, or rejects with `err` when
 *   `ogs` reports a truthy error
 */
function ogsAsync(options) {
  const executor = (resolve, reject) => {
    ogs(options, (err, results) => {
      if (!err) {
        resolve(results);
        return;
      }
      reject(err);
    });
  };
  return new Promise(executor);
}
But you probably don't need to, because it looks like your library already returns a promise, so you can call ogs(options)
directly. Yay!
But just in case you have to work with a library that doesn't offer promises yet (like redis
or much of node
standard libs), Bluebird provides a nice utility to automatically wrap callback-style tasks into promise-style tasks.
Hope that helps!
async.waterfall()
. If you want to actually propose an answer that shows how to do waterfall with promises, that would be constructive. A comment with a link to a promise library and a claim that 'callbacks are backwards' is not constructive. – Mendel