Understanding Node.JS use async.waterfall how to execute external functions
Asked Answered
N

2

6

I need to execute asynchronous functions using the async.js module, but I have a problem when I execute external (separately defined) functions.

The code below works as expected.

But when I change the global variables to local variables, I can't pass them to the functions as parameters.

var async = require('async');
var ogs = require('open-graph-scraper');

// global variables
var param1 = {url: 'http://www.google.com/'};
var param2 = {url: 'https://www.yahoo.com/'};

/**
 * First waterfall step: calls ogs with the global `param1`.
 *
 * @param {Function} callback - Node-style callback; receives the error
 *     reported by ogs, or `null` followed by the data ogs produced.
 */
function function1(callback){
    ogs(param1, function(error, data1) {
        // Forward a failure instead of silently reporting success.
        if (error) return callback(error);
        callback(null, data1);
    });
}
/**
 * Second waterfall step: calls ogs with the global `param2` and forwards
 * the previous step's data together with its own.
 *
 * @param {*} data1 - value passed on by the previous step.
 * @param {Function} callback - Node-style callback; receives the error
 *     reported by ogs, or `null, data1, data2`.
 */
function function2(data1, callback){
    ogs(param2, function(error, data2) {
        // Forward a failure instead of silently reporting success.
        if (error) return callback(error);
        callback(null, data1, data2);
    });
}
/**
 * Last waterfall step: prints both results, then signals completion.
 *
 * @param {*} data1 - first value passed on by the previous step.
 * @param {*} data2 - second value passed on by the previous step.
 * @param {Function} [callback] - Node-style completion callback.
 */
function function3(data1, data2, callback){
    console.log(data1);
    console.log("---------------");
    console.log(data2);
    // Without this call the waterfall never reaches its final handler.
    if (typeof callback === 'function') callback(null);
}

// Runs the three steps in order as soon as the script is loaded.
(function temp() {
    var steps = [function1, function2, function3];

    // Final handler: prints the result, then the error slot, and prints the
    // error once more when one is present.
    var finished = function(err, result){
        console.log(result);
        console.log(err);
        if (err) {
            console.log(err);
        }
    };

    async.waterfall(steps, finished);
})();

If param1 and param2 are changed to local variables, like this:

(function temp() {
    // param1/param2 are now scoped to temp(). function1 and function2 are
    // declared outside temp(), so the `param1`/`param2` names they reference
    // do not resolve to these locals.
    var param1 = {url: 'http://www.google.com/'};
    var param2 = {url: 'https://www.yahoo.com/'};
    async.waterfall([function1, function2, function3],
        // Final handler: prints the result and the error slot.
        function(err, result){
            console.log(result);
            console.log(err);
            if(err) console.log(err);
        }
    );
})();

How can I use these params inside function1() or function2()?

I can't change the functions into local (inline) ones like this:

// Inline form: each step is an anonymous function defined at the call site
// (bodies left empty here).
async.waterfall([
    function(callback){
    },
    function(data,callback){
    }],
    // Final handler: logs the error, if there is one.
    function(err){
    if(err) console.log(err);
    }
);
Norite answered 22/7, 2016 at 12:53 Comment(5)
Why do they need to be local variables in this context?Kemp
@Kos so not true. And not constructive to this question.Mendel
@Mendel disagree; promises are a very popular pattern that is relevant here. A SO question is an appropriate place to promote good alternativesHerodias
@Herodias I know they're popular; they're just not the silver bullet that many developers seem to think they are. In this case, I don't think it's constructive to the question since he's specifically asking about async.waterfall(). If you want to actually propose an answer that shows how to do waterfall with promises, that would be constructive. A comment with a link to a promise library and a claim that 'callbacks are backwards' is not constructive.Mendel
@Mendel Point taken, sorry for being lazy. I'll do thatHerodias
H
2

psst! I'm using some ES6 syntax, please run the code snippets at least on Node 6, okay?


An asynchronous task can be modelled as a function that takes a callback:

// Callback convention: the task takes a Node-style callback as its last argument.
function task(arg1, arg2, callback) {
    // ...
    callback(null, result); // first argument is the error slot (null here)
}
// The caller passes a function receiving (err, result); err is not inspected here.
task(arg1, arg2, (err, result) => {
    handle(result);
});

but there's an alternative convention that often simplifies things:

// Promise convention: the task returns a promise for its eventual result.
function task(arg1, arg2) {
    // ...
    return Promise.resolve(result);
}
// Pass the handler itself: writing `handle(result)` here would call it
// immediately and hand its return value to then().
task(arg1, arg2).then(handle);

While both conventions make sense, I've found the second approach more helpful in practice for writing simple async code with good error handling.

The most important points to grasp are:

  • Functions return a value; async tasks return a promise of a future value.
  • When the async task completes, it marks the promise as "resolved".
  • If the async task fails, instead of raising an exception, it marks the promise as "rejected" together with the error.
  • The returned promise is an object too. You can do useful things with it even before it completes. The code that runs the async task doesn't have to be in the same place as the code that is interested in the result of the task.

An important thing about promises is that they are guaranteed to be async, unlike callbacks:

// callbacks
myTask(1, 2, (err, result) => {
    console.log("A");
});
console.log("B");
// can be AB or BA, depending on whether myTask invokes the callback synchronously

// promises
myTask(1, 2).then(() => {
    console.log("A");
})
console.log("B");
// always BA: then() handlers never run synchronously

This makes code easier to reason about but it also means that when you actually depend on the second behaviour, promises won't be helpful.

(Read up more on Promises!)


Starting point

Okay, let's go back to your code. First let me replace ogs with a dummy async function so that we have some code that we can work on without networking:

var async = require('async');

/**
 * Dummy replacement for ogs that needs no network access.
 *
 * @param {*} param - value echoed back in the result.
 * @param {Function} callback - called after 20 ms with (null, ["ogs", param]).
 */
function ogs(param, callback) {
    const payload = ["ogs", param];
    const deliver = () => callback(null, payload);
    setTimeout(deliver, 20);
}

// global variables
var param1 = {url: 'http://www.google.com/'};
var param2 = {url: 'https://www.yahoo.com/'};

/**
 * First waterfall step: calls ogs with the global `param1`.
 *
 * @param {Function} callback - Node-style callback; receives the error
 *     reported by ogs, or `null` followed by the data ogs produced.
 */
function function1(callback){
    ogs(param1, function(error, data1) {
        // Forward a failure instead of silently reporting success.
        if (error) return callback(error);
        callback(null, data1);
    });
}
/**
 * Second waterfall step: calls ogs with the global `param2` and forwards
 * the previous step's data together with its own.
 *
 * @param {*} data1 - value passed on by the previous step.
 * @param {Function} callback - Node-style callback; receives the error
 *     reported by ogs, or `null, data1, data2`.
 */
function function2(data1, callback){
    ogs(param2, function(error, data2) {
        // Forward a failure instead of silently reporting success.
        if (error) return callback(error);
        callback(null, data1, data2);
    });
}
/**
 * Last waterfall step: prints both results, then signals completion.
 *
 * @param {*} data1 - first value passed on by the previous step.
 * @param {*} data2 - second value passed on by the previous step.
 * @param {Function} [callback] - Node-style completion callback.
 */
function function3(data1, data2, callback){
    console.log(data1);
    console.log("---------------");
    console.log(data2);
    // Without this call the waterfall never reaches its final handler.
    if (typeof callback === 'function') callback(null);
}

// Runs the three steps in order as soon as the script is loaded.
(function temp() {
    var steps = [function1, function2, function3];

    // Final handler: prints the result, then the error slot, and prints the
    // error once more when one is present.
    var finished = function(err, result){
        console.log(result);
        console.log(err);
        if (err) {
            console.log(err);
        }
    };

    async.waterfall(steps, finished);
})();

Let's try these promise things

An equivalent of ogs that returns a promise instead of taking a callback can look like:

/**
 * Promise-returning dummy replacement for ogs.
 *
 * The leftover `callback` parameter of the callback-style version is dropped:
 * the result is delivered only through the returned promise.
 *
 * @param {*} param - value echoed back in the result.
 * @returns {Promise<Array>} resolves after 20 ms with ["ogs", param].
 */
function ogs(param) {
    return new Promise((resolve) => {
        setTimeout(() => {
            resolve(["ogs", param]);
        }, 20);
    });
}

since ogs now returns a promise, it's straightforward to use it inside each function:

/**
 * Starts the ogs task for the global `param1`.
 *
 * @returns {*} whatever ogs returns (a promise in this version), unchanged.
 */
function function1(){
    // Call the async task and hand its promise straight back to the caller.
    const pending = ogs(param1);
    return pending;
}
/**
 * Starts the ogs task for the global `param2`.
 *
 * @returns {*} whatever ogs returns (a promise in this version), unchanged.
 */
function function2() {
    const pending = ogs(param2);
    return pending;
}
/**
 * Prints both results, separated by a dashed line.
 *
 * @param {*} data1 - first result to print.
 * @param {*} data2 - second result to print.
 */
function function3(data1, data2){
    const separator = "---------------";
    for (const line of [data1, separator, data2]) {
        console.log(line);
    }
}

If you'd like to add some logging in the middle, that's easy too:

/**
 * Starts the ogs task for the global `param2` and logs its result on the way
 * through.
 *
 * @returns {Promise<*>} promise for the same data ogs produced.
 */
function function2() {
    // Log, then return the value so later handlers still receive it.
    const logAndForward = (data2) => {
        console.log("inside function2", data2);
        return data2;
    };
    return ogs(param2).then(logAndForward);
}

Now that each of the steps is a promise-returning async-task, let's connect them together! The simplest way is to use Promise.then directly:

// Runs function1, then function2, then passes both results to function3.
(function temp() {
    // function2 is started only once function1's promise has resolved.
    const runSecondThenThird = (data1) =>
        function2().then((data2) => function3(data1, data2));

    // A rejection from any step ends up here.
    const reportFailure = (error) => {
        console.error("There was a problem:", error);
    };

    function1().then(runSecondThenThird).catch(reportFailure);
})();

This will run function1 and, when it completes, run function2; when that completes as well, both results are passed to function3.

Running things in parallel

But wait! function2 doesn't even need to wait for function1 to finish. These are two separate requests. We can start both of them at once.

// Starts both tasks at once and calls function3 when both have finished.
(function temp() {
    // Neither task waits for the other: both are started up front.
    const pending = [function1(), function2()];

    const reportFailure = (error) => {
        console.error("There was a problem:", error);
    };

    Promise.all(pending)
        .then((results) => function3(results[0], results[1]))
        .catch(reportFailure);
})();

Promise.all takes an array of promises and returns a promise that resolves with an array of results. We unpack these results from the array and pass them to function3.

Running network requests in parallel should allow your app to run perceivably faster. Win!

Now back to your original problem:

How to get rid of globals?

We have complete control over the signature of function1 and function2, so let's use that! Let's have these functions take the param as an argument, not look at global variables. Like this:

/**
 * Starts the ogs task for the given param instead of reading a global.
 *
 * @param {*} param - passed to ogs unchanged.
 * @returns {*} whatever ogs returns, unchanged.
 */
function function1(param){
    const pending = ogs(param);
    return pending;
}
/**
 * Starts the ogs task for the given param, passing an extra options object
 * as the second argument.
 *
 * @param {*} param - passed to ogs unchanged.
 * @returns {*} whatever ogs returns, unchanged.
 */
function function2(param) {
    const extraOptions = {"some other options": true};
    return ogs(param, extraOptions);
}

These functions now look super similar! Maybe you could just use one (or maybe just drop them and call ogs directly?)

After removing the globals, our code now looks like this:

// Same parallel flow, with the params kept local and passed in explicitly.
(function temp() {
    const param1 = {url: 'http://www.google.com/'};
    const param2 = {url: 'https://www.yahoo.com/'};

    // Both tasks are started up front; neither waits for the other.
    const pending = [function1(param1), function2(param2)];

    const reportFailure = (error) => {
        console.error("There was a problem:", error);
    };

    Promise.all(pending)
        .then((results) => function3(results[0], results[1]))
        .catch(reportFailure);
})();

But I really need my functions to run in sequence!

What if function2 actually cannot start without the result of function1?

/**
 * Starts the ogs task for the given param.
 *
 * @param {*} param - passed to ogs unchanged.
 * @returns {*} whatever ogs returns, unchanged.
 */
function function1(param) {
    const pending = ogs(param);
    return pending;
}

/**
 * Starts the ogs task for `param`, handing the first task's result to ogs
 * inside the options object.
 *
 * @param {*} data1 - result of the first task.
 * @param {*} param - passed to ogs as its first argument.
 * @returns {*} whatever ogs returns, unchanged.
 */
function function2(data1, param) {
    // Use the `param` argument: the global `param2` no longer exists once
    // the params have been made local to the caller.
    return ogs(param, {"some other options": data1});
}

We can revert to the first version with nested then but we could also try something more tidy:

// Sequential flow: function2 needs function1's result, function3 needs both.
(function temp() {
    const param1 = {url: 'http://www.google.com/'};
    const param2 = {url: 'https://www.yahoo.com/'};

    const data1Promise = function1(param1);
    // function2 is started only once data1 is available.
    const startSecond = (data1) => function2(data1, param2);
    const data2Promise = data1Promise.then(startSecond);

    const reportFailure = (error) => {
        console.error("There was a problem:", error);
    };

    // Both promises are kept, so function3 can be handed both results.
    Promise.all([data1Promise, data2Promise])
        .then((results) => function3(results[0], results[1]))
        .catch(reportFailure);
})();

How is it different from async.waterfall?

waterfall requires you to write the functions in such a way that they call callback with all the information that the next step requires. The flow looks like this:

function1
    -> (data1)
function2
    -> (data1, data2)
function3

Imagine if you had to chain 10 calls instead of 2... Basically, step 2 needs to know what steps 3, 4, 5 and 6 might need.

With promises, you could do the same thing by returning an array from each task, but you can do much better:

There's no longer a need to wrap ogs with function1 and function2, because you can just do:

Promise.all([ogs(...), ogs(...), ogs(...)]).then(allResults)

and everything is collected in an array for you.

Very relevant reading: Bluebird's Promise.all() method when one promise is dependent on another

But my API doesn't return promises!

I hope I have you on board with promises by now, but you're still stuck with this signature:

ogs(options, function (err, results) {...})

We'd like to convert it into something like:

ogsAsync(options) -> Promise

It's simple enough to do manually using the Promise constructor:

/**
 * Promise wrapper around the callback-style ogs.
 *
 * @param {*} options - passed to ogs unchanged.
 * @returns {Promise<*>} rejects with the error ogs reports, otherwise
 *     resolves with its results.
 */
function ogsAsync(options) {
    const executor = (resolve, reject) => {
        // Map the Node-style (err, results) pair onto reject/resolve.
        const settle = (err, results) => {
            if (err) {
                reject(err);
                return;
            }
            resolve(results);
        };
        ogs(options, settle);
    };
    return new Promise(executor);
}

But you probably don't need to, because it looks like your library already returns a promise, so you can call ogs(options) directly. Yay!

But just in case you have to work with a library that doesn't offer promises yet (like redis or much of node standard libs), Bluebird provides a nice utility to automatically wrap callback-style tasks into promise-style tasks.

Hope that helps!

Herodias answered 23/7, 2016 at 11:31 Comment(1)
Thank you for the detailed and informative answer. I need time to understand your answer. and I am sorry to reply late.Norite
M
0

So what I normally do is set up a kind of 'bootstrap' function as the first one in my waterfall, which takes the params in and starts the chain by passing them forward.

/**
 * Builds the first waterfall step, which feeds `params` into the chain.
 *
 * @param {Object} [params] - initial values; any falsy value is replaced by
 *     an empty object.
 * @returns {Function} a step taking a Node-style callback and calling it
 *     with (null, params).
 */
function start(params){
  var initial = params || {}; // make sure there is at least an empty object
  return function(callback){
     // do something
     callback(null, initial); // the error comes first in Node-style callbacks
  };
}

/**
 * Second waterfall step: adds `newProp` to the params object it was given
 * and passes that same object on.
 *
 * @param {Object} params - object received from the previous step; mutated.
 * @param {Function} callback - Node-style callback, called with (null, params).
 */
function second(params, callback){
   // do something else. Maybe extend the params object
   var extended = params;
   extended.newProp = "foo";
   callback(null, extended);
}

// later, maybe in another module

// start() is called right here and returns the first step; `second` is
// passed by reference.
async.waterfall([
  start({foo : 'bar'}),
  second
],
  // Final handler: e is the error slot, res the value passed on by the last step.
  function result(e, res){
    // handle result 
  });
Mendel answered 22/7, 2016 at 12:59 Comment(4)
Thanks Paul for your commnet. But When i use OGS Module, It didn't work, What is the error in below code –Norite
You should update the question with the new code rather than make it an answer. Also, describe better what you mean "doesn't work". Nothing happens at all? You get an error? From the OGS docs it looks like you should be passing an object as the first param, not just the URL string.Mendel
I update the Error Message. Thank you for your appreciation.Norite
once again, the params.url that you're passing to ogs should just be the params object, not the URL.Mendel

© 2022 - 2024 — McMap. All rights reserved.