Downloading images with node.js [closed]
Asked Answered
H

7

229

I'm trying to write a script to download images using node.js. This is what I have so far:

var maxLength = 10 // 10mb

/**
 * Download the image at `uri` into a randomly named file inside the
 * operating system's temporary directory.
 *
 * The response is rejected when its status code is not 200/304, when its
 * Content-Type header is missing or does not mention "image", or when it is
 * larger than `maxLength` megabytes. The size limit is checked against the
 * declared Content-Length and again against the bytes actually received,
 * because the header may be absent or wrong.
 *
 * @param {string|Object} uri - Passed unchanged to `http.request`.
 * @param {function(?Error, string=)} callback - Invoked exactly once, with an
 *   error or with a falsy error and the path of the written file.
 */
var download = function(uri, callback) {
  var maxBytes = maxLength * 1024 * 1024
  var settled = false

  // Several events (request error, response error, end) can follow each
  // other for one download; only the first outcome is reported.
  var finish = function(err, path) {
    if (settled) return
    settled = true
    callback(err, path)
  }

  var req = http.request(uri)

  req
    .on('response', function(res) {
      // A missing header must count as "not an image" instead of throwing.
      var contentType = res.headers['content-type'] || ''
      var problem = null

      if (res.headers['content-length'] > maxBytes) {
        problem = 'Image too large.'
      } else if (!~[200, 304].indexOf(res.statusCode)) {
        problem = 'Received an invalid status code.'
      } else if (!contentType.match(/image/)) {
        problem = 'Not an image.'
      }

      if (problem) {
        // Discard the unwanted body so the response is not left unread.
        res.resume()
        finish(new Error(problem))
        return
      }

      // Chunks are kept as Buffers: joining binary data through strings
      // depends on the 'binary' string encoding and risks corrupting bytes.
      var chunks = []
      var received = 0

      res
        .on('error', finish)
        .on('data', function(chunk) {
          if (settled) return
          received += chunk.length
          if (received > maxBytes) {
            finish(new Error('Image too large.'))
            req.destroy()
            return
          }
          chunks.push(chunk)
        })
        .on('end', function() {
          if (settled) return
          // os.tmpdir() + path.join() work on Windows as well as on POSIX.
          var name = Math.random().toString().split('.').pop()
          var target = require('path').join(require('os').tmpdir(), name)
          fs.writeFile(target, Buffer.concat(chunks), function(err) {
            finish(err, target)
          })
        })
    })
    .on('error', finish)
    .end()
}

I, however, want to make this more robust:

  1. Are there libraries that do this and do this better?
  2. Is there a chance that response headers lie (about length, about content type)?
  3. Are there any other status codes I should care about? Should I bother with redirects?
  4. I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
  5. How can I get this to work on windows?
  6. Any other ways you can make this script better?

Why: for a feature similar to imgur where users can give me a URL, I download that image, and rehost the image in multiple sizes.

Hemotherapy answered 5/10, 2012 at 6:25 Comment(0)
C
475

I'd suggest using the request module. Downloading a file is as simple as the following code:

var fs = require('fs'),
    request = require('request');

/**
 * Download `uri` into the file `filename`.
 *
 * A HEAD request is issued first so the content type and length can be
 * logged before the body is fetched.
 *
 * @param {string} uri - Address of the resource to download.
 * @param {string} filename - Path of the file to write.
 * @param {function(Error=)} callback - Invoked once: with an error when the
 *   HEAD request, the download or the file write fails, otherwise with no
 *   arguments after the file stream has closed.
 */
var download = function(uri, filename, callback){
  var finished = false;

  // 'error' and 'close' can both fire for one stream; report only the first.
  var done = function(err){
    if (finished) return;
    finished = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  };

  request.head(uri, function(err, res, body){
    // When the HEAD request fails `res` is undefined, so the header lookups
    // below would throw.
    if (err) return done(err);

    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    var file = fs.createWriteStream(filename);

    request(uri)
      .on('error', function(downloadErr){
        // pipe() does not close the destination when the source fails.
        file.destroy();
        done(downloadErr);
      })
      .pipe(file)
      .on('error', done)
      .on('close', function(){
        done();
      });
  });
};

// Example: fetch the Google logo into the relative path google.png and
// print a message once the download callback fires.
var reportDone = function(){
  console.log('done');
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', reportDone);
Cuccuckold answered 5/10, 2012 at 18:1 Comment(14)
Cool! Is there a way to check size and content type before actually downloading it?Hemotherapy
Sure is, request.head. I edited my example to show the content-type and content-length before downloading the file.Cuccuckold
Where does it download the images to?Fondea
Awesome to build automatic shields.io badges!Pierian
@CezaryWojtkowski, I'm getting error message "socket hang up", do you have any good idea how to solve this? Thanks Or.Dobsonfly
Not working for me( Image corruptedExocentric
@Fondea it downloads the image to your root directory.Handal
Can you change the location of where they are saved? If you wanted them in a specific folder?Daube
If you want to do this with promises/fancy async-await syntax use the request-promise module. E.g. let fileContents = await request(myUrl)Scarecrow
Thanks, works in Chrome for downloading images from the web!Bespread
Can you download an image to an HTML <img> element?Hanford
request is deprecated.Tablecloth
To choose a specific folder, change 'google.png' to 'path/to/google.png'. The system understands google.png as ./google.pngServal
How can I make this asynchronous?Abstain
M
62

You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:

npm i axios

Then, you can use the following basic example to begin downloading images:

const fs = require('fs');
const axios = require('axios');

/* ============================================================
  Function: Download Image
============================================================ */

/**
 * Download `url` and stream the response body into the file `image_path`.
 *
 * @param {string} url - Address of the image to download.
 * @param {string} image_path - Path of the file to write.
 * @returns {Promise<void>} Resolves with no value once the file stream has
 *   finished. Rejects when the request fails or when either the response
 *   stream or the file stream reports an error.
 */
const download_image = (url, image_path) =>
  axios({
    url,
    responseType: 'stream',
  }).then(
    response =>
      new Promise((resolve, reject) => {
        const writer = fs.createWriteStream(image_path);

        // pipe() forwards data only, not errors: without a listener on the
        // response stream a failure mid-download would leave this promise
        // pending forever.
        const fail = e => {
          response.data.destroy();
          writer.destroy();
          reject(e);
        };

        response.data.on('error', fail);
        writer.on('error', fail).on('finish', () => resolve());
        response.data.pipe(writer);
      }),
  );

/* ============================================================
  Download Images in Order
============================================================ */

(async () => {
  /*
   * download_image() resolves with no value and rejects on failure, so each
   * outcome is translated into the { status, error } record that the
   * logging below reads. Catching here also lets the remaining downloads
   * run after one of them fails.
   */
  const try_download = async (url, image_path) => {
    try {
      await download_image(url, image_path);
      return { status: true, error: '' };
    } catch (e) {
      return { status: false, error: String(e) };
    }
  };

  let example_image_1 = await try_download('https://example.com/test-1.png', 'example-1.png');

  console.log(example_image_1.status); // true
  console.log(example_image_1.error); // ''

  let example_image_2 = await try_download('https://example.com/does-not-exist.png', 'example-2.png');

  console.log(example_image_2.status); // false
  console.log(example_image_2.error); // 'Error: Request failed with status code 404'

  let example_image_3 = await try_download('https://example.com/test-3.png', 'example-3.png');

  console.log(example_image_3.status); // true
  console.log(example_image_3.error); // ''
})();
Martyrdom answered 1/8, 2018 at 1:39 Comment(7)
Great example ! But barely readable code, try the standard style :DTautog
@Tautog I prefer semicolons. ;)Martyrdom
You really should attach 'finish' and 'error' events to the write stream, wrap them in a Promise and return the promise. Otherwise you may try to access an image that hasn't been completely downloaded yet.Geri
Wouldn't the await make sure the image downloads completely before trying to access it? @GeriXi
@Geri @Xi I've updated the function download_image to capture the 'finish' and 'error' event for the returned promiseHenghold
Don't think this is working, copied the example but nope...Timorous
example_image_1 is always undefined and no error or info to troubleshoot.Ayakoayala
P
55

I ran into this problem some days ago, for a pure NodeJS answer I would suggest using Stream to merge the chunks together.

var http = require('http'),                                                
    Stream = require('stream').Transform,                                  
    fs = require('fs');                                                    

var url = 'http://www.google.com/images/srpr/logo11w.png';

// Fetch `url`, collect the body chunks in a Transform stream and write them
// to image.png once the response has ended.
http.request(url, function(response) {
  // Anything but 200 carries an error page or redirect, not the image.
  if (response.statusCode !== 200) {
    response.resume();
    console.error('Download failed with status code ' + response.statusCode);
    return;
  }

  var data = new Stream();

  response.on('data', function(chunk) {
    data.push(chunk);
  });

  response.on('error', function(err) {
    console.error('Download failed: ' + err.message);
  });

  response.on('end', function() {
    // read() returns null when no chunk was pushed, and writeFileSync
    // rejects null data.
    var body = data.read();
    if (body === null) {
      console.error('Download failed: empty response body');
      return;
    }
    fs.writeFileSync('image.png', body);
  });
}).on('error', function(err) {
  // Without this listener a DNS or connection failure is thrown as an
  // unhandled 'error' event.
  console.error('Request failed: ' + err.message);
}).end();

The newest Node versions won't work well with binary strings, so merging chunks with strings is not a good idea when working with binary data.

*Just be careful when using 'data.read()', it will empty the stream for the next 'read()' operation. If you want to use it more than once, store it somewhere.

Psalms answered 19/6, 2015 at 13:46 Comment(7)
Why not stream the download directly to disk?Wilkerson
had a lot of problems with chunking strings together as it created a corrupt file, but this did itLacefield
Alternatively you can use an array for data, and replace data.read() with Buffer.concat(data). This way we don't need to import the stream moduleMidday
For https://... urls, use the https module, as seen in @chandan-chhajer's answer.Sniffle
How do you check for errors using this? I'm downloading a file which ends up being corrupted somehow... trying to debug this. Is setting the encoding a possible solution?Urbas
Thanks everyone. Used Nihey's answer as main code. Used ternary operator plus some simple regex to determine whether to use http or https like Venryx mentioned. Used Ching Chang's answer to avoid importing Stream, and it worked perfectly. :)Description
Property 'push' does not exist on type 'Stream'.Battalion
N
13
const fs = require('fs');
const http = require('http');
const https = require('https');

/**
 * Download `url` into the file `filename`, using the `https` module for
 * addresses that start with "https" and the `http` module otherwise.
 *
 * @param {string|URL} url - Address of the image to download.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<string>} Resolves with `filename` once the file stream
 *   has closed. Rejects when the request fails, the status code is not 2xx,
 *   or the response or file stream reports an error.
 */
const downloadImageToUrl = (url, filename) => {
  const client = url.toString().startsWith("https") ? https : http;

  return new Promise((resolve, reject) => {
    client
      .get(url, (res) => {
        // A non-2xx body is an error page or redirect, not the image.
        if (res.statusCode < 200 || res.statusCode >= 300) {
          res.resume();
          reject(new Error(`Request failed with status code ${res.statusCode}`));
          return;
        }

        const file = fs.createWriteStream(filename);

        res.on('error', (err) => {
          // pipe() does not close the destination when the source fails.
          file.destroy();
          reject(err);
        });

        res
          .pipe(file)
          .on('error', reject)
          .once('close', () => resolve(filename));
      })
      // Connection-level failures are emitted on the request object; with
      // no listener they are thrown as an unhandled 'error' event.
      .on('error', reject);
  });
};

// `await` is only valid inside an async function in a script that loads its
// modules with require(), so the example runs inside one. The trailing
// catch() reports a failed download instead of leaving the rejection
// unhandled.
(async () => {
  await downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');
})().catch((err) => {
  console.error(err);
});
Nidifugous answered 6/4, 2018 at 7:13 Comment(1)
your function doesn't trigger the callbackSlat
B
10

If you want to report download progress, try this:

var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
    progress(request(uri))
    .on('progress', onProgress)
    .on('response', onResponse)
    .on('error', onError)
    .on('end', onEnd)
    .pipe(fs.createWriteStream(path))
};

how to use:

  var download = require('../lib/download');
  download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
            console.log("progress", state);
        }, function (response) {
            console.log("status code", response.statusCode);
        }, function (error) {
            console.log("error", error);
        }, function () {
            console.log("done");
        });

note: you should install both request & request-progress modules using:

npm install request request-progress --save
Barbiturate answered 1/4, 2016 at 5:25 Comment(2)
This worked great, but I wanted to suggest adding a statusCode check. A 500 statusCode, for example, will not hit the on('error', e) handler. Adding an on('response', (response) => console.error(response.statusCode)) handler greatly facilitates debugging.Ornithic
You can edit my answer :)Barbiturate
B
8

This is an extension to Cezary's answer. If you want to download it to a specific directory, use this. Also, use const instead of var. It's safer this way.

const fs = require('fs');
const request = require('request');
/**
 * Download `uri` into the file `filename`, which may include a directory.
 *
 * @param {string} uri - Address of the resource to download.
 * @param {string} filename - Path of the file to write.
 * @param {function(Error=)} callback - Invoked once: with an error when the
 *   HEAD request, the download or the file write fails, otherwise with no
 *   arguments after the file stream has closed.
 */
const download = function(uri, filename, callback){
  let finished = false;

  // 'error' and 'close' can both fire for one stream; report only the first.
  const done = function(err){
    if (finished) return;
    finished = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  };

  request.head(uri, function(err, res, body){
    if (err) return done(err);

    const file = fs.createWriteStream(filename);

    request(uri)
      .on('error', function(downloadErr){
        // pipe() does not close the destination when the source fails.
        file.destroy();
        done(downloadErr);
      })
      .pipe(file)
      // Reached, for example, when the target directory does not exist.
      .on('error', done)
      .on('close', function(){
        done();
      });
  });
};

// fs.createWriteStream() does not create missing directories, so make sure
// ./images exists before downloading into it.
fs.mkdirSync('./images', { recursive: true });

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(err){
  if (err) {
    console.error(err);
    return;
  }
  console.log('done');
});
Broncobuster answered 18/6, 2018 at 16:6 Comment(0)
M
5

Building on the above, if anyone needs to handle errors in the write/read streams, I used this version. Note the stream.read() in case of a write error, it's required so we can finish reading and trigger close on the read stream.

/**
 * Download `uri` into the file `filename`, reporting read and write errors.
 *
 * @param {string} uri - Address of the resource to download.
 * @param {string} filename - Path of the file to write.
 * @param {function(?Error, string)} callback - Invoked exactly once with
 *   `(err, filename)` on failure or `(null, filename)` after the file stream
 *   has closed.
 */
var download = function(uri, filename, callback){
  var finished = false;

  // A failed write emits 'error' and then 'close'; report only the first.
  var done = function(err){
    if (finished) return;
    finished = true;
    callback(err, filename);
  };

  request.head(uri, function(err, res, body){
    if (err) return done(err);

    var stream = request(uri);
    var file = fs.createWriteStream(filename);

    stream.on('error', function(err){
      // pipe() does not close the destination when the source fails.
      file.destroy();
      done(err);
    });

    stream.pipe(file)
      .on('error', function(err){
        // stream.read() is not available on every request object (it can
        // throw "not a function"), so the download is stopped with abort()
        // when that method exists.
        if (typeof stream.abort === 'function') stream.abort();
        done(err);
      })
      .on('close', function() {
        done(null);
      });
  });
};
Marlysmarmaduke answered 2/5, 2015 at 14:8 Comment(1)
stream.read() appears to be outdated, throws an error not a functionNellynelms

© 2022 - 2024 — McMap. All rights reserved.