Imagine you have many long text files, and you need to only extract data from the first line of each one (without reading any further content). What is the best way in Node JS to do it?
Thanks!
Imagine you have many long text files, and you need to only extract data from the first line of each one (without reading any further content). What is the best way in Node JS to do it?
Thanks!
I ended up adopting this solution, which seems the most performant I've seen so far:
var fs = require('fs');
var Q = require('q');
/**
 * Read only the first line of a text file.
 *
 * The file is streamed as UTF-8 and the stream is destroyed as soon as the
 * first '\n' has been seen, so the rest of a long file is not read.
 *
 * Uses the built-in Promise, so this function itself does not need the
 * third-party 'q' module.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} Resolves with the first line, without its
 *   trailing '\n'. Resolves with '' for an empty file and with the whole
 *   content when the file contains no '\n'. Rejects with the stream error
 *   when the file cannot be opened or read.
 */
function readFirstLine (path) {
  return new Promise(function (resolve, reject) {
    const rs = fs.createReadStream(path, {encoding: 'utf8'});
    let acc = '';
    let lineComplete = false;
    rs
      .on('data', function (chunk) {
        // A chunk can still be delivered after the stream was asked to stop;
        // it must not touch the line that has already been captured.
        if (lineComplete) {
          return;
        }
        const index = chunk.indexOf('\n');
        if (index === -1) {
          // No line break yet: the whole chunk belongs to the first line.
          acc += chunk;
          return;
        }
        acc += chunk.slice(0, index);
        lineComplete = true;
        rs.destroy();
      })
      .on('close', function () {
        // Reached both after an early destroy() and after a natural end of
        // file. After an error the promise is already rejected, so this
        // call is a no-op.
        resolve(acc);
      })
      .on('error', function (err) {
        reject(err);
      });
  });
}
I created an npm module for convenience, named "firstline".
Thanks to @dandavis for the suggestion to use String.prototype.slice()!
acc.slice(0,acc.indexOf("\n"))
on the close event instead of splitting the whole 64kb+, or even to somehow pass the index from the data event (plus the length of acc) for bare-metal efficiency. –
Mintun data event would occur before rs.close(). This changed the value of index before the close event, so it caused a bug where you may not get the full first line; I had to wrap the on-data function with an index check to make sure it was still undefined. –
Strength ts and node 18 I get Error: Cannot find module 'q'. –
Nope There's a built-in module almost for this case - readline. It avoids messing with chunks and so forth. The code would look like the following:
const fs = require('fs');
const readline = require('readline');
async function getFirstLine(pathToFile) {
const readable = fs.createReadStream(pathToFile);
const reader = readline.createInterface({ input: readable });
const line = await new Promise((resolve) => {
reader.on('line', (line) => {
reader.close();
resolve(line);
});
});
readable.close();
return line;
}
readable.close() immediately after reader.close(). This would avoid bringing the await logic which is quite heavy when transpiled. But this is nitpicking at this point. The main concern remains that if there is no line to read, this will hang forever. –
Exeunt I ended up adopting this solution, which seems the most performant I've seen so far:
var fs = require('fs');
var Q = require('q');
/**
 * Read only the first line of a text file.
 *
 * The file is streamed as UTF-8 and the stream is destroyed as soon as the
 * first '\n' has been seen, so the rest of a long file is not read.
 *
 * Uses the built-in Promise, so this function itself does not need the
 * third-party 'q' module.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} Resolves with the first line, without its
 *   trailing '\n'. Resolves with '' for an empty file and with the whole
 *   content when the file contains no '\n'. Rejects with the stream error
 *   when the file cannot be opened or read.
 */
function readFirstLine (path) {
  return new Promise(function (resolve, reject) {
    const rs = fs.createReadStream(path, {encoding: 'utf8'});
    let acc = '';
    let lineComplete = false;
    rs
      .on('data', function (chunk) {
        // A chunk can still be delivered after the stream was asked to stop;
        // it must not touch the line that has already been captured.
        if (lineComplete) {
          return;
        }
        const index = chunk.indexOf('\n');
        if (index === -1) {
          // No line break yet: the whole chunk belongs to the first line.
          acc += chunk;
          return;
        }
        acc += chunk.slice(0, index);
        lineComplete = true;
        rs.destroy();
      })
      .on('close', function () {
        // Reached both after an early destroy() and after a natural end of
        // file. After an error the promise is already rejected, so this
        // call is a no-op.
        resolve(acc);
      })
      .on('error', function (err) {
        reject(err);
      });
  });
}
I created an npm module for convenience, named "firstline".
Thanks to @dandavis for the suggestion to use String.prototype.slice()!
acc.slice(0,acc.indexOf("\n"))
on the close event instead of splitting the whole 64kb+, or even to somehow pass the index from the data event (plus the length of acc) for bare-metal efficiency. –
Mintun data event would occur before rs.close(). This changed the value of index before the close event, so it caused a bug where you may not get the full first line; I had to wrap the on-data function with an index check to make sure it was still undefined. –
Strength ts and node 18 I get Error: Cannot find module 'q'. –
Nope I know this doesn't exactly answer the question but for those who are looking for a READABLE and simple way to do so:
const fs = require('fs').promises;
/**
 * Return the first line of a file by loading the whole file as UTF-8 text
 * and keeping what precedes the first line terminator.
 *
 * Note that the entire file is read into memory before the line is cut out.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The first line, or '' when it is empty.
 */
async function getFirstLine(filePath) {
  const text = await fs.readFile(filePath, 'utf-8');
  const found = text.match(/(^.*)/);
  if (!found) {
    return '';
  }
  const [, leadingLine] = found;
  return leadingLine || '';
}
NOTE:
OR
conditions` or complex matches) and only reads the first line
TypeError [ERR_INVALID_ARG_TYPE]: The "cb" argument must be of type function. Received type string ('utf-8')
–
Nope fs.readFile, it's a callback, but you won't need it if you are awaiting it –
Toughminded promises to the fs object. Now I get TypeError: fileContent.match is not a function, I use a ts file. –
Nope console.log to see fileContent variable XD I bet you forgot the await and you are trying to .match() a promise. This could be a side conversation, first try to learn how to debug your code :) –
Toughminded In all current versions of Node.js, readline.createInterface can be used as an async iterable, to read a file line by line - or just for the first line. This is also safe to use with empty files.
Unfortunately, the error handling logic is broken in versions of Node.js before 16, where certain file system errors may go uncaught even if the code is wrapped in a try-catch block because of the way asynchronous errors are propagated in streams. So I recommend using this method only in Node.js >= 16.
import { createReadStream } from "fs";
import { createInterface } from "readline";
/**
 * Read the first line of a file by iterating a readline interface as an
 * async iterable and stopping at the first item.
 *
 * @param {string} path - Path of the file to read.
 * @returns {Promise<string>} The first line, or '' when the file is empty.
 */
async function readFirstLine(path) {
  const fileStream = createReadStream(path);
  try {
    const lineReader = createInterface(fileStream);
    for await (const firstLine of lineReader) {
      // Returning from inside the loop ends the iteration after one line.
      return firstLine;
    }
    // The loop body never ran: the file has no lines.
    return '';
  } finally {
    // Always release the underlying file stream.
    fileStream.destroy();
  }
}
// Usage example. `await` at the top level is only valid in an ES module;
// elsewhere, call this from inside an async function or use `.then()`.
const firstLine = await readFirstLine("path/to/file");
Promise{pending}. Can you show how to then catch the Promise to show the file content? –
Nope Please try this:
https://github.com/yinrong/node-line-stream-util#get-head-lines
It unpipes the upstream once it has got the head lines.
//Here you go;
var lineReader = require('line-reader');
var async = require('async');
exports.readManyFiles = function(files) {
async.map(files,
function(file, callback))
lineReader.open(file, function(reader) {
if (reader.hasNextLine()) {
reader.nextLine(function(line) {
callback(null,line);
});
}
});
},
function(err, allLines) {
//do whatever you want to with the lines
})
}
© 2022 - 2024 — McMap. All rights reserved.
head command on the files from within node? use the features of the file/operating system for what they're good for. :) – Reorganize
head is not available on Windows, for example. – Tonicity
head command, by any chance do you know how it is internally implemented? – Tonicity
head, have a look here. – Tonicity