chunk/split a string in Javascript without breaking words
Asked Answered
O

5

16

Good day,

I would like to know if there is an easy way to chunk/split a string without breaking the words.

Eg:

var input = "Lorem ipsum dolor sit amet, consectetur  adipiscing elit. Proin placerat, nisi nec vulputate scelerisque, metus lectus ultricies massa, et luctus elit libero eu erat. Fusce vitae sem lacus, eu ullamcorper lectus. Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

Should return an array like this if I break at 80 characters long:

var output = ["Lorem ipsum dolor sit amet, consectetur  adipiscing elit. Proin placerat, nisi",
"nec vulputate scelerisque, metus lectus ultricies massa, et luctus elit libero",
"eu erat. Fusce vitae sem lacus, eu ullamcorper lectus. Lorem ipsum dolor sit",
"amet, consectetur adipiscing elit."];

I found this really nice piece of code:

//http://phpjs.org/functions/chunk_split:369
function chunk_split (body, chunklen, end) {
    // Splits `body` into pieces of at most `chunklen` characters and appends
    // `end` after every piece (including the last one), returning one string.
    //
    // body     - string to split
    // chunklen - maximum piece length; parsed as a base-10 integer, and a
    //            missing/zero/unparsable value falls back to 76
    // end      - separator appended after each piece; defaults to '\r\n'
    // returns  - the joined string, or false when chunklen is below 1
    //
    // version: 1103.1210
    // discuss at: http://phpjs.org/functions/chunk_split
    // +   original by: Paulo Freitas
    // +      input by: Brett Zamir (http://brett-zamir.me)
    // +   bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   improved by: Theriault
    // *     example 1: chunk_split('Hello world!', 1, '*');
    // *     returns 1: 'H*e*l*l*o* *w*o*r*l*d*!*'
    // *     example 2: chunk_split('Hello world!', 10, '*');
    // *     returns 2: 'Hello worl*d!*'
    chunklen = parseInt(chunklen, 10) || 76;
    end = end || '\r\n';

    if (chunklen < 1) {
        return false;
    }

    // [\s\S] is used instead of "." because "." does not match line
    // terminators: with "." a newline inside `body` was dropped and produced
    // extra empty pieces. The trailing empty match at the end of the input is
    // what yields the final `end` after the last piece.
    return body.match(new RegExp("[\\s\\S]{0," + chunklen + "}", "g")).join(end);
}

But I really doubt I can modify it so words aren't broken in half. Any tips?

Thank you!

Overview answered 9/7, 2011 at 3:29 Comment(1)
strings are arrays so you can test if string[80] is a character (not a space) if yes then 81,82 .... and so onVitovitoria
A
9

Something like this?

var n = 80;

// Move n just past the first space found at or after index 80.
// The scan is bounded by input.length: the previous `while (n)` condition
// could never become false (n only grows), so an input with no space at or
// after index 80 made the loop run forever.
while (n < input.length) {
    if (input[n++] === ' ') {
        break;
    }
}

// Words of the leading part of the input, up to and including that space.
output = input.substring(0,n).split(' ');
console.log(output);

UPDATED

Now that I re-read the question, here's an updated solution:

var len = 80;
var prev = 0;

output = [];

// Each chunk ends at the first space located `len` or more characters after
// the chunk's start, and keeps that space. Chunks can therefore be longer
// than `len`.
var curr = input.indexOf(' ', len);
while (curr !== -1) {
    curr += 1;                                  // step past the space so it stays in the chunk
    output.push(input.substring(prev, curr));
    prev = curr;
    curr = input.indexOf(' ', prev + len);      // next candidate break point
}

// Whatever follows the last break is the final chunk.
output.push(input.substr(prev));
Approver answered 9/7, 2011 at 4:41 Comment(4)
When I saw your small piece of code I was happy to see such a compact way to do it. Unfortunately, I can't make it work.Overview
Sorry, I misunderstood your question. :( Modified my answer though and it's still small. :)Approver
not sure what i'm missing here but this doesn't break as requestedCursive
Thank you, I'd tried to find something similar. Currently working with google apps-script, and it's haven't css :(Gorman
J
14

Here's some brute force code that will do it:

/**
 * Greedily packs the space-separated words of `input` into lines whose
 * length does not exceed `len`. A single word longer than `len` is placed
 * on a line of its own and is never broken up.
 *
 * @param {string} input - text to wrap
 * @param {number} len - maximum line length
 * @returns {string[]} the wrapped lines
 */
function splitIntoLines(input, len) {
    var lines = [];
    var current = "";
    var tokens = input.split(' ');
    var pos = 0;

    while (pos < tokens.length) {
        var candidate = addWordOntoLine(current, tokens[pos]);
        var overflows = candidate.length > len;

        if (!overflows) {
            // the word fits: keep it and move on
            current = candidate;
            pos += 1;
            continue;
        }

        if (current.length == 0) {
            // an over-long word on an empty line: accept it anyway so that
            // every line carries at least one word
            current = candidate;
            pos += 1;
        }

        // flush the finished line; a word that did not fit is retried on
        // the next pass against an empty line
        lines.push(current);
        current = "";
    }

    if (current.length > 0) {
        lines.push(current);
    }
    return lines;
}

/**
 * Appends `word` to `line`, inserting a single space first unless the line
 * is still empty.
 */
function addWordOntoLine(line, word) {
    return line.length != 0 ? line + " " + word : line + word;
}

If this routine encounters a single word longer than the desired line length, it will put it on a line by itself and will not break it up.

You can play with it here: http://jsfiddle.net/jfriend00/fbaLe/

Jamie answered 9/7, 2011 at 4:37 Comment(2)
Only one of these answers which worked for me out the box (was the last one I tried)Armet
Works as expected. This allows one to split a large text into chunks of size n.Koren
A
9

Something like this?

var n = 80;

// Move n just past the first space found at or after index 80.
// The scan is bounded by input.length: the previous `while (n)` condition
// could never become false (n only grows), so an input with no space at or
// after index 80 made the loop run forever.
while (n < input.length) {
    if (input[n++] === ' ') {
        break;
    }
}

// Words of the leading part of the input, up to and including that space.
output = input.substring(0,n).split(' ');
console.log(output);

UPDATED

Now that I re-read the question, here's an updated solution:

var len = 80;
var prev = 0;

output = [];

// Each chunk ends at the first space located `len` or more characters after
// the chunk's start, and keeps that space. Chunks can therefore be longer
// than `len`.
var curr = input.indexOf(' ', len);
while (curr !== -1) {
    curr += 1;                                  // step past the space so it stays in the chunk
    output.push(input.substring(prev, curr));
    prev = curr;
    curr = input.indexOf(' ', prev + len);      // next candidate break point
}

// Whatever follows the last break is the final chunk.
output.push(input.substr(prev));
Approver answered 9/7, 2011 at 4:41 Comment(4)
When I saw your small piece of code I was happy to see such a compact way to do it. Unfortunately, I can't make it work.Overview
Sorry, I misunderstood your question. :( Modified my answer though and it's still small. :)Approver
not sure what i'm missing here but this doesn't break as requestedCursive
Thank you, I'd tried to find something similar. Currently working with google apps-script, and it's haven't css :(Gorman
V
9

This builds on @steve's answer but will split the string respecting word break so that the string is never longer than the specified length. This works more like a normal word wrap.

/**
 * Splits `s` into chunks on word boundaries, breaking at the last space that
 * keeps a chunk's text within `len` characters. Each chunk keeps the space it
 * was broken on, so a chunk may be `len` characters of text plus one trailing
 * space. A run of text with no space in it is never cut: the chunk is
 * extended until the next space is reached.
 *
 * @param {string} s - text to split
 * @param {number} len - target maximum chunk length
 * @returns {string[]} the chunks, in order; concatenated they reproduce `s`
 */
function chunkString(s, len)
{
    // Declared locally: this used to be an implicit global, which leaked
    // state between calls and throws a ReferenceError in strict mode.
    var output = [];
    var prev = 0;     // start index of the chunk being built
    var curr = len;   // index currently examined as a possible break point

    while (s[curr]) {
        var breakAt = -1;   // index just past the space to break on, -1 if none

        if (s[curr] === ' ') {
            breakAt = curr + 1;
        } else {
            // Look backwards for the nearest space inside the current chunk.
            var lastSpace = s.lastIndexOf(' ', curr);
            if (lastSpace >= prev) {
                breakAt = lastSpace + 1;
            }
        }

        if (breakAt === -1) {
            // No space in this chunk yet (over-long word): keep moving forward.
            curr++;
            continue;
        }

        output.push(s.substring(prev, breakAt));
        prev = breakAt;
        curr = breakAt + len;
    }

    // Whatever is left after the last break is the final chunk.
    output.push(s.substring(prev));
    return output;
}
Vaud answered 3/7, 2013 at 17:39 Comment(0)
H
8

Thanks to orourkedd, it was very useful. I just updated it with splitting.

/**
 * Wraps `str` into lines of at most `len` characters without breaking words.
 * The input is trimmed and split on single spaces; words are joined back with
 * one space, so repeated spaces collapse. A word longer than `len` is placed
 * on a line of its own.
 *
 * @param str text to wrap
 * @param len maximum line length
 * @returns the wrapped lines (a single empty string for empty input)
 */
private chunkString(str, len) {
    const input = str.trim().split(' ');
    const output = [''];
    let index = 0;
    input.forEach(word => {
        const temp = `${output[index]} ${word}`.trim();
        // An empty current line always takes the word, even when the word is
        // longer than `len`; previously an over-long first word left a
        // leading empty string in the result.
        if (temp.length <= len || output[index] === '') {
            output[index] = temp;
        } else {
            index++;
            output[index] = word;
        }
    });
    return output;
}
Heredity answered 13/3, 2019 at 1:41 Comment(0)
S
0

Let's compare some approaches:

var text = 'This is a long sentence. Another long for a test!!!';

// 1. Does not work well: the last word is cut off.
var first =  text.match(/\b(\w+\W+\w+\W+\w+\W+)/g);

// 2. Works better, but may cause problems with other texts.
var second = text.match(/[a-zA-Z]+(?:[^a-zA-Z]+[a-zA-Z]+){0,3}([^a-zA-Z]*){0,}/g);

// 3. Working solution (per the author, it needs more CPU and time):
//    split into words, then re-join them in groups of `chunkSize`.
var third = [];
var words = text.split(" ");

const chunkSize = 3;
for (let offset = 0; offset < words.length; offset += chunkSize) {
    third.push(words.slice(offset, offset + chunkSize).join(" "));
}

// 4. Reusable (DRY) variant: a generic array chunker plus a string wrapper.

// Splits `arr` into consecutive slices of `size` elements; the last slice
// holds whatever is left over.
const chunk = (arr, size) => {
  const groupCount = Math.ceil(arr.length / size);
  const sliceGroup = (_value, groupIndex) => {
    const start = groupIndex * size;
    return arr.slice(start, start + size);
  };
  return Array.from({ length: groupCount }, sliceGroup);
};

// Groups the space-separated words of `text` into runs of `chunkSize` words
// and joins the runs with `glue`.
function str_split(text, chunkSize=3, glue='<br/>')
{
    return chunk(text.split(" "), chunkSize)
        .map((group) => group.join(" "))
        .join(glue);
}

// Print the result of every approach, chunks separated by "<br/>".
for (const pieces of [first, second, third]) {
    console.log(pieces.join("<br/>"));
}
console.log(str_split(text));
Shuttle answered 18/4 at 10:8 Comment(0)

© 2022 - 2024 — McMap. All rights reserved.