What is the right way to split a string into a vector of strings? Delimiter is space or comma.
For space-separated strings, you can do this:
// Text to split; its words are separated by single spaces.
std::string s = "What is the right way to split a string into a vector of strings";
// Wrap the string in a stream so it can be read with formatted extraction.
std::stringstream ss(s);
// Iterator that extracts one std::string from ss each time it advances.
std::istream_iterator<std::string> begin(ss);
// Default-constructed iterator: the end-of-stream sentinel.
std::istream_iterator<std::string> end;
// Construct the vector directly from the iterator range [begin, end).
std::vector<std::string> vstrings(begin, end);
// Print every element, each followed by a newline.
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
What
is
the
right
way
to
split
a
string
into
a
vector
of
strings
For a string that has both commas and spaces:
/// Character-classification facet whose table marks both ',' and ' ' as
/// whitespace, so formatted extraction from a stream imbued with it treats
/// commas and spaces alike as token separators.
struct tokens: std::ctype<char>
{
    tokens()
        : std::ctype<char>(get_table())
    {
    }

    /// Returns a pointer to the classification table used by this facet: a
    /// copy of the classic "C" table with the ',' and ' ' entries overwritten.
    static std::ctype_base::mask const* get_table()
    {
        using char_facet = std::ctype<char>;

        static const char_facet::mask* const classic = char_facet::classic_table();
        static char_facet::mask table[char_facet::table_size];

        // Refresh the private table from the classic one on every call.
        std::memcpy(table, classic, sizeof(table));

        // Overwrite (not OR into) the masks of the two separator characters.
        table[' '] = std::ctype_base::space;
        table[','] = std::ctype_base::space;
        return table;
    }
};
// Text to split; words are separated by spaces and/or commas.
std::string s = "right way, wrong way, correct way";
std::stringstream ss(s);
// Install a locale carrying the custom `tokens` facet on the stream, so that
// extraction classifies characters with the table built by tokens::get_table().
ss.imbue(std::locale(std::locale(), new tokens()));
// Iterator that extracts one std::string from ss each time it advances.
std::istream_iterator<std::string> begin(ss);
// Default-constructed iterator: the end-of-stream sentinel.
std::istream_iterator<std::string> end;
// Construct the vector directly from the iterator range [begin, end).
std::vector<std::string> vstrings(begin, end);
// Print every element, each followed by a newline.
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
right
way
wrong
way
correct
way
std::vector<std::string> vstrings(begin, end);
would be nicer IMO, but I suppose we don't know whether the questioner is constructing the vector, or hoping to populate a pre-existing vector. –
Cocytus istream_iterator
but why not finish strong using ostream_iterator
as well? –
Injunction for
loop with a call to std::copy
. –
Cystocele ss.imbue(std::locale(std::locale(), new tokens()))
somewhere? –
Dancer auto loc = std::make_shared<tokens>()
, and then pass ss.imbue(..., loc.get()));
. That should work. –
Changeless A convenient way would be boost's string algorithms library.
#include <boost/algorithm/string/classification.hpp> // Include for boost::is_any_of
#include <boost/algorithm/string/split.hpp> // Include for boost::split
// ...
// Receives the pieces produced by boost::split.
std::vector<std::string> words;
// The text to split (left empty in this sketch).
std::string s;
// Split s at any ',' or ' ' character; token_compress_on merges adjacent separators.
boost::split(words, s, boost::is_any_of(", "), boost::token_compress_on);
token_compress_on
for? –
Panay eCompress
(the fourth argument) is set to token_compress_on
, adjacent separators are merged together. Otherwise, every two separators delimit a token. boost.org/doc/libs/1_49_0/doc/html/boost/algorithm/… –
Countersign For space-separated strings, you can do this:
// Text to split; its words are separated by single spaces.
std::string s = "What is the right way to split a string into a vector of strings";
// Wrap the string in a stream so it can be read with formatted extraction.
std::stringstream ss(s);
// Iterator that extracts one std::string from ss each time it advances.
std::istream_iterator<std::string> begin(ss);
// Default-constructed iterator: the end-of-stream sentinel.
std::istream_iterator<std::string> end;
// Construct the vector directly from the iterator range [begin, end).
std::vector<std::string> vstrings(begin, end);
// Print every element, each followed by a newline.
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
What
is
the
right
way
to
split
a
string
into
a
vector
of
strings
For a string that has both commas and spaces:
/// Character-classification facet whose table marks both ',' and ' ' as
/// whitespace, so formatted extraction from a stream imbued with it treats
/// commas and spaces alike as token separators.
struct tokens: std::ctype<char>
{
    tokens()
        : std::ctype<char>(get_table())
    {
    }

    /// Returns a pointer to the classification table used by this facet: a
    /// copy of the classic "C" table with the ',' and ' ' entries overwritten.
    static std::ctype_base::mask const* get_table()
    {
        using char_facet = std::ctype<char>;

        static const char_facet::mask* const classic = char_facet::classic_table();
        static char_facet::mask table[char_facet::table_size];

        // Refresh the private table from the classic one on every call.
        std::memcpy(table, classic, sizeof(table));

        // Overwrite (not OR into) the masks of the two separator characters.
        table[' '] = std::ctype_base::space;
        table[','] = std::ctype_base::space;
        return table;
    }
};
// Text to split; words are separated by spaces and/or commas.
std::string s = "right way, wrong way, correct way";
std::stringstream ss(s);
// Install a locale carrying the custom `tokens` facet on the stream, so that
// extraction classifies characters with the table built by tokens::get_table().
ss.imbue(std::locale(std::locale(), new tokens()));
// Iterator that extracts one std::string from ss each time it advances.
std::istream_iterator<std::string> begin(ss);
// Default-constructed iterator: the end-of-stream sentinel.
std::istream_iterator<std::string> end;
// Construct the vector directly from the iterator range [begin, end).
std::vector<std::string> vstrings(begin, end);
// Print every element, each followed by a newline.
std::copy(vstrings.begin(), vstrings.end(), std::ostream_iterator<std::string>(std::cout, "\n"));
Output:
right
way
wrong
way
correct
way
std::vector<std::string> vstrings(begin, end);
would be nicer IMO, but I suppose we don't know whether the questioner is constructing the vector, or hoping to populate a pre-existing vector. –
Cocytus istream_iterator
but why not finish strong using ostream_iterator
as well? –
Injunction for
loop with a call to std::copy
. –
Cystocele ss.imbue(std::locale(std::locale(), new tokens()))
somewhere? –
Dancer auto loc = std::make_shared<tokens>()
, and then pass ss.imbue(..., loc.get()));
. That should work. –
Changeless You can use getline with delimiter:
// s is the text to split (left empty in this sketch); tmp receives one piece at a time.
string s, tmp;
// Stream over s so getline can consume it piece by piece.
stringstream ss(s);
vector<string> words;
// getline with a third argument reads up to the next ',' (the delimiter itself is discarded).
while(getline(ss, tmp, ',')){
words.push_back(tmp);
.....
}
/// Splits `str` at every occurrence of the (possibly multi-character)
/// separator `token`.
///
/// Empty pieces are kept, including one after a trailing separator:
///   split("1,2,", ",")  ==> ["1", "2", ""]
/// An empty `str` yields an empty vector. An empty `token` cannot match
/// anything meaningful, so `str` is returned as a single piece (previously
/// this case looped forever).
///
/// @param str   text to split
/// @param token separator to split at
/// @return the pieces of `str`, in order
std::vector<std::string> split(std::string str, std::string token){
    std::vector<std::string> result;
    if (str.empty()) {
        return result;
    }
    if (token.empty()) {
        result.push_back(str);
        return result;
    }

    // Walk the string with a cursor instead of repeatedly re-copying the
    // remaining tail, and keep positions in size_type so npos is not truncated.
    std::string::size_type start = 0;
    while (true) {
        const std::string::size_type index = str.find(token, start);
        if (index == std::string::npos) {
            // No further separator: the rest (possibly empty) is the last piece.
            result.push_back(str.substr(start));
            break;
        }
        result.push_back(str.substr(start, index - start));
        start = index + token.size();
    }
    return result;
}
split("1,2,3",",") ==> ["1","2","3"]
split("1,2,",",") ==> ["1","2",""]
split("1token2token3","token") ==> ["1","2","3"]
break;
instead of str = "";
as it unnecessarily adds empty string in the result if token is not found. split("1234", ",") ==> ["1234", ""] –
Hollandia If the string has both spaces and commas you can use the string class function
found_index = myString.find_first_of(delims_str, begin_index)
in a loop. Checking for != npos and inserting into a vector. If you prefer old school you can also use C's
strtok()
method.
/// Splits `text` at every occurrence of the single character `delim`,
/// reading the pieces with std::getline.
///
/// Empty pieces between adjacent delimiters are kept; an empty piece after a
/// trailing delimiter is not produced, and an empty `text` yields no pieces.
///
/// @param text  text to split
/// @param delim separator character
/// @return the pieces of `text`, in order
std::vector<std::string> split(std::string text, char delim) {
    std::vector<std::string> pieces;
    std::stringstream reader(text);
    for (std::string piece; std::getline(reader, piece, delim); ) {
        pieces.push_back(piece);
    }
    return pieces;
}
split("String will be split", ' ')
-> {"String", "will", "be", "split"}
split("Hello, how are you?", ',')
-> {"Hello", "how are you?"}
EDIT: Here's a thing I made, this can use multi-char delimiters, albeit I'm not 100% sure if it always works:
/// Splits `text` at every occurrence of the (possibly multi-character)
/// separator `delim`.
///
/// Empty pieces are kept, so the result always has at least one element
/// (an empty `text` yields a single empty string). An empty `delim` would
/// match at every position without advancing, so in that case `text` is
/// returned unsplit (previously this case never terminated).
///
/// @param text  text to split
/// @param delim separator to split at
/// @return the pieces of `text`, in order
std::vector<std::string> split(std::string text, std::string delim) {
    std::vector<std::string> vec;
    if (delim.empty()) {
        vec.push_back(text);
        return vec;
    }

    std::string::size_type prevPos = 0;
    while (true) {
        const std::string::size_type pos = text.find(delim, prevPos);
        if (pos == std::string::npos) {
            // No further separator: everything left is the last piece.
            vec.push_back(text.substr(prevPos));
            return vec;
        }
        vec.push_back(text.substr(prevPos, pos - prevPos));
        prevPos = pos + delim.length();
    }
}
Tweaked version from Techie Delight:
#include <string>
#include <vector>
/// Splits `str` into the non-empty runs of characters that lie between
/// occurrences of `delim`. Leading, trailing and repeated delimiters never
/// produce empty strings.
///
/// @param str   text to split
/// @param delim separator character
/// @return the non-empty pieces of `str`, in order
std::vector<std::string> split(const std::string& str, char delim) {
    std::vector<std::string> pieces;

    // tokenBegin always points at the first character of the next piece.
    std::string::size_type tokenBegin = str.find_first_not_of(delim);
    while (tokenBegin != std::string::npos) {
        // tokenEnd may be npos; substr then takes everything up to the end.
        const std::string::size_type tokenEnd = str.find(delim, tokenBegin);
        pieces.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
        tokenBegin = str.find_first_not_of(delim, tokenEnd);
    }
    return pieces;
}
Here is my variant, which works somewhat like PHP's explode function: we pass it the string and a list of delimiter characters.
/// Splits `data` at any character contained in `delimiters` and returns the
/// non-empty pieces; runs of adjacent delimiters produce no empty strings.
///
/// @param data       text to split
/// @param delimiters set of single-character separators
/// @return the non-empty pieces of `data`, in order
std::vector< std::string > explode(const std::string& data, const std::string& delimiters) {
    std::vector< std::string > result;

    // Jump from the start of one piece to the start of the next.
    std::string::size_type first = data.find_first_not_of(delimiters);
    while (first != std::string::npos) {
        const std::string::size_type stop = data.find_first_of(delimiters, first);
        if (stop == std::string::npos) {
            // The last piece runs to the end of the string.
            result.push_back(data.substr(first));
            break;
        }
        result.push_back(data.substr(first, stop - first));
        first = data.find_first_not_of(delimiters, stop);
    }
    return result;
}
example of usage
// Sample input with single, repeated and trailing delimiters.
std::string test_delimiters("hello, there is lots of, delimiters, that may be even together, ");
auto dem_res = explode(test_delimiters, " ,"); // space or comma
// Print one piece per line, then a final "end" marker line.
for (auto word : dem_res) {
std::cout << word << '\n';
} std::cout << "end\n";
The output:
hello
there
is
lots
of
delimiters
that
may
be
even
together
end
I made this custom function that converts a line into a vector of words.
#include <iostream>
#include <vector>
#include <ctime>
#include <string>
using namespace std;
// Reads one line from standard input and splits it at every space character
// into subArray. A piece is emitted at each space and once more at the last
// character of the line, so consecutive or trailing spaces yield empty pieces.
int main(){
    string line;
    getline(cin, line);

    const int len = line.length();
    vector<string> subArray;

    int wordStart = 0;  // index of the first character of the current piece
    for (int pos = 0; pos < len; ++pos) {
        if (line[pos] == ' ') {
            subArray.push_back(line.substr(wordStart, pos - wordStart));
            wordStart = pos + 1;
        }
        // At the final character, flush whatever follows the last space.
        if (pos + 1 == len) {
            subArray.push_back(line.substr(wordStart, pos - wordStart + 1));
        }
    }
    return 0;
}
Here is a modified version of roach's solution that splits based on a string of single character delimiters + supports the option to compress duplicate delimiters.
/// Splits `text` at any single character contained in `delim`.
///
/// When `compress` is true, runs of adjacent delimiters are skipped, so no
/// empty pieces are produced. When it is false, empty pieces before and
/// between delimiters are kept; in both modes nothing is emitted for an empty
/// remainder after the final delimiter.
///
/// @param text     text to split
/// @param delim    set of single-character separators
/// @param compress whether adjacent delimiters are merged
/// @return the pieces of `text`, in order
std::vector<std::string> split(std::string text, std::string delim, bool compress)
{
    std::vector<std::string> pieces;
    std::string::size_type cursor = 0;  // start of the piece being scanned

    for (;;)
    {
        if (compress)
        {
            // Step over every delimiter sitting directly at the cursor.
            const std::string::size_type firstKept = text.find_first_not_of(delim, cursor);
            cursor = (firstKept == std::string::npos) ? text.size() : firstKept;
        }

        const std::string::size_type hit = text.find_first_of(delim, cursor);
        if (hit == std::string::npos)
        {
            // Last piece: only emitted when something is left after the cursor.
            if (cursor != text.size())
                pieces.push_back(text.substr(cursor));
            break;
        }

        pieces.push_back(text.substr(cursor, hit - cursor));
        cursor = hit + 1;
    }
    return pieces;
}
Example without compress:
// Sample input mixing the three delimiter characters '.', '@' and ' '.
std::string s = " 1.2 foo@foo . ";
// Third argument false: adjacent delimiters are not merged.
auto res = split(s, ".@ ", false);
// Print each piece wrapped in braces so that empty pieces are visible.
for(auto i : res)
std::cout << "string {" << i << "}" << std::endl;
Output:
string {}
string {}
string {1}
string {2}
string {}
string {foo}
string {foo}
string {}
string {}
With compress split(s, ".@ ", true);
string {1}
string {2}
string {foo}
string {foo}
Here's a function that will split up a string
into a vector
but it doesn't include empty strings in the output vector
.
/// Splits `str` at every occurrence of the (possibly multi-character)
/// separator `token`, leaving out empty pieces.
///
/// Leading, trailing and adjacent separators therefore contribute nothing to
/// the result, and an empty `str` yields an empty vector. An empty `token`
/// cannot advance the scan, so `str` is returned as a single piece
/// (previously this case looped forever).
///
/// @param str   text to split
/// @param token separator to split at
/// @return the non-empty pieces of `str`, in order
std::vector<std::string> split(std::string str, std::string token) {
    std::vector<std::string> result;
    if (token.empty()) {
        if (!str.empty()) {
            result.push_back(str);
        }
        return result;
    }

    // Scan with a cursor rather than re-copying the remaining tail each time,
    // and keep positions in size_type so npos is never truncated.
    std::string::size_type start = 0;
    while (start < str.size()) {
        const std::string::size_type index = str.find(token, start);
        if (index == std::string::npos) {
            // No further separator: the non-empty rest is the last piece.
            result.push_back(str.substr(start));
            break;
        }
        if (index > start) {
            result.push_back(str.substr(start, index - start));
        }
        start = index + token.size();
    }
    return result;
}
Note: The above was adapted from this answer.
Usage
// Demonstrates split() with the three-character separator " : " and 2 as the
// third argument, printing each returned piece on its own line.
void test() {
    const string input = "hello : world : ok : fine";
    const auto pieces = split(input, " : ", 2);
    for (const auto& piece : pieces) {
        cout << piece << endl;
    }
}
/// Splits `str` at every occurrence of `delimiter`, optionally limiting the
/// number of returned pieces.
///
/// Empty pieces are kept, so the result always has at least one element.
/// When `max_elements` is positive, at most that many pieces are returned and
/// the last one holds the unsplit remainder; this now also holds for
/// `max_elements == 1`, which previously was ignored. A value of zero or less
/// means "no limit". An empty `delimiter` returns `str` unsplit (previously
/// this case never terminated).
///
/// @param str          text to split
/// @param delimiter    separator to split at (default: a single space)
/// @param max_elements maximum number of pieces, or <= 0 for unlimited
/// @return the pieces of `str`, in order
static inline std::vector<std::string> split(const std::string &str, const std::string &delimiter = " ", const int max_elements = 0) {
    std::vector<std::string> tokens;
    if (delimiter.empty()) {
        tokens.push_back(str);
        return tokens;
    }

    const bool limited = max_elements > 0;
    // Converted once so the size comparison below is unsigned on both sides.
    const std::vector<std::string>::size_type limit =
        limited ? static_cast<std::vector<std::string>::size_type>(max_elements) : 0;

    std::string::size_type start_index = 0;
    while (true) {
        // Checked before searching, so a limit of 1 yields the whole string.
        if (limited && tokens.size() + 1 == limit) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        const std::string::size_type next_index = str.find(delimiter, start_index);
        if (next_index == std::string::npos) {
            tokens.push_back(str.substr(start_index));
            break;
        }
        tokens.push_back(str.substr(start_index, next_index - start_index));
        start_index = next_index + delimiter.length();
    }
    return tokens;
}
© 2022 - 2024 — McMap. All rights reserved.