Edit: This answer was made with older .NET versions. I bet that .NET 8 now has more efficient ways of handling this. Just a heads up :)
Building on Henks answer I have created some test methods with his answer and some added, more optimized, methods. I found the results differ based on the size of the input string. Therefore, I have tested with two result sets. In the fastest method, the linked source has a even faster way. But, since it is characterized as unsafe I have left this out.
Long input string results:
- InPlaceCharArray: 2021 ms (Sunsetquest's answer) - (Original source)
- String split then join: 4277ms (Kernowcode's answer)
- String reader: 6082 ms
- LINQ using native char.IsWhitespace: 7357 ms
- LINQ: 7746 ms (Henk's answer)
- ForLoop: 32320 ms
- RegexCompiled: 37157 ms
- Regex: 42940 ms
Short input string results:
- InPlaceCharArray: 108 ms (Sunsetquest's answer) - (Original source)
- String split then join: 294 ms (Kernowcode's answer)
- String reader: 327 ms
- ForLoop: 343 ms
- LINQ using native char.IsWhitespace: 624 ms
- LINQ: 645ms (Henk's answer)
- RegexCompiled: 1671 ms
- Regex: 2599 ms
Code:
public class RemoveWhitespace
{
public static string RemoveStringReader(string input)
{
var s = new StringBuilder(input.Length); // (input.Length);
using (var reader = new StringReader(input))
{
int i = 0;
char c;
for (; i < input.Length; i++)
{
c = (char)reader.Read();
if (!char.IsWhiteSpace(c))
{
s.Append(c);
}
}
}
return s.ToString();
}
public static string RemoveLinqNativeCharIsWhitespace(string input)
{
return new string(input.ToCharArray()
.Where(c => !char.IsWhiteSpace(c))
.ToArray());
}
public static string RemoveLinq(string input)
{
return new string(input.ToCharArray()
.Where(c => !Char.IsWhiteSpace(c))
.ToArray());
}
public static string RemoveRegex(string input)
{
return Regex.Replace(input, @"\s+", "");
}
private static Regex compiled = new Regex(@"\s+", RegexOptions.Compiled);
public static string RemoveRegexCompiled(string input)
{
return compiled.Replace(input, "");
}
public static string RemoveForLoop(string input)
{
for (int i = input.Length - 1; i >= 0; i--)
{
if (char.IsWhiteSpace(input[i]))
{
input = input.Remove(i, 1);
}
}
return input;
}
public static string StringSplitThenJoin(this string str)
{
return string.Join("", str.Split(default(string[]), StringSplitOptions.RemoveEmptyEntries));
}
public static string RemoveInPlaceCharArray(string input)
{
var len = input.Length;
var src = input.ToCharArray();
int dstIdx = 0;
for (int i = 0; i < len; i++)
{
var ch = src[i];
switch (ch)
{
case '\u0020':
case '\u00A0':
case '\u1680':
case '\u2000':
case '\u2001':
case '\u2002':
case '\u2003':
case '\u2004':
case '\u2005':
case '\u2006':
case '\u2007':
case '\u2008':
case '\u2009':
case '\u200A':
case '\u202F':
case '\u205F':
case '\u3000':
case '\u2028':
case '\u2029':
case '\u0009':
case '\u000A':
case '\u000B':
case '\u000C':
case '\u000D':
case '\u0085':
continue;
default:
src[dstIdx++] = ch;
break;
}
}
return new string(src, 0, dstIdx);
}
}
Tests:
[TestFixture]
public class Test
{
// Short input
//private const string input = "123 123 \t 1adc \n 222";
//private const string expected = "1231231adc222";
// Long input
private const string input = "123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222123 123 \t 1adc \n 222";
private const string expected = "1231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc2221231231adc222";
private const int iterations = 1000000;
[Test]
public void RemoveInPlaceCharArray()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveInPlaceCharArray(input);
}
stopwatch.Stop();
Console.WriteLine("InPlaceCharArray: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveStringReader()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveStringReader(input);
}
stopwatch.Stop();
Console.WriteLine("String reader: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveLinqNativeCharIsWhitespace()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveLinqNativeCharIsWhitespace(input);
}
stopwatch.Stop();
Console.WriteLine("LINQ using native char.IsWhitespace: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveLinq()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveLinq(input);
}
stopwatch.Stop();
Console.WriteLine("LINQ: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveRegex()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveRegex(input);
}
stopwatch.Stop();
Console.WriteLine("Regex: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveRegexCompiled()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveRegexCompiled(input);
}
stopwatch.Stop();
Console.WriteLine("RegexCompiled: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[Test]
public void RemoveForLoop()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.RemoveForLoop(input);
}
stopwatch.Stop();
Console.WriteLine("ForLoop: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
[TestMethod]
public void StringSplitThenJoin()
{
string s = null;
var stopwatch = Stopwatch.StartNew();
for (int i = 0; i < iterations; i++)
{
s = RemoveWhitespace.StringSplitThenJoin(input);
}
stopwatch.Stop();
Console.WriteLine("StringSplitThenJoin: " + stopwatch.ElapsedMilliseconds + " ms");
Assert.AreEqual(expected, s);
}
}
Edit: Tested a nice one liner from Kernowcode.