I tried to improve an old routine that was parsing a text file (1.5GB). The routine was pretty dumb, and it was building a string like this: s:= s+ buff[i];
So, I thought that TStringBuilder would add significant speed improvements. It turned out that it was actually 114% slower.
So, I built my own StringBuilder, which is 184.82 times (yes, 184!!!!!!) faster than the classic s:= s+ chr (experiment on a 4MB string) and even faster than TStringBuilder.
Tests:
Classic s:= s + c
Time: 8502 ms
{ Baseline benchmark: rebuilds the text returned by ReadFile(File4MB) by
  appending one character at a time with plain string concatenation, then
  writes the elapsed tick count to the log. }
procedure TfrmTester.btnClassicClick(Sender: TObject);
var
  Content   : string;
  Accum     : string;
  StartTick : Cardinal;
  Idx       : Integer;
begin
  Content := ReadFile(File4MB);

  { Only the concatenation loop is timed, not the file read }
  StartTick := GetTickCount;
  for Idx := 1 to Length(Content) do
    Accum := Accum + Content[Idx];

  { The author measured 8502 ms here }
  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');
end;
Prebuffered
Time:
BuffSize= 10000; // 10k buffer = 406ms
BuffSize= 100000; // 100k buffer = 140ms
BuffSize= 1000000; // 1M buffer = 46ms
Code:
{ Benchmark: copies the text returned by ReadFile(File4MB) one character at a
  time into a string that is enlarged BuffSize characters at a time, logs the
  elapsed tick count, and logs 'FAILED!' if the copy differs from the source. }
procedure TfrmTester.btnBufferedClick(Sender: TObject);
var
  Source    : string;
  Dest      : string;
  StartTick : Cardinal;
  Capacity  : Integer;   { length currently allocated for Dest }
  WritePos  : Integer;   { 1-based index of the next character to store }
  ReadPos   : Integer;
begin
  Source := ReadFile(File4MB);
  StartTick := GetTickCount;

  Capacity := 0;
  WritePos := 1;
  for ReadPos := 1 to Length(Source) do
  begin
    { Out of room: extend the buffer by one more BuffSize step }
    if Capacity < ReadPos then
    begin
      SetLength(Dest, Capacity + BuffSize);
      Capacity := Length(Dest);
    end;

    Dest[WritePos] := Source[ReadPos];
    Inc(WritePos);
  end;

  { Trim the part of the preallocated buffer that was never written }
  SetLength(Dest, WritePos - 1);

  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');
  if Dest <> Source then
    Log.Lines.Add('FAILED!');
end;
Prebuffered, as class
Time:
BuffSize= 10000; // 10k buffer = 437ms
BuffSize= 100000; // 100k buffer = 187ms
BuffSize= 1000000; // 1M buffer = 78ms
Code:
{ Benchmark: same character-by-character copy as the inline buffered test, but
  routed through a TCStringBuff instance created with BuffSize. Logs the
  elapsed tick count and logs 'FAILED!' if the result differs from the source. }
procedure TfrmTester.btnBuffClassClick(Sender: TObject);
var
  Source    : string;
  Built     : string;
  Builder   : TCStringBuff;
  StartTick : Cardinal;
  Idx       : Integer;
begin
  Source := ReadFile(File4MB);
  StartTick := GetTickCount;

  { Object construction and destruction are part of the timed section }
  Builder := TCStringBuff.Create(BuffSize);
  try
    for Idx := 1 to Length(Source) do
      Builder.AddChar(Source[Idx]);
    Built := Builder.GetResult;
  finally
    FreeAndNil(Builder);
  end;

  Log.Lines.Add('Time: ' + IntToStr(GetTickCount - StartTick) + 'ms');
  if Built <> Source then
    Log.Lines.Add('FAILED!');
end;
And this is the class:
{ TCStringBuff }
{ Creates an empty buffer.

  aBuffSize: number of characters by which the internal string is enlarged
             each time it runs out of room. Values below 1 are raised to 1:
             with a zero or negative step the buffer could never grow, and
             AddChar would then store a character past the end of the string. }
constructor TCStringBuff.Create(aBuffSize: Integer= 10000);
const
  MinBuffSize = 1;
begin
  { NOTE(review): assumes the ancestor has a parameterless Create (TObject) -
    the class declaration is not visible here, confirm. }
  inherited Create;

  if aBuffSize < MinBuffSize then
    BuffSize := MinBuffSize
  else
    BuffSize := aBuffSize;

  marker := 1;       { 1-based position of the next character to write }
  CurBuffLen := 0;   { nothing allocated yet; the first AddChar allocates }
  inp := 1;          { running count of AddChar calls, compared to CurBuffLen }
end;
{ Returns the characters accumulated so far and empties the buffer.

  The positions are rewound together with the string. Without that, marker
  and inp would still point into the old buffer while CurBuffLen claimed the
  space was still allocated, so the next AddChar would skip the grow step and
  write into an empty string. After this call the object can be reused as if
  freshly constructed. }
function TCStringBuff.GetResult: string;
begin
  { Cut off the preallocated tail that was never written }
  SetLength(s, marker - 1);
  Result := s;

  { Release the buffer held by the object }
  s := '';

  { Rewind to the state the constructor sets up }
  marker := 1;
  inp := 1;
  CurBuffLen := 0;
end;
{ Appends one character to the buffer, enlarging the underlying string by
  BuffSize characters first when the allocated space is used up. }
procedure TCStringBuff.AddChar(Ch: Char);
begin
  { No room left for this character: extend the allocation by one step }
  if CurBuffLen < inp then
  begin
    SetLength(s, CurBuffLen + BuffSize);
    CurBuffLen := Length(s);
  end;

  { Store the character, then advance both counters }
  s[marker] := Ch;
  Inc(inp);
  Inc(marker);
end;
Conclusion:
Stop using s:= s + c if you have large (over 10K) strings. It might be true even if you have small strings but you do it often (for example, you have a function that is doing some string processing on a small string, but you call it often).
_
PS: You may also want to see this: https://www.delphitools.info/2013/10/30/efficient-string-building-in-delphi/2/