You could add an extra method to StreamReader (the source code of e.g. Mono could be used for that purpose):
// Scratch buffer in which ReadRegex accumulates text; created on first use and cleared (Length = 0) on later calls.
private StringBuilder lineBuilder;
/// <summary>
/// Gets or sets the length threshold that ReadRegex compares the accumulated
/// text against before it attempts a regex match on newly read data.
/// </summary>
public int RegexBufferSize
{
    get
    {
        return lastRegexMatchedLength;
    }
    set
    {
        lastRegexMatchedLength = value;
    }
}
// Backing field for RegexBufferSize; ReadRegex only tries to match inside its read loop once the accumulated text is at least this long.
private int lastRegexMatchedLength = 0;
/// <summary>
/// Reads from the stream until <paramref name="regex"/> matches the text
/// accumulated so far, and returns the matched text.
/// </summary>
/// <param name="regex">The pattern to match against the accumulated text.</param>
/// <returns>
/// The matched text, or <c>null</c> when the end of the stream is reached
/// without a match. Text preceding the match is discarded; text following the
/// match is put back into the decode buffer for subsequent reads.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
/// <exception cref="ObjectDisposedException">The reader has been closed.</exception>
public virtual string ReadRegex(Regex regex)
{
    if (regex == null)
        throw new ArgumentNullException("regex");
    if (base_stream == null)
        throw new ObjectDisposedException("StreamReader", "Cannot read from a closed RegexStreamReader");
    if (pos >= decoded_count && ReadBuffer() == 0)
        return null; // EOF reached

    if (lineBuilder == null)
        lineBuilder = new StringBuilder();
    else
        lineBuilder.Length = 0;

    // Seed the accumulator with whatever is still unconsumed in the decode buffer.
    lineBuilder.Append(decoded_buffer, pos, decoded_count - pos);

    // Tracks whether the most recently appended data has already been run
    // through the regex, so the EOF path below does not repeat a failed match.
    bool dataTested = false;

    // NOTE(review): ReadBuffer() is assumed to refill decoded_buffer from
    // index 0 and return the number of chars read (0 at EOF) - confirm.
    int charsRead;
    while ((charsRead = ReadBuffer()) > 0)
    {
        dataTested = false;
        lineBuilder.Append(decoded_buffer, 0, charsRead);

        // Not enough input accumulated yet; keep reading before matching.
        if (lineBuilder.Length < lastRegexMatchedLength)
            continue;

        string matched = TakeMatch(regex);
        if (matched != null)
            return matched;

        // Raise the threshold by 10% so more input is gathered before the next
        // attempt. The original "*= (int) 1.1" multiplied by 1 and never grew.
        if (lastRegexMatchedLength > 0)
            lastRegexMatchedLength = (int)Math.Min(int.MaxValue, (long)(lastRegexMatchedLength * 1.1));
        dataTested = true;
    }

    // EOF reached: give the accumulated text one last chance if the latest
    // data was never tested (threshold not met, or nothing further was read).
    if (!dataTested)
    {
        string matched = TakeMatch(regex);
        if (matched != null)
            return matched;
    }

    // No match at all: mark the decode buffer as fully consumed.
    pos = decoded_count;
    return null;
}

// Runs the regex over everything accumulated in lineBuilder. On success, copies
// the text after the match back into decoded_buffer (resetting pos and
// decoded_count) and returns the matched text; returns null when there is no match.
private string TakeMatch(Regex regex)
{
    string text = lineBuilder.ToString();
    Match match = regex.Match(text, 0, text.Length);
    if (!match.Success)
        return null;

    int offset = match.Index + match.Length;
    pos = 0;
    decoded_count = lineBuilder.Length - offset;
    ensureMinDecodedBufLen(decoded_count);
    lineBuilder.CopyTo(offset, decoded_buffer, 0, decoded_count);
    return match.Value;
}
In the above method, the following vars are used:
- decoded_buffer : the char buffer that contains/will contain the data read
- pos: offset within the array containing unhandled data
- decoded_count: the number of valid characters in the buffer, i.e. the index one past the last character read
- RegexBufferSize: the minimum size of the regex input before any matching occurs.
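The method ReadBuffer() needs to read the next chunk of data from the stream into decoded_buffer (starting at index 0) and return the number of characters read, or 0 at the end of the stream.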
The method ensureMinDecodedBufLen() needs to make sure that the decoded_buffer is large enough.
When calling the method, pass the Regex that needs to be matched against.