Specifically for Transact-SQL (Microsoft SQL Server), you can use the Microsoft.SqlServer.Management.SqlParser.Parser
namespace, available in Microsoft.SqlServer.Management.SqlParser.dll, an assembly that is included with SQL Server and can be freely distributed.
Here's an example method for parsing T-SQL as a string into a sequence of tokens:
/// <summary>
/// Scans a T-SQL string with the SqlParser <c>Scanner</c> and collects every token
/// it produces until the scanner reports <c>Tokens.EOF</c> (the EOF token itself is not included).
/// </summary>
/// <param name="sql">The T-SQL text to tokenize. Must not be null.</param>
/// <returns>
/// One <c>TokenInfo</c> per scanned token, in the order the scanner returned them.
/// The sequence is empty when the scanner returns EOF immediately.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="sql"/> is null.</exception>
IEnumerable<TokenInfo> ParseSql(string sql)
{
    // Fail fast with a clear message instead of letting a null reach the scanner.
    if (sql == null)
    {
        throw new System.ArgumentNullException("sql");
    }

    ParseOptions parseOptions = new ParseOptions();
    Scanner scanner = new Scanner(parseOptions);
    List<TokenInfo> tokens = new List<TokenInfo>();

    // Scanner state is threaded through GetNext by reference; it starts at 0.
    int state = 0;
    int start;
    int end;
    int token;
    bool isPairMatch;
    bool isExecAutoParamHelp;

    scanner.SetSource(sql, 0);

    while ((token = scanner.GetNext(ref state, out start, out end, out isPairMatch, out isExecAutoParamHelp)) != (int)Tokens.EOF)
    {
        tokens.Add(
            new TokenInfo()
            {
                Start = start,
                End = end,
                IsPairMatch = isPairMatch,
                IsExecAutoParamHelp = isExecAutoParamHelp,
                // "+ 1" because 'end' is treated here as the inclusive index of the token's last character.
                Sql = sql.Substring(start, end - start + 1),
                Token = (Tokens)token,
            });
    }

    return tokens;
}
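Note that the `TokenInfo` class is just a simple class with the above-referenced properties
(`Start`, `End`, `IsPairMatch`, `IsExecAutoParamHelp`, `Sql`, and `Token`).
`Tokens` is the enumeration of token kinds used by the scanner;
it includes constants like `TOKEN_BEGIN`, `TOKEN_COMMIT`, `TOKEN_EXISTS`, etc.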
UPDATE: the parser is now available as a separate NuGet package: https://www.nuget.org/packages/Microsoft.SqlServer.Management.SqlParser