/EDIT: I've rewritten the whole posting.
Below is a pretty complete solution to the VB highlighting problem. If SO has got nothing better, please use it. VB syntax highlighting is definitely wanted.
I've also added a code example with some complex literals that get highlighted correctly. However, I haven't even tried to get XLinq right. It might still work, though. The keyword list is taken from MSDN. Contextual keywords are not included. Did you know about the GetXmlNamespace operator?
The algorithm knows literal type characters. It should also be able to handle identifier type characters, but I haven't tested these. Note that the code works on HTML-escaped text. As a consequence, &, < and > must appear in the input as named (!) entities (&amp;amp;, &amp;lt; and &amp;gt;), not as single characters.
Sorry for the long regex.
/**
 * Adds syntax-highlighting markup to Visual Basic source text.
 *
 * Each line is tokenized independently. Recognized tokens are wrapped via
 * span() with one of the classes "string", "char", "comment", "date",
 * "number" or "keyword"; everything else (whitespace, operators,
 * identifiers, HTML entities, unrecognized characters) is copied through
 * unchanged, so no input text is ever lost.
 *
 * The text is handed to span() as-is, i.e. it is expected to be
 * HTML-escaped already. The "&amp;" entity is therefore accepted wherever
 * VB uses an ampersand inside a literal (hex/octal prefix, Long type
 * character); a raw "&" is accepted as well.
 *
 * @param {string} code VB source text with lines separated by "\n".
 * @returns {string} The same text with highlight spans inserted.
 */
var highlightVB = function(code) {
    // Alternatives are tried in this order at every position. The index of
    // the capture group that matched identifies the token type, which avoids
    // re-deriving the type from character offsets afterwards.
    var tokenPattern = new RegExp([
        // 1: string or char literal; "*" (not "+") so the empty string "" matches.
        /("(?:""|[^"])*"c?)/.source,
        // 2: comment to end of line; no "$" so a trailing "\r" does not defeat it.
        /('.*)/.source,
        // 3: date literal; must start with a digit so a lone "#" is not a date.
        /(#\s*\d[^#]*#)/.source,
        // 4: preprocessor directive candidate such as #If or #Const.
        /(#[a-z]+)/.source,
        // 5: numeric literal: hex/octal, decimal/float with optional exponent
        //    and type character, or a float starting with ".".
        /((?:&amp;|&)(?:H[0-9A-F]+|O[0-7]+)(?:U?[SIL]|%|&amp;|&)?(?!\w)|\d+(?:\.\d*)?(?:e[+-]?\d+)?U?(?:[SILDFR%@!#]|&amp;|&)?|\.\d+(?:e[+-]?\d+)?[DFR@!#]?)/.source,
        // 6: identifier or keyword.
        /(\w+)/.source,
        // (no group) HTML entity: consumed whole so that its name, e.g. the
        // "amp" in "&amp;", is never examined as a word.
        /&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/.source
    ].join("|"), "gi");

    var lines = code.split("\n");
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var result = "";
        var consumed = 0; // index just past the previous token
        var match;
        // exec() resets lastIndex to 0 when it returns null, so the same
        // regex object can be reused for the next line.
        while ((match = tokenPattern.exec(line)) !== null) {
            // Copy whatever lies between two tokens verbatim.
            result += line.substring(consumed, match.index);
            consumed = tokenPattern.lastIndex;

            var tok = match[0];
            if (match[1]) {
                // A trailing c/C after the closing quote marks a Char literal.
                var lastChar = tok.charAt(tok.length - 1);
                var isChar = lastChar == "c" || lastChar == "C";
                result += span(isChar ? "char" : "string", tok);
            } else if (match[2]) {
                result += span("comment", tok);
            } else if (match[3]) {
                result += span("date", tok);
            } else if (match[5]) {
                result += span("number", tok);
            } else if (match[4] || match[6]) {
                // Directives and plain words are highlighted only when listed
                // as keywords.
                result += isKeyword(tok) ? span("keyword", tok) : tok;
            } else {
                result += tok;
            }
        }
        // Copy the remainder of the line after the last token.
        result += line.substring(consumed);
        lines[i] = result;
    }
    return lines.join("\n");
};
// Lower-case Visual Basic keywords, including the preprocessor directives
// (the entries starting with "#"). Contextual keywords are not included.
// The list is only used for membership tests, so each entry appears once.
var keywords = [
    "addhandler", "addressof", "alias", "and", "andalso", "as", "boolean", "byref",
    "byte", "byval", "call", "case", "catch", "cbool", "cbyte", "cchar", "cdate",
    "cdec", "cdbl", "char", "cint", "class", "clng", "cobj", "const", "continue",
    "csbyte", "cshort", "csng", "cstr", "ctype", "cuint", "culng", "cushort", "date",
    "decimal", "declare", "default", "delegate", "dim", "directcast", "do", "double",
    "each", "else", "elseif", "end", "endif", "enum", "erase", "error", "event",
    "exit", "false", "finally", "for", "friend", "function", "get", "gettype",
    "getxmlnamespace", "global", "gosub", "goto", "handles", "if",
    "implements", "imports", "in", "inherits", "integer", "interface", "is", "isnot",
    "let", "lib", "like", "long", "loop", "me", "mod", "module", "mustinherit",
    "mustoverride", "mybase", "myclass", "namespace", "narrowing", "new", "next",
    "not", "nothing", "notinheritable", "notoverridable", "object", "of", "on",
    "operator", "option", "optional", "or", "orelse", "overloads", "overridable",
    "overrides", "paramarray", "partial", "private", "property", "protected",
    "public", "raiseevent", "readonly", "redim", "rem", "removehandler", "resume",
    "return", "sbyte", "select", "set", "shadows", "shared", "short", "single",
    "static", "step", "stop", "string", "structure", "sub", "synclock", "then",
    "throw", "to", "true", "try", "trycast", "typeof", "variant", "wend", "uinteger",
    "ulong", "ushort", "using", "when", "while", "widening", "with", "withevents",
    "writeonly", "xor", "#const", "#else", "#elseif", "#end", "#if"
];
/**
 * Tells whether a token is a Visual Basic keyword, ignoring case.
 *
 * @param {string} token Token text to look up.
 * @returns {boolean} true when the lower-cased token is in `keywords`.
 */
var isKeyword = function(token) {
    var needle = token.toLowerCase();
    var position = keywords.indexOf(needle);
    return position != -1;
};
/**
 * Tells whether a string compares within the range '0'..'9'.
 *
 * @param {string} c Value to test; callers pass a single character.
 * @returns {boolean} true when '0' <= c <= '9' under string comparison.
 */
var isDigit = function(c) {
    if ('0' <= c) {
        return c <= '9';
    }
    return false;
};
/**
 * Returns the first entry of a match array that is neither undefined nor
 * null.
 *
 * @param {Array} tokens Array-like result, e.g. from RegExp.prototype.exec.
 * @returns {*} The first defined entry, or null when there is none.
 */
var getToken = function(tokens) {
    var index = 0;
    while (index < tokens.length) {
        var candidate = tokens[index];
        // Loose comparison on purpose: skips both undefined and null.
        if (candidate != null) {
            return candidate;
        }
        index += 1;
    }
    return null;
};
/**
 * Wraps text in a <span> element carrying the given CSS class.
 *
 * Neither argument is escaped; both are inserted into the markup as-is.
 *
 * @param {string} cssClass Value for the span's class attribute. (Named
 *     cssClass because `class` is a reserved word and cannot be used as a
 *     parameter name.)
 * @param {string} text Content placed inside the span.
 * @returns {string} The resulting HTML fragment.
 */
var span = function(cssClass, text) {
    return "<span class=\"" + cssClass + "\">" + text + "</span>";
};
Code for testing:
' Sample input for the highlighter: one declaration per kind of literal.
Protected Sub Page_Load(ByVal sender As Object, ByVal e As EventArgs) Handles Me.Load
'set page title
Page.Title = "Something"
' String literal containing doubled (escaped) quotes.
Dim r As String = "Say ""Hello"""
' Plain integer and floating-point literals.
Dim i As Integer = 1234
Dim d As Double = 1.23
' Floating-point literal with a leading dot and an F type character.
Dim s As Single = .123F
' Integer literals with L and UL type characters.
Dim l As Long = 123L
Dim ul As ULong = 123UL
' Char literal: a one-character string followed by c.
Dim c As Char = "x"c
' Literal with the &H prefix.
Dim h As Integer = &H0
' Date literal delimited by # characters.
Dim t As Date = #5/31/1993 1:15:30 PM#
' Literal with an exponent and an F type character.
Dim f As Single = 1.32e-5F
End Sub
<!-- language: lang-vb -->
– Edmondson