I am writing a program in C# for extracting comments from code. I am using Roslyn compiler to do that. It's great, because I am just visiting the whole abstract syntax tree and fetching SingleLineComment trivia, MultiLineComment trivia and DocumentationComment trivia syntax from the file in solution. But there is a problem because programmers often write comments like that:
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
You can see that these are three single line comments, but I want them them to be fetched from code as one comment. Can I achieve that with Roslyn or maybe there is another way? Because that's frequent situation when programmers are writing multi line commments using single line comments syntax.
My code for extracting comments looks like this:
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using System.Collections.Generic;
namespace RoslynPlay
{
public class CommentStore
{
public List<Comment> Comments { get; } = new List<Comment>();
public void AddCommentTrivia(SyntaxTrivia trivia,
LocationStore commentLocationstore, string fileName)
{
if (trivia.Kind() == SyntaxKind.SingleLineCommentTrivia)
{
Comments.Add(new SingleLineComment(trivia.ToString(),
trivia.GetLocation().GetLineSpan().EndLinePosition.Line + 1, commentLocationstore)
{
FileName = fileName,
});
}
else if (trivia.Kind() == SyntaxKind.MultiLineCommentTrivia)
{
Comments.Add(new MultiLineComment(trivia.ToString(),
trivia.GetLocation().GetLineSpan().StartLinePosition.Line + 1,
trivia.GetLocation().GetLineSpan().EndLinePosition.Line + 1, commentLocationstore)
{
FileName = fileName,
});
}
}
public void AddCommentNode(DocumentationCommentTriviaSyntax node,
LocationStore commentLocationstore, string fileName)
{
Comments.Add(new DocComment(node.ToString(),
node.GetLocation().GetLineSpan().StartLinePosition.Line + 1,
node.GetLocation().GetLineSpan().EndLinePosition.Line,
commentLocationstore)
{
FileName = fileName,
});
}
}
}
and in main main file (Program.cs) I am launching comment extraction from code like this:
string fileContent;
SyntaxTree tree;
SyntaxNode root;
CommentsWalker commentWalker;
MethodsAndClassesWalker methodWalker;
string[] files = Directory.GetFiles(projectPath, $"*.cs", SearchOption.AllDirectories);
var commentStore = new CommentStore();
Console.WriteLine("Reading files...");
ProgressBar progressBar = new ProgressBar(files.Length);
foreach (var file in files)
{
fileContent = File.ReadAllText(file);
string filePath = new Regex($@"{projectPath}\\(.*)$").Match(file).Groups[1].ToString();
tree = CSharpSyntaxTree.ParseText(fileContent);
root = tree.GetRoot();
commentWalker = new CommentsWalker(filePath, commentStore);
commentWalker.Visit(root);
progressBar.UpdateAndDisplay();
}
and here is also the comment walker:
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
namespace RoslynPlay
{
public class CommentsWalker : CSharpSyntaxWalker
{
private string _fileName;
private CommentStore _commentStore;
public CommentsWalker(string fileName,
CommentStore commentStore)
: base(SyntaxWalkerDepth.StructuredTrivia)
{
_fileName = fileName;
_commentStore = commentStore;
}
public override void VisitTrivia(SyntaxTrivia trivia)
{
if (trivia.Kind() == SyntaxKind.SingleLineCommentTrivia
|| trivia.Kind() == SyntaxKind.MultiLineCommentTrivia)
{
_commentStore.AddCommentTrivia(trivia, _commentLocationStore, _fileName);
}
base.VisitTrivia(trivia);
}
public override void VisitDocumentationCommentTrivia(DocumentationCommentTriviaSyntax node)
{
_commentStore.AddCommentNode(node, _commentLocationStore, _fileName);
base.VisitDocumentationCommentTrivia(node);
}
}
}
And the problem is because trivia.Kind() == SyntaxKind.SingleLineCommentTrivia extracts only single line of comments, but I want to extract single line comments blocks as one comment.