How can I debug my flex/bison grammar?
Asked Answered
R

1

9

This is a very silly problem. There are no errors in the grammar rules afaik but its not giving the right output. I have been staring at it but the mistake is not visible to me.

What tools are available to me to help me see what is going on in a parse? My attempts to insert tracing code are a lot of work and don't seem to be helping me much.

parser.y

%{
#include<stdio.h>
#include<stdlib.h>  
#include<string.h>
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

int yyparse(void);
int yylex(void);
extern char* yytext;
extern FILE * yyin;
extern int tableSize;

FILE *logout;
extern int line_count;
extern char *arr[100];
extern char *final_arr[100];

SymbolTable *table;

void yyerror (const char *s)
{
    fprintf(stderr,"%s\n",s);
    return;
}

%}

%union {
    class SymbolInfo* sym;
    char *s;
    float f;
}

%error-verbose
%verbose
%token COMMA INT ID SEMICOLON FLOAT VOID LCURL RCURL RETURN NOT IF FOR WHILE PRINTLN LPAREN RPAREN
%token CONST_INT CONST_FLOAT LTHIRD RTHIRD 
%token ADDOP MULOP INCOP DECOP RELOP LOGICOP ASSIGNOP

%token <f> DOUBLE
//%expect 1

%precedence THEN
%precedence ELSE

%left "<" ">" "<=" ">=" "=" "!="
%left "+" "-"
%left "*" "/"
%left UMINUS 


%%

start : program     {   printf("start -> program\n");
                        fprintf(logout,"%d : start ->  program\n",line_count);
                    }
      ;

program : program unit {
                            printf("program -> program unit\n");
                            fprintf(logout,"%d : program -> program unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");
                        }
        | unit          {
                            printf("program -> unit\n");
                            fprintf(logout,"%d : program -> unit\n\n",line_count);
                            for(int j = 0; final_arr[j] != NULL; j++)
                            {
                                fprintf(logout,"%s",final_arr[j]);
                            }
                                fprintf(logout,"\n\n");

                        }
        ;

unit : var_dec  {
                    printf("unit -> var_dec\n");
                    fprintf(logout,"%d : unit -> var_dec\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                |func_declaration {

                fprintf(logout,"%d : unit -> func_declaration\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
                }
                |func_definition {

                fprintf(logout,"%d : unit -> func_definition\n\n",line_count);
                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

                }
                ;

     ;

func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON     {

                printf("func_declaration -> type_specifier id LPAREN parameter_list RPAREN SEMICOLON\n");
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count);
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

        }
        | type_specifier ID LPAREN RPAREN SEMICOLON {
                printf("func_declaration -> type_specifier id LPAREN RPAREN SEMICOLON\n");
                fprintf(logout,"%d : func_declaration : type_specifier ID LPAREN parameter_list RPAREN SEMICOLON\n\n", line_count); 

                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");


        }
        ;

func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement {
                printf("func_definition -> type_specifier ID LPAREN parameter_list RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement\n\n", line_count); 

        }
        | type_specifier ID LPAREN RPAREN compound_statement {
                printf("func_definition -> type_specifier id LPAREN RPAREN compound_statement\n");
                fprintf(logout,"%d : func_definition : type_specifier ID LPAREN RPAREN compound_statement\n\n", line_count);    

        }
        ;               


parameter_list  : parameter_list COMMA type_specifier ID {

                printf("parameter_list -> parameter_list COMMA type_specifier ID\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier ID\n\n", line_count);    
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

        }
        | parameter_list COMMA type_specifier {
                printf("parameter_list -> parameter_list COMMA type_specifier\n");
                fprintf(logout,"%d : parameter_list  : parameter_list COMMA type_specifier\n\n", line_count);   

        }
        | type_specifier ID {
                printf("parameter_list -> type_specifier ID\n");
                fprintf(logout,"%d : parameter_list : type_specifier ID\n\n", line_count);  
                for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");
        }
        | type_specifier {
                printf("parameter_list -> type_specifier\n");
                fprintf(logout,"%d :  parameter_list : type_specifier \n\n", line_count);   

        }
        ;


compound_statement : LCURL statements RCURL {
    printf("compound_statement -> LCURL statements RCURL\n");
    fprintf(logout,"compound_statement : LCURL statements RCURL\n\n");
}
            | LCURL RCURL
            ;

var_dec: type_specifier declaration_list SEMICOLON {

                    printf("var_dec -> type_specifier declaration_list SEMICOLON \n");
                    fprintf(logout,"%d : var_dec: type_specifier declaration_list SEMICOLON \n\n", line_count);

                    for(int j = 0; arr[j] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                    fprintf(logout,"\n\n");

            }
        ;            

type_specifier : INT    {printf("type_specifier -> INT\n");
                            fprintf(logout,"%d : type_specifier-> INT\n\n%s\n\n", line_count,yytext);
                        }
               | FLOAT  {printf("type_specifier ->FLOAT\n");
                            fprintf(logout,"%d : type_specifier-> FLOAT\n\n%s\n\n",line_count, yytext);

                        }
               | VOID   {printf("type_specifier -> VOID\n");
                            fprintf(logout,"%d : type_specifier-> VOID\n\n%s\n\n",line_count, yytext);

                         }
               ;        

declaration_list : declaration_list COMMA ID {

                        printf("declaration_list -> declaration_list COMMA ID\n");  
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                       }
                 | declaration_list COMMA ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n");      
                        fprintf(logout,"%d : declaration_list -> declaration_list COMMA ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 |ID    {
                        printf("declaration_list -> ID\n");
                        fprintf(logout,"%d : declaration_list -> ID\n\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");
                        }
                 |ID LTHIRD CONST_INT RTHIRD {

                        printf("declaration_list -> ID LTHIRD CONST_INT RTHIRD\n"); 
                        fprintf(logout,"%d : declaration_list -> ID LTHIRD CONST_INT RTHIRD\n",line_count);
                        for(int j = 1; arr[j+1] != NULL; j++)
                        {
                            fprintf(logout,"%s",arr[j]);
                        }
                            fprintf(logout,"\n\n");

                        }
                 ;  

statements : statement {
    printf("statements -> statement\n");
    fprintf(logout,"%d : statements : statement\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
       | statements statement
       ;

statement : var_dec
      | expression_statement
      | compound_statement
      | FOR LPAREN expression_statement expression_statement expression RPAREN statement
      | IF LPAREN expression RPAREN statement
      | WHILE LPAREN expression RPAREN statement
      | PRINTLN LPAREN ID RPAREN SEMICOLON
      | RETURN expression SEMICOLON  {
            printf("statement -> RETURN expression SEMICOLON\n");
            fprintf(logout,"%d : statement : RETURN expression SEMICOLON\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
      }
      ;

expression_statement    : SEMICOLON         
            | expression SEMICOLON 
            ;

variable : ID   {
                    printf("variable -> ID\n");
                    fprintf(logout,"%d : variable : ID\n\n",line_count);
                    fprintf(logout, "%s\n\n",yytext);
}   
     | ID LTHIRD expression RTHIRD 
     ;

 expression : logic_expression  {
        printf("expression -> logic_expression\n");
        fprintf(logout,"%d : expression : logic_expression\n\n",line_count);
        fprintf(logout, "%s\n\n",yytext);
 }
       | variable ASSIGNOP logic_expression     
       ;

logic_expression : rel_expression   
         | rel_expression LOGICOP rel_expression    
         ;

rel_expression  : simple_expression {
    printf("rel_expression  -> simple_expression \n");
    fprintf(logout,"%d : rel_expression : simple_expression\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
        | simple_expression RELOP simple_expression 
        ;

simple_expression : term {
    printf("simple_expression -> term\n");
    fprintf(logout,"%d : simple_expression : term \n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
} 
          | simple_expression ADDOP term {
            printf("simple_expression -> simple_expression ADDOP term\n");
            fprintf(logout,"simple_expression : simple_expression ADDOP term \n\n");
            fprintf(logout, "%s\n\n",yytext);
          }
          ;

term :  unary_expression {
                printf("term -> unary_expression\n");
                fprintf(logout,"%d : term : unary_expression\n\n",line_count);
                fprintf(logout, "%s\n\n",yytext);
            }
     |  term MULOP unary_expression
     ;

unary_expression : ADDOP unary_expression  
         | NOT unary_expression 
         | factor {
            printf("unary_expression -> factor\n");
            fprintf(logout,"%d : unary_expression : factor\n\n",line_count);
            fprintf(logout, "%s\n\n",yytext);
         }
         ;

factor  : variable {
    printf("factor -> variable\n");
    fprintf(logout,"%d : factor : variable\n\n",line_count);
    fprintf(logout, "%s\n\n",yytext);
}
    | ID LPAREN argument_list RPAREN
    | LPAREN expression RPAREN
    | CONST_INT 
    | CONST_FLOAT
    | variable INCOP 
    | variable DECOP
    ;

argument_list : arguments
              |
              ;

arguments : arguments COMMA logic_expression
          | logic_expression
          ;




%%

int main(int argc, char *argv[])
{

    FILE *fp  ;
    int token = 0;
    if((fp = fopen(argv[1],"r")) == NULL)
    {
        fprintf(logout,"cannot open file");
        exit(1);
    }


    logout = fopen("log.txt","w");

    yyin = fp;
    yyparse();

    fclose(fp);
    fclose(logout);
    return 0;

}

input.txt

int var(int a, int b){
return a+b;

}

output I'm getting :

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term
rel_expression  -> simple_expression 
expression -> logic_expression
syntax error, unexpected ID, expecting SEMICOLON

expected output is :

type_specifier -> INT
type_specifier -> INT
parameter_list -> type_specifier ID
type_specifier -> INT
parameter_list -> parameter_list COMMA type_specifier ID
variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression -> term

variable -> ID
factor -> variable
unary_expression -> factor
term -> unary_expression
simple_expression : simple_expression ADDOP term
rel_expression  -> simple_expression 
logic_expression : rel_expression
expression -> logic_expression
statement : RETURN expression SEMICOLON
statements : statement
compound_statement : LCURL statements RCURL
func_definition : type_specifier ID LPAREN parameter_list RPAREN compound_statement
unit : func_definition
program : program unit
start : program

Adding the flex file just in case

%option noyywrap

%{

#include<stdlib.h>
#include<stdio.h>
#include "y.tab.h"
#include "SymbolTable.h"
#include "SymbolInfo.h"
#include "ScopeTable.h"

void yyerror (char *);
extern YYSTYPE yylval;  
extern SymbolTable *table;
extern FILE *logout;
char *arr[100];
char *final_arr[100];

int k; //final_arr count
int i = 0; //arr count
int line_count = 1;

%}


id [a-z]*
DOUBLE (([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+)) 
newline \n

%%

{newline} {
        arr[i] = "\n",final_arr[k] = arr[i];
        i++; k++;
        line_count++;
    }

[ \t]+  {}
(([0-9]+(\.[0-9]*)?)|([0-9]*\.[0-9]+))  {
                        yylval.f = atof(yytext);
                        return DOUBLE;
                    }

"int" {
        memset(&arr,NULL,sizeof(arr)); i = 0;
        arr[i] = "int "; 
        final_arr[k] = "int ";
        i++; k++;
        return INT;
    }
"float" {
        memset(&arr,NULL,sizeof(arr)); i = 0;
        arr[i] = "float "; final_arr[k] = "float ";
        i++; k++;
        return FLOAT;
    }
"void"  {
        memset(&arr,NULL,sizeof(arr)); i = 0;
        arr[i] = "void "; final_arr[k] = "void ";
        i++; k++;
        return VOID;
    }   


";" {
        arr[i] = ";";final_arr[k] = ";";
        i++; k++;
        return SEMICOLON;}
"," {
        arr[i] = ","; final_arr[k] = ",";
        i++; k++;
        return COMMA;
    }
"(" {
        arr[i] = "(";final_arr[k] = "(";
        i++; k++;
        return LPAREN;}
")" {
        arr[i] = ")";final_arr[k] = ")";
        i++; k++;
        return RPAREN;}
"{" {return LCURL;}
"}" {return RCURL;}

{id}    {
        yylval.s = strdup(yytext);
        arr[i] = strdup(yytext); final_arr[k] = strdup(yytext);
        k++; i++;
        for(int j = 1; arr[j] != NULL; j++)
        {
            //fprintf(logout,"%s", arr[j]);
            //fprintf(logout,"arr [%d] %s\n ",j,arr[j]);
        }
        //fprintf(logout,"\n\n");
        return ID;

        }

%%                          
Rabassa answered 12/6, 2018 at 9:12 Comment(6)
Program questions are off-topic here. Debugging is probably off-topic in most sites on the network.Knepper
where should i post it thenRabassa
@Rabassa Sorry but "Please debug my code" is off-topic everywhere on Stack Exchange.Dextrorse
@afsara_ben: I've tried to make this question more general so that it would be appropriate for SO, and requested the moderators to move it there. In the meantime, there are several hints in my answer about what you did wrong -- and, more importantly, about how you can help yourself to debug your code.Committal
@Committal Yeah, I deleted my comment on your answer after I saw your comment here on the question. Thanks for doing something constructive and useful with the question!Dextrorse
Neither return nor + is handled by the lexical parser.Eckel
C
18

You seem to have spent an awful lot of effort trying to implement a way of tracing what's going on in your parser, and to little effect since the problem here is simply a missing lexer keyword rule.

You would be much better off using the built-in debugging features of flex and bison. Then your grammar and lexer would be much simpler and easier to read, and the debugging output would be more complete (and would let you trace the behaviour through the state table).

Here's a quick summary. It's a snap, really.

  1. Add --debug to your bison command. That will cause bison to generate code to trace your parse. (If you're lazy, you can use -t -- for trace -- which is the Posix standard command-line option, and should also work with yacc, byacc, btyacc, etc., etc.)

  2. Add the following three lines at the beginning of main, assuming that main is in your .y file:

    #ifdef YYDEBUG
      yydebug = 1;
    #endif
    

    For additional bonus points, you could make this assignment conditional on some command line flag.

    Once you do that, you will receive the following trace output:

    ... snip ... Pick up the trace at the ) at the end of the parameter list
    Reading a token: Next token is token RPAREN ()
    Shifting token RPAREN ()
    Entering state 28
    Reading a token: Next token is token LCURL ()
    Shifting token LCURL ()
    Entering state 25
    Reading a token: Next token is token ID ()
    Shifting token ID ()
    Entering state 44
    Reading a token: Next token is token ID ()
    ... snip ...
    

    Note that two IDs were returned after the curly bracket, corresponding to the tokens return and a.

  3. You can also enable tracing in flex with flex --debug (or -d). This causes the scanner to produce an output line of the form

    --accepting rule at line 85 ("return")
    

    for every accepted token (and some other lines). You need to check the line numbers against your source code, unfortunately, but in this case you might have noticed the similarity between the above and

    --accepting rule at line 85 ("b")
    

    For additional debugging simplicity, it's worth getting into the habit writing your scanner in a way that it can be compiled independently of the parser. Then you can test your scanner by compiling it separately using the main() implementation in -lfl.

References and more debugging information:

Committal answered 12/6, 2018 at 14:43 Comment(3)
How to use yydebug when bison parse is reentrant ?Cattalo
@linrongbin: as far as I know, yydebug is not affected by reentrancy; it is still a global variable. Did you encounter some problem using it?Committal
Incredible analysis! Thank you @Committal Very helpfulRobrobaina

© 2022 - 2024 — McMap. All rights reserved.