jison start conditions with json format
Asked Answered
J

1

6

Despite a long search through the documentation and forums, I still cannot find the right syntax for Jison start conditions using the JSON format in Node.js.

> Documentation at http://zaach.github.io/jison/docs/ says:
> // Using the JSON format, start conditions are defined with an array
> // before the rule's matcher
> {rules:[
>     [['expect'], '[0-9]+"."[0-9]+', 'console.log( "found a float, = " + yytext );'
>     ]]}

But unfortunately no one provides a full working sample.

I'm trying to exclude any text that lies between two tags. In lex I would use start conditions, and the Jison documentation says they should work. Nevertheless, as Jison's error messages are not very intuitive, I would be pleased to find a working sample to move forward.

Does anyone have a solution?

// Failing sample from the question: builds a Jison parser from a JSON grammar
// whose lexer rules are tagged with the start condition 'mode1'.
var jison    = require("jison").Parser;

grammar = {  
    "lex": {
        // Rules prefixed with ['mode1'] are meant to apply only while the lexer
        // is inside an <extensions> block. Note that this "lex" object contains
        // no "startConditions" entry declaring 'mode1'.
        "rules" : [ [" +" , "/* skip whitespace */"]
            ,[['mode1'], '[0-z]+\\b'        , "return 'INFO';"]
            ,[['mode1'], '<\\/extensions>'  , "this.popState(); return 'EXTEND';"]
            ,['<extensions>'                , "this.begin('mode1'); return 'EXTSTART';"]
            ,['$'                           , "return 'EOL';"]
        ]
    },  // end Lex rules

    // Parser rules: either an empty input, or one <extensions> block holding
    // a single INFO token.
    "bnf": { // WARNING: only one space in between TOKEN ex: "STOP EOF"
        'data': [["EOL"      , "this.cmd='EMPTY'    ; return (this);"]           
           ,['EXTSTART INFO EXTEND EOL'  ,"this.cmd='EXTEN';this.value=$2;return (this);"]
           ]
    }};

  // Generate the parser from the grammar object above.
  parser    = new jison(grammar);

  // Input buffer; the trailing backslashes join the lines into one string.
  test= "\
    <extensions>\
      <opencpn:start></opencpn:start><opencpn:end></opencpn:end>\
      <opencpn:viz>1</opencpn:viz>\
     <opencpn:guid>714d1d6e-78be-46a0-af6e-2f3d0c505f6d</opencpn:guid>\
    </extensions>";

  data=parser.parse (test);

My current sample fails with:

/node_modules/jison/node_modules/jison-lex/regexp-lexer.js:42 startConditions[conditions[k]].rules.push(i);

Jewess answered 17/9, 2014 at 11:37 Comment(0)
J
13

Unfortunately, no one provided an answer to my question :)

I had to reverse-engineer the Jison lexer code to find the right syntax. As I suppose it may help others, here is the answer :)

Jison's lexical start conditions work fine with the JSON syntax, but they must be predeclared in an object named "startConditions" (one key per condition name), as in the following example.

grammar = { 
   "lex": {

       "startConditions" : { "INITIAL":"// Default initial Jison/Lex context"
            ,"MOD_EXT": "// extenstions context "
            ,"MOD_RTE": "// routes context"
        },

        "rules" : [['[\\n\\s]+' , "/* skip whitespace & new lines */"]
            // extensions blocs
            ,[['INITIAL'], '<extensions>'     , "this.begin('MOD_EXT');"]
            ,[['MOD_EXT'], '<\\/extensions>'  , "this.popState();"]
            ,[['MOD_EXT'], '[<>\\/\\-\\s\\n]', "/* ignore */"]
            ,[['MOD_EXT'], '[0-z]+'           , "/* ignore */"]
        Etc...

To make everyone's life easier, here is a simple working example.

// Sample Jison start conditions with JSON syntax
var jison    = require("jison").Parser;

/**
 * Jison grammar (JSON format) that extracts route points from a GPX-like
 * buffer.
 *
 * The lexer uses two declared start conditions:
 *   - MOD_EXT: entered on <extensions>; everything up to </extensions> is
 *     consumed without producing tokens.
 *   - MOD_RTE: entered on <rtept; produces the tokens used by the parser
 *     rules (LAT, LON, CARD, TEXT and the *_BEG / *_END tag tokens).
 *
 * The parser collects one object per route point, shaped as
 * {lat, lon, name, date}, into `this.route`, which is returned at end of input.
 */
var grammar = {

    "lex": {
         "macros": {  // a few useful macros, referenced from rules as {name}
            "slash": "\\/",
            "space": "\\s+",
            "quot" : "\\\'",
            "dquot": "\\\"",
            "dot"  : "\\.",
            "digit": "[0-9]",
            "int"  : "-?([0-9]+)",
            "float": "-?([0-9]*\\.[0-9]+)",
            // One or more hexadecimal digits. The previous pattern had
            // unbalanced brackets and matched the literal text "a-h".
            "hexa" : "[0-9a-fA-F]+"
        },

       // Every condition name used in a rule's leading array must be declared
       // here, otherwise lexer generation fails.
       // NOTE(review): jison-lex appears to treat a truthy value as an
       // exclusive condition - confirm against the jison-lex source.
       "startConditions" : { "INITIAL":"// Default initial Jison/Lex context"
            ,"MOD_EXT": "// extenstions context "
            ,"MOD_RTE": "// routes context"
        },

        "rules" : [['[\\n\\s]+' , "/* skip whitespace & new lines */"]
            // extensions blocks: enter MOD_EXT and discard everything until
            // the closing tag (none of these actions returns a token)
            ,[['INITIAL'], '<extensions>'     , "this.begin('MOD_EXT');"]
            ,[['MOD_EXT'], '<\\/extensions>'  , "this.popState();"]
            ,[['MOD_EXT'], '[<>\\/\\-\\s\\n]', "/* ignore */"]
            ,[['MOD_EXT'], '[0-z]+'           , "/* ignore */"]

            // route point blocks
            ,[['INITIAL'],'<rtept' , "this.begin('MOD_RTE'); return 'RTE_BEG';"]
            ,[['MOD_RTE'], '<\\/rtept>'       , "this.popState(); return 'RTE_END';"]
            ,[['MOD_RTE'], 'lat='             , "return 'LAT';"]
            ,[['MOD_RTE'], 'lon='             , "return 'LON';"]
            ,[['MOD_RTE'], '{float}'          , "return 'CARD';"]
            ,[['MOD_RTE'], '<name>'           , "return 'NAME_BEG';"]
            ,[['MOD_RTE'], '<\\/name>'        , "return 'NAME_END';"]
            ,[['MOD_RTE'], '<time>'           , "return 'TIME_BEG';"]
            ,[['MOD_RTE'], '<\\/time>'        , "return 'TIME_END';"]
            ,[['MOD_RTE'], '<sym>'            , "return 'SYM_BEG';"]
            ,[['MOD_RTE'], '<\\/sym>'         , "return 'SYM_END';"]
            ,[['MOD_RTE'], '<type>'           , "return 'TYPE_BEG';"]
            ,[['MOD_RTE'], '<\\/type>'        , "return 'TYPE_END';"]
            // nested <extensions> inside a route point: popState() in the
            // MOD_EXT closing rule returns the lexer to MOD_RTE
            ,[['MOD_RTE'], '<extensions>'     , "this.begin('MOD_EXT');"]
            ,[['MOD_RTE'], '([0-z]|[-+])+\\b' , "return 'TEXT';"]
            // NOTE(review): macros are referenced inside a character class
            // here - confirm how jison-lex expands them in that position.
            ,[['MOD_RTE'], '[>{quot}{dquot}{space}]' , "// ignore"]

            // end of parsing buffer
            ,['$'                            , "return 'EOL';"]
        ]
    },  // end Lex rules

    "bnf": { // WARNING: only one space in between TOKEN ex: "STOP EOF"
        // Entry rule: empty input yields the string 'EMPTY', otherwise the
        // array of route points collected in this.route.
        'data': [
            ["EOL"           ,  "return ('EMPTY');"]
           ,["ROUTEPOINTS EOL", "return (this.route);"]
           ]

        // handle multiple waypoints: the first one creates the array,
        // each following one is appended to it
        ,'ROUTEPOINTS' : [ // store all waypoints in an array
            ["ROUTEPOINT", "console.log('Parsing First Waypts=%j',this.waypts);this.route=[]; this.route.push(this.waypts);"]
           ,["ROUTEPOINTS ROUTEPOINT", "console.log('Parsing Next  Waypts=%j',this.waypts);;this.route.push(this.waypts);"]

        ]

        // A routepoint should at least have a LAT+LONG+NAME
        ,'ROUTEPOINT' :  [ // <rtept lat='47.542780648' lon='-2.896743643'>...
            ["RTE_BEG LATITUDE LONGITUDE DATE NAME SYM TYPE RTE_END", "this.waypts={lat:$2,lon:$3,name:$5,date:$4};"]
           ,["RTE_BEG LATITUDE LONGITUDE DATE NAME RTE_END", "this.waypts={lat:$2,lon:$3,name:$5,date:$4};"]
           ,["RTE_BEG LATITUDE LONGITUDE NAME RTE_END", "this.waypts={lat:$2,lon:$3,name:$4,date:'unknow'};"]
        ]  
        // lat='47.542780648'
        ,'LATITUDE'    : [["LAT CARD", "$$=$2;"]]          
        // lon='-2.896743643'
        , 'LONGITUDE'  : [["LON CARD", "$$=$2;"]]
        // <time>2014-09-16T21:55:19Z</time>
        , 'DATE'       : [["TIME_BEG TEXT TIME_END", "$$=$2;"]]
        // <name>001</name>
        , 'NAME'       : [["NAME_BEG TEXT NAME_END", "$$=$2;"]]
        // <sym>diamond</sym> (parsed but not stored)
        , 'SYM'        : [["SYM_BEG TEXT SYM_END", "//ignore"]]
        // <type>WPT</type> (parsed but not stored)
        , 'TYPE'       : [["TYPE_BEG TEXT TYPE_END", "//ignore"]]

    }};


  // Build the parser from the grammar object, parse a sample GPX fragment
  // and print every waypoint that was extracted.
  var parser = new jison(grammar);

  // Sample input; the trailing backslashes join the lines into one string.
  var test = "\
    <extensions>\
        <opencpn:start></opencpn:start><opencpn:end></opencpn:end>\
        <opencpn:viz>1</opencpn:viz>\
        <opencpn:guid>714d1d6e-78be-46a0-af6e-2f3d0c505f6d</opencpn:guid>\
    </extensions>\
    <rtept lat='47.542780648' lon='-2.896743643'>\
        <time>2014-09-16T21:55:19Z</time>\
        <name>001</name>\
        <sym>diamond</sym>\
        <type>WPT</type>\
        <extensions>\
            <opencpn:guid>408c309c-6a8c-411d-815b-0c0054646d45</opencpn:guid>\
            <opencpn:viz>1</opencpn:viz>\
            <opencpn:viz_name>0</opencpn:viz_name>\
            <opencpn:auto_name>1</opencpn:auto_name>\
        </extensions>\
    </rtept>\
    <rtept lat='44.542780648' lon='-4.896743643'>\
        <time>2014-08-16T21:55:19Z</time>\
        <name>002</name>\
    </rtept>\
    <rtept lat='43.542780648' lon='-5.896743643'>\
        <name>003</name>\
    </rtept>\
    <rtept lat='48.542780648' lon='-3.896743643'>\
        <time>2014-10-16T21:55:19Z</time>\
        <name>004</name>\
        <sym>diamond</sym>\
        <type>WPT</type>\
        <extensions>\
            <opencpn:guid>408c309c-6a8c-411d-815b-0c0054646d45</opencpn:guid>\
            <opencpn:viz>1</opencpn:viz>\
            <opencpn:viz_name>0</opencpn:viz_name>\
            <opencpn:auto_name>1</opencpn:auto_name>\
        </extensions>\
    </rtept>";

  var route = parser.parse (test);

  console.log ("\n\nMy GPX route's waypoints");
  // The grammar returns the string 'EMPTY' for an empty buffer, so only
  // iterate when an actual array of waypoints came back.
  if (Array.isArray (route)) {
      route.forEach (function (waypt) {
          // Arguments follow the label order of the format string: Lon, then Lat.
          console.log (" -- name: %s  Lon: %s Lat:%s Date:%s", waypt.name, waypt.lon, waypt.lat, waypt.date);
      });
  }

  console.log ("done");
Jewess answered 22/9, 2014 at 11:39 Comment(1)
Why not use an XML parser for this task? But thanks for explaining the answer to your question :) — Philipps

© 2022 - 2024 — McMap. All rights reserved.