From 89bd70dbbfbbbbc42ffc06ee6850017f81b572aa Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Fri, 9 Oct 2020 13:31:57 +0200 Subject: [PATCH] [ WIP #302 ] isUsedCat distinguishes between Internal and Parsable Haskell: prevent internal user token types to become part of the lexer. TODO: exclude them also from being mentioned in the parser (currently broken). Lexer and Parser should share a common functionality that makes sure they do not get out of sync. --- source/src/BNFC/Backend/Agda.hs | 2 +- source/src/BNFC/Backend/C.hs | 10 +++---- source/src/BNFC/Backend/C/CFtoBisonC.hs | 2 +- source/src/BNFC/Backend/C/CFtoFlexC.hs | 2 +- source/src/BNFC/Backend/CPP/NoSTL.hs | 10 +++---- source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs | 2 +- source/src/BNFC/Backend/CPP/STL.hs | 10 +++---- source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs | 2 +- source/src/BNFC/Backend/CSharp/CFtoGPPG.hs | 2 +- source/src/BNFC/Backend/Haskell/CFtoAlex.hs | 2 +- source/src/BNFC/Backend/Haskell/CFtoAlex2.hs | 2 +- source/src/BNFC/Backend/Haskell/CFtoAlex3.hs | 5 +++- .../src/BNFC/Backend/Haskell/CFtoPrinter.hs | 2 +- source/src/BNFC/Backend/Haskell/ToCNF.hs | 3 +- .../src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs | 2 +- source/src/BNFC/Backend/Java/CFtoCup15.hs | 2 +- source/src/BNFC/Backend/Java/CFtoJLex15.hs | 2 +- source/src/BNFC/Backend/Latex.hs | 2 +- .../BNFC/Backend/OCaml/CFtoOCamlPrinter.hs | 2 +- .../src/BNFC/Backend/OCaml/CFtoOCamlShow.hs | 2 +- source/src/BNFC/Backend/Txt2Tag.hs | 3 +- source/src/BNFC/Backend/XML.hs | 4 +-- source/src/BNFC/CF.hs | 30 ++++++++++++++----- .../204_InternalToken/test.cf | 9 ++++-- 24 files changed, 69 insertions(+), 45 deletions(-) diff --git a/source/src/BNFC/Backend/Agda.hs b/source/src/BNFC/Backend/Agda.hs index 8cb528d1d..fcea382c5 100644 --- a/source/src/BNFC/Backend/Agda.hs +++ b/source/src/BNFC/Backend/Agda.hs @@ -233,7 +233,7 @@ cf2AgdaAST time tokenText mod amod pmod cf = vsep $ -- getAbstractSyntax also includes list categories, which isn't what we need -- The user-defined token categories (including Ident). tcats :: [(TokenCat, Bool)] - tcats = (if hasIdent cf then ((catIdent, False) :) else id) + tcats = (if hasIdent Internal cf then ((catIdent, False) :) else id) [ (wpThing name, b) | TokenReg name b _ <- cfgPragmas cf ] -- Bind printers for the following categories (involves lists and literals). printerCats :: [Cat] diff --git a/source/src/BNFC/Backend/C.hs b/source/src/BNFC/Backend/C.hs index e1350c074..fa9a2207e 100644 --- a/source/src/BNFC/Backend/C.hs +++ b/source/src/BNFC/Backend/C.hs @@ -235,19 +235,19 @@ mkHeaderFile _ cf env = unlines $ concat where mkDefines n [] = mkString n mkDefines n (s:ss) = ("#define " ++ s +++ (show n) ++ "\n") ++ (mkDefines (n+1) ss) - mkString n = if isUsedCat cf (TokenCat catString) + mkString n = if isUsedCat Parsable cf (TokenCat catString) then ("#define _STRING_ " ++ show n ++ "\n") ++ mkChar (n+1) else mkChar n - mkChar n = if isUsedCat cf (TokenCat catChar) + mkChar n = if isUsedCat Parsable cf (TokenCat catChar) then ("#define _CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1) else mkInteger n - mkInteger n = if isUsedCat cf (TokenCat catInteger) + mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger) then ("#define _INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1) else mkDouble n - mkDouble n = if isUsedCat cf (TokenCat catDouble) + mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble) then ("#define _DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1) else mkIdent n - mkIdent n = if isUsedCat cf (TokenCat catIdent) + mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent) then ("#define _IDENT_ " ++ show n ++ "\n") else "" -- Andreas, 2019-04-29, issue #210: generate parsers also for coercions diff --git a/source/src/BNFC/Backend/C/CFtoBisonC.hs b/source/src/BNFC/Backend/C/CFtoBisonC.hs index 7ce8a526a..ce9495213 100644 --- a/source/src/BNFC/Backend/C/CFtoBisonC.hs +++ b/source/src/BNFC/Backend/C/CFtoBisonC.hs @@ -267,7 +267,7 @@ specialToks cf = unlines $ concat , ifC catIdent "%token<_string> _IDENT_" ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then [s] else [] + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then [s] else [] startSymbol :: CF -> String startSymbol cf = "%start" +++ identCat (firstEntry cf) diff --git a/source/src/BNFC/Backend/C/CFtoFlexC.hs b/source/src/BNFC/Backend/C/CFtoFlexC.hs index 8de6838e6..955d9ebb3 100644 --- a/source/src/BNFC/Backend/C/CFtoFlexC.hs +++ b/source/src/BNFC/Backend/C/CFtoFlexC.hs @@ -158,7 +158,7 @@ restOfFlex cf env = unlines $ concat , footer ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else [] userDefTokens = [ "" ++ printRegFlex exp ++ " \t yylval._string = strdup(yytext); return " ++ sName name ++ ";" diff --git a/source/src/BNFC/Backend/CPP/NoSTL.hs b/source/src/BNFC/Backend/CPP/NoSTL.hs index ba577d2c5..b1e8b46eb 100644 --- a/source/src/BNFC/Backend/CPP/NoSTL.hs +++ b/source/src/BNFC/Backend/CPP/NoSTL.hs @@ -163,19 +163,19 @@ mkHeaderFile cf cats eps env = unlines $ concat mkVar _ = [] mkDefines n [] = mkString n mkDefines n (s:ss) = "#define " ++ s +++ show n ++ "\n" ++ mkDefines (n+1) ss - mkString n = if isUsedCat cf (TokenCat catString) + mkString n = if isUsedCat Parsable cf (TokenCat catString) then ("#define _STRING_ " ++ show n ++ "\n") ++ mkChar (n+1) else mkChar n - mkChar n = if isUsedCat cf (TokenCat catChar) + mkChar n = if isUsedCat Parsable cf (TokenCat catChar) then ("#define _CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1) else mkInteger n - mkInteger n = if isUsedCat cf (TokenCat catInteger) + mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger) then ("#define _INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1) else mkDouble n - mkDouble n = if isUsedCat cf (TokenCat catDouble) + mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble) then ("#define _DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1) else mkIdent n - mkIdent n = if isUsedCat cf (TokenCat catIdent) + mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent) then "#define _IDENT_ " ++ show n ++ "\n" else "" mkFunc s = identCat (normCat s) ++ "*" +++ "p" ++ identCat s ++ "(FILE *inp);" diff --git a/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs b/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs index bfee72060..02c49d3d3 100644 --- a/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs +++ b/source/src/BNFC/Backend/CPP/NoSTL/CFtoFlex.hs @@ -117,7 +117,7 @@ restOfFlex inPackage cf env = unlines $ concat , footer ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else [] ns = nsString inPackage userDefTokens = [ "" ++ printRegFlex exp ++ diff --git a/source/src/BNFC/Backend/CPP/STL.hs b/source/src/BNFC/Backend/CPP/STL.hs index abd8365e1..33dbc4c79 100644 --- a/source/src/BNFC/Backend/CPP/STL.hs +++ b/source/src/BNFC/Backend/CPP/STL.hs @@ -204,19 +204,19 @@ mkHeaderFile inPackage cf cats eps env = unlines $ concat mkVar _ = [] mkDefines n [] = mkString n mkDefines n (s:ss) = "#define " ++ s +++ show n ++ "\n" ++ mkDefines (n+1) ss -- "nsDefine inPackage s" not needed (see cf2flex::makeSymEnv) - mkString n = if isUsedCat cf (TokenCat catString) + mkString n = if isUsedCat Parsable cf (TokenCat catString) then ("#define " ++ nsDefine inPackage "_STRING_ " ++ show n ++ "\n") ++ mkChar (n+1) else mkChar n - mkChar n = if isUsedCat cf (TokenCat catChar) + mkChar n = if isUsedCat Parsable cf (TokenCat catChar) then ("#define " ++ nsDefine inPackage "_CHAR_ " ++ show n ++ "\n") ++ mkInteger (n+1) else mkInteger n - mkInteger n = if isUsedCat cf (TokenCat catInteger) + mkInteger n = if isUsedCat Parsable cf (TokenCat catInteger) then ("#define " ++ nsDefine inPackage "_INTEGER_ " ++ show n ++ "\n") ++ mkDouble (n+1) else mkDouble n - mkDouble n = if isUsedCat cf (TokenCat catDouble) + mkDouble n = if isUsedCat Parsable cf (TokenCat catDouble) then ("#define " ++ nsDefine inPackage "_DOUBLE_ " ++ show n ++ "\n") ++ mkIdent(n+1) else mkIdent n - mkIdent n = if isUsedCat cf (TokenCat catIdent) + mkIdent n = if isUsedCat Parsable cf (TokenCat catIdent) then "#define " ++ nsDefine inPackage "_IDENT_ " ++ show n ++ "\n" else "" mkFuncs s = diff --git a/source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs b/source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs index 9a29b731c..11ca92039 100644 --- a/source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs +++ b/source/src/BNFC/Backend/CSharp/CFtoGPLEX.hs @@ -170,7 +170,7 @@ gplex namespace cf env = concat [ [("." , "return (int)Tokens.error;")] ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else [] + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else [] userDefTokens = map tokenline (tokenPragmas cf) where tokenline (name, exp) = ("" ++ printRegGPLEX exp , action name) diff --git a/source/src/BNFC/Backend/CSharp/CFtoGPPG.hs b/source/src/BNFC/Backend/CSharp/CFtoGPPG.hs index 655f5d90c..c585eda52 100644 --- a/source/src/BNFC/Backend/CSharp/CFtoGPPG.hs +++ b/source/src/BNFC/Backend/CSharp/CFtoGPPG.hs @@ -180,7 +180,7 @@ specialToks cf = unlinesInline [ ifC catIdent "%token IDENT_" ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" --The following functions are a (relatively) straightforward translation --of the ones in CFtoHappy.hs diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex.hs index b354e6407..ca9a2e704 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex.hs @@ -153,7 +153,7 @@ restOfAlex cf = [ ] where ifC :: TokenCat -> String -> String - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" lexComments ([],[]) = [] lexComments (xs,s1:ys) = "<> ::= " ++ ('^':intersperse '^' s1) ++ " [.]* ^n\n" ++ lexComments (xs,ys) lexComments (([l1,l2],[r1,r2]):xs,[]) = concat diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex2.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex2.hs index d289b0a01..3986b6dcc 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex2.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex2.hs @@ -222,7 +222,7 @@ restOfAlex _ shareStrings tokenText cf = [ TextToken -> ("Data.Text.Text", "Data.Text.take", "Data.Text.uncons", "Data.Text.pack", "Data.Text.unpack", "Nothing", "Just (c,s)") ifC :: TokenCat -> String -> String - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" lexComments ([],[]) = [] lexComments (xs,s1:ys) = '\"' : s1 ++ "\"" ++ " [.]* ; -- Toss single line comments\n" ++ lexComments (xs, ys) lexComments (([l1,l2],[r1,r2]):xs,[]) = concat diff --git a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs index 9b39abed8..9f2766307 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoAlex3.hs @@ -275,7 +275,7 @@ restOfAlex _ shareStrings tokenText cf = [ applyP f s = f ++ " (" ++ s ++ ")" ifC :: TokenCat -> String -> String - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" lexComments :: ( [(String, String)] -- block comment delimiters @@ -308,16 +308,19 @@ restOfAlex _ shareStrings tokenText cf = [ [ printRegAlex exp ++ "\n { tok (\\p s -> PT p (eitherResIdent (T_" ++ name ++ " . share) s)) }" | (name,exp) <- tokenPragmas cf + , isUsedCat Parsable cf $ TokenCat name ] userDefTokenConstrs = unlines [ " | T_" ++ name ++ " !"++stringType | name <- tokenNames cf + , isUsedCat Parsable cf $ TokenCat name ] userDefTokenPrint = unlines [ " PT _ (T_" ++ name ++ " s) -> s" | name <- tokenNames cf + , isUsedCat Parsable cf $ TokenCat name ] ident = diff --git a/source/src/BNFC/Backend/Haskell/CFtoPrinter.hs b/source/src/BNFC/Backend/Haskell/CFtoPrinter.hs index fd4a7c0f7..301e2d8de 100644 --- a/source/src/BNFC/Backend/Haskell/CFtoPrinter.hs +++ b/source/src/BNFC/Backend/Haskell/CFtoPrinter.hs @@ -56,7 +56,7 @@ cf2Printer tokenText functor useGadt name absMod cf = unlines $ concat $ [ prologue tokenText useGadt name absMod , integerRule absMod cf , doubleRule absMod cf - , if hasIdent cf then identRule absMod tokenText cf else [] + , if hasIdent Internal cf then identRule absMod tokenText cf else [] ] ++ [ ownPrintRule absMod tokenText cf own | (own,_) <- tokenPragmas cf ] ++ [ rules absMod functor cf ] diff --git a/source/src/BNFC/Backend/Haskell/ToCNF.hs b/source/src/BNFC/Backend/Haskell/ToCNF.hs index 01020e032..f7a619443 100644 --- a/source/src/BNFC/Backend/Haskell/ToCNF.hs +++ b/source/src/BNFC/Backend/Haskell/ToCNF.hs @@ -200,13 +200,14 @@ genTokTable units cf = vcat , "tokenToCats p t = error (\"unknown token: \" ++ show t)" ] +tokInfo :: CFG f -> [ (TokenCat, Doc, Exp) ] tokInfo cf = concat $ [ [ (catChar , "TC", Con "head") , (catString , "TL", Id) , (catInteger, "TI", Con "readInteger") , (catDouble , "TD", Con "readDouble") ] - , [ (catIdent,"TV", Con "Ident") | hasIdent cf ] + , [ (catIdent,"TV", Con "Ident") | hasIdent Parsable cf ] , [ (t, "T_" <> text t, Con t) | (t, _) <- tokenPragmas cf ] ] diff --git a/source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs b/source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs index cae7b7742..d1d591259 100644 --- a/source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs +++ b/source/src/BNFC/Backend/Java/CFtoAntlr4Lexer.hs @@ -139,7 +139,7 @@ restOfLexerGrammar cf = vcat , ifChar charmodes ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then vcat s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then vcat s else "" ifString = ifC catString ifChar = ifC catChar strdec = [ "// String token type" diff --git a/source/src/BNFC/Backend/Java/CFtoCup15.hs b/source/src/BNFC/Backend/Java/CFtoCup15.hs index 983796dee..1996039b8 100644 --- a/source/src/BNFC/Backend/Java/CFtoCup15.hs +++ b/source/src/BNFC/Backend/Java/CFtoCup15.hs @@ -194,7 +194,7 @@ specialToks cf = unlines , ifC catIdent "terminal String _IDENT_;" ] where - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" specialRules:: CF -> String specialRules cf = diff --git a/source/src/BNFC/Backend/Java/CFtoJLex15.hs b/source/src/BNFC/Backend/Java/CFtoJLex15.hs index 1d6a606fb..887a1529a 100644 --- a/source/src/BNFC/Backend/Java/CFtoJLex15.hs +++ b/source/src/BNFC/Backend/Java/CFtoJLex15.hs @@ -196,7 +196,7 @@ restOfJLex jflex rp cf = vcat ] where ifC :: TokenCat -> Doc -> Doc - ifC cat s = if isUsedCat cf (TokenCat cat) then s else "" + ifC cat s = if isUsedCat Parsable cf (TokenCat cat) then s else "" userDefTokens = vcat [ "" <> text (printRegJLex jflex exp) <+> "{ return cf.newSymbol(\"\", sym." <> text name diff --git a/source/src/BNFC/Backend/Latex.hs b/source/src/BNFC/Backend/Latex.hs index 90fa0e746..12869a66b 100644 --- a/source/src/BNFC/Backend/Latex.hs +++ b/source/src/BNFC/Backend/Latex.hs @@ -111,7 +111,7 @@ prtTerminals name cf = unlines $ identSection :: CF -> [String] identSection cf - | hasIdent cf = [ "\\subsection*{Identifiers}" ] ++ prtIdentifiers + | hasIdent Parsable cf = [ "\\subsection*{Identifiers}" ] ++ prtIdentifiers | otherwise = [] prtIdentifiers :: [String] diff --git a/source/src/BNFC/Backend/OCaml/CFtoOCamlPrinter.hs b/source/src/BNFC/Backend/OCaml/CFtoOCamlPrinter.hs index 1b42a6482..8c9eb4097 100644 --- a/source/src/BNFC/Backend/OCaml/CFtoOCamlPrinter.hs +++ b/source/src/BNFC/Backend/OCaml/CFtoOCamlPrinter.hs @@ -44,7 +44,7 @@ cf2Printer _name absMod cf = unlines [ integerRule cf, doubleRule cf, stringRule cf, - if hasIdent cf then identRule absMod cf else "", + if hasIdent Internal cf then identRule absMod cf else "", unlines [ownPrintRule absMod cf own | (own,_) <- tokenPragmas cf], rules absMod cf ] diff --git a/source/src/BNFC/Backend/OCaml/CFtoOCamlShow.hs b/source/src/BNFC/Backend/OCaml/CFtoOCamlShow.hs index d7a7932f6..62718e067 100644 --- a/source/src/BNFC/Backend/OCaml/CFtoOCamlShow.hs +++ b/source/src/BNFC/Backend/OCaml/CFtoOCamlShow.hs @@ -37,7 +37,7 @@ cf2show _name absMod cf = unlines [ prologue , integerRule , doubleRule - , if hasIdent cf then identRule absMod cf else "" + , if hasIdent Internal cf then identRule absMod cf else "" , unlines [ ownPrintRule absMod cf own | (own,_) <- tokenPragmas cf ] , rules absMod cf ] diff --git a/source/src/BNFC/Backend/Txt2Tag.hs b/source/src/BNFC/Backend/Txt2Tag.hs index 11ce08d19..00324d08b 100644 --- a/source/src/BNFC/Backend/Txt2Tag.hs +++ b/source/src/BNFC/Backend/Txt2Tag.hs @@ -57,7 +57,8 @@ prtTerminals name cf = unlines $ , prtComments $ comments cf ] -identSection cf = if not (hasIdent cf) then [] else +identSection :: CF -> String +identSection cf = if not (hasIdent Parsable cf) then [] else unlines [ "===Identifiers===", prtIdentifiers diff --git a/source/src/BNFC/Backend/XML.hs b/source/src/BNFC/Backend/XML.hs index 44cb3b965..a53546c67 100644 --- a/source/src/BNFC/Backend/XML.hs +++ b/source/src/BNFC/Backend/XML.hs @@ -48,7 +48,7 @@ cf2DTD typ name cf = unlines [ elemEmp "Integer", elemEmp "Double", elemEmp "String", - if hasIdent cf then elemEmp "Ident" else "", + if hasIdent Internal cf then elemEmp "Ident" else "", unlines [elemEmp own | own <- tokenNames cf], unlines (map (elemData typ cf) (cf2data cf)), "]>" @@ -141,7 +141,7 @@ cf2XMLPrinter typ opts absMod cf = unlines [ integerRule cf, doubleRule cf, stringRule cf, - if hasIdent cf then identRule cf else "", + if hasIdent Internal cf then identRule cf else "", unlines [ownPrintRule cf own | (own,_) <- tokenPragmas cf], rules cf ] diff --git a/source/src/BNFC/CF.hs b/source/src/BNFC/CF.hs index 3042728f1..03dd60ccd 100644 --- a/source/src/BNFC/CF.hs +++ b/source/src/BNFC/CF.hs @@ -173,6 +173,12 @@ data InternalRule | Parsable -- ^ ordinary rule (also for parser) deriving (Eq) +-- | @'Parsable' < 'Internal'@. +-- This allows to select only the parsable rule by @(<= Parsable)@. +instance Ord InternalRule where + Internal <= Parsable = False + _ <= _ = True + instance (Show function) => Show (Rul function) where show (Rule f cat rhs internal) = unwords $ (if internal == Internal then ("internal" :) else id) $ @@ -613,10 +619,15 @@ allParserCatsNorm :: CFG f -> [Cat] allParserCatsNorm = nub . map normCat . allParserCats -- | Is the category is used on an rhs? --- Includes internal rules. -isUsedCat :: CFG f -> Cat -> Bool -isUsedCat cf = (`elem` [ c | Rule _ _ rhs _ <- cfgRules cf, Left c <- rhs ]) - -- TODO: isUsedCat is used in some places where the internal rules should be ignored. +-- +-- * @isUsedCat Parsable@ only looks at the rules that generate the parser. +-- +-- * @isUsedCat Internal@ also takes the internal rules into account +-- (relevant for AST and Printer). +-- +isUsedCat :: InternalRule -> CFG f -> Cat -> Bool +isUsedCat internal cf = flip elem + [ c | Rule _ _ rhs i <- cfgRules cf, Left c <- rhs, i <= internal ] -- | All token categories used in the grammar. -- Includes internal rules. @@ -660,8 +671,8 @@ numberOfBlockCommentForms = length . fst . comments -- built-in categories (corresponds to lexer) -- | Whether the grammar uses the predefined Ident type. -hasIdent :: CFG f -> Bool -hasIdent cf = isUsedCat cf $ TokenCat catIdent +hasIdent :: InternalRule -> CFG f -> Bool +hasIdent internal cf = isUsedCat internal cf $ TokenCat catIdent -- these need new datatypes @@ -669,7 +680,9 @@ hasIdent cf = isUsedCat cf $ TokenCat catIdent -- | Categories corresponding to tokens. These end up in the -- AST. (unlike tokens returned by 'cfTokens') specialCats :: CF -> [TokenCat] -specialCats cf = (if hasIdent cf then (catIdent:) else id) (map fst (tokenPragmas cf)) +specialCats cf = + (if hasIdent Internal cf then (catIdent:) else id) $ + map fst (tokenPragmas cf) -- * abstract syntax trees: data type definitions @@ -797,9 +810,10 @@ precCF :: CF -> Bool precCF cf = length (precLevels cf) > 1 -- | Defines or uses the grammar token types like @Ident@? +-- Includes internal rules. -- Excludes position tokens. hasIdentLikeTokens :: CFG g -> Bool -hasIdentLikeTokens cf = hasIdent cf || or [ not b | TokenReg _ b _ <- cfgPragmas cf ] +hasIdentLikeTokens cf = hasIdent Internal cf || or [ not b | TokenReg _ b _ <- cfgPragmas cf ] -- | Is there a @position token@ declaration in the grammar? hasPositionTokens :: CFG g -> Bool diff --git a/testing/regression-tests/204_InternalToken/test.cf b/testing/regression-tests/204_InternalToken/test.cf index dc26a38bd..ccd395eb4 100644 --- a/testing/regression-tests/204_InternalToken/test.cf +++ b/testing/regression-tests/204_InternalToken/test.cf @@ -2,8 +2,13 @@ -- Tokens from internal rules ended up in lexer. -- Andreas, 2019-11-24, issue #264. -- Internal non-terminals ended up as entrypoints. +-- Andreas, 2020-10-09, issue #302 +-- Internal token types end up in lexer and shadow parsable token types. internal Internal. Foo ::= "Internal"; -Main. Prg ::= Ident; +Main. Prg ::= Ident ; +internal IId. Prg ::= Id ; --- Should accept input "Internal". +token Id letter+ ; -- This overlaps with Ident, but should not confuse the parser. + +-- Should accept input `Internal`.