{ #if __GLASGOW_HASKELL__ > 800 {-# OPTIONS_GHC -Wno-error=deprecated-flags #-} {-# OPTIONS_GHC -Wno-error=missing-signatures #-} {-# OPTIONS_GHC -Wno-error=tabs #-} {-# OPTIONS_GHC -Wno-error=unused-imports #-} #endif {-# OPTIONS_GHC -fno-warn-deprecated-flags #-} {-# OPTIONS_GHC -fno-warn-missing-signatures #-} {-# OPTIONS_GHC -fno-warn-tabs #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} {-| The lexer is generated by Alex (<http://www.haskell.org/alex>) and is an adaptation of GHC's lexer. The main lexing function 'lexer' is called by the "Agda.Syntax.Parser.Parser" to get the next token from the input. -} module Agda.Syntax.Parser.Lexer ( -- * The main function lexer -- * Lex states , normal, code , layout, empty_layout, bol, imp_dir -- * Alex generated functions , AlexReturn(..), alexScanUser ) where import Agda.Syntax.Parser.Alex import Agda.Syntax.Parser.Comments #ifndef __HADDOCK__ import {-# SOURCE #-} Agda.Syntax.Parser.Layout import {-# SOURCE #-} Agda.Syntax.Parser.LexActions #endif import Agda.Syntax.Parser.Monad import Agda.Syntax.Parser.StringLiterals import Agda.Syntax.Parser.Tokens import Agda.Syntax.Literal } -- Note that the regular expressions should not use non-ASCII -- characters, see Agda.Syntax.Parser.Alex.alexGetByte. $digit = 0-9 $hexdigit = [ $digit a-f A-F ] $binarydigit = 0-1 $alpha = [ A-Z a-z _ ] $op = [ \- \! \# \$ \% \& \* \+ \/ \< \= \> \^ \| \~ \? \` \[ \] \, \: ] $idstart = [ $digit $alpha $op ] $idchar = [ $idstart ' \\ ] $nonalpha = $idchar # $alpha $white_notab = $white # \t $white_nonl = $white_notab # \n @prettynumber = $digit+ ([_] $digit+)* | "0x" $hexdigit+ ([_] $hexdigit+)* | "0b" $binarydigit+ ([_] $binarydigit+)* @integer = [\-]? @prettynumber @decimal = $digit+ @exponent = [eE] [\-\+]? @decimal @float = [\-]? @decimal \. @decimal @exponent? | [\-]? @decimal @exponent -- A name can't start with \x (to allow \x -> x). -- Bug in alex: [ _ op ]+ doesn't seem to work! @start = ($idstart # [_]) | \\ [ $nonalpha ] @ident = @start $idchar* | [_] $idchar+ @namespace = (@ident \.)* @q_ident = @namespace @ident tokens :- -- White space <0,code,bol_,layout_,empty_layout_,imp_dir_> $white_nonl+ ; <pragma_,fpragma_> $white_notab ; -- Pragmas <0,code,pragma_> "{-#" { beginWith pragma $ symbol SymOpenPragma } <fpragma_> "{-#" { beginWith fpragma $ symbol SymOpenPragma } <pragma_,fpragma_> "#-}" { endWith $ symbol SymClosePragma } <pragma_> "BUILTIN" { keyword KwBUILTIN } <pragma_> "CATCHALL" { keyword KwCATCHALL } <pragma_> "COMPILE" { endWith $ beginWith fpragma $ keyword KwCOMPILE } <pragma_> "FOREIGN" { endWith $ beginWith fpragma $ keyword KwFOREIGN } <pragma_> "DISPLAY" { keyword KwDISPLAY } <pragma_> "ETA" { keyword KwETA } <pragma_> "IMPOSSIBLE" { keyword KwIMPOSSIBLE } <pragma_> "INJECTIVE" { keyword KwINJECTIVE } <pragma_> "INJECTIVE_FOR_INFERENCE" { keyword KwINJECTIVE_FOR_INFERENCE } <pragma_> "INLINE" { keyword KwINLINE } <pragma_> "INCOHERENT" { keyword KwINCOHERENT } <pragma_> "NOINLINE" { keyword KwNOINLINE } <pragma_> "NOT_PROJECTION_LIKE" { keyword KwNOT_PROJECTION_LIKE } <pragma_> "LINE" { keyword KwLINE } <pragma_> "MEASURE" { keyword KwMEASURE } <pragma_> "NO_POSITIVITY_CHECK" { keyword KwNO_POSITIVITY_CHECK } <pragma_> "NO_TERMINATION_CHECK" { keyword KwNO_TERMINATION_CHECK } <pragma_> "NO_UNIVERSE_CHECK" { keyword KwNO_UNIVERSE_CHECK } <pragma_> "NON_COVERING" { keyword KwNON_COVERING } <pragma_> "NON_TERMINATING" { keyword KwNON_TERMINATING } <pragma_> "OPTIONS" { keyword KwOPTIONS } <pragma_> "POLARITY" { keyword KwPOLARITY } <pragma_> "OVERLAPPABLE" { keyword KwOVERLAPPABLE } <pragma_> "OVERLAPPING" { keyword KwOVERLAPPING } <pragma_> "OVERLAPS" { keyword KwOVERLAPS } <pragma_> "REWRITE" { keyword KwREWRITE } <pragma_> "STATIC" { keyword KwSTATIC } <pragma_> "TERMINATING" { keyword KwTERMINATING } <pragma_> "WARNING_ON_USAGE" { keyword KwWARNING_ON_USAGE } <pragma_> "WARNING_ON_IMPORT" { keyword KwWARNING_ON_IMPORT } <pragma_> . # [ $white \" ] + { withInterval $ TokString } -- we recognise string literals in pragmas <fpragma_> . # [ $white ] + { withInterval $ TokString } -- Comments -- We need to rule out pragmas here. Usually longest match would take -- precedence, but in some states pragmas aren't valid but comments are. <0,code,bol_,layout_,empty_layout_,imp_dir_> "{-" / { not' (followedBy '#') } { nestedComment } -- A misplaced end-comment, like in @f {x-} = x-@ gives a parse error. "-}" { symbol SymEndComment } @ident "-}" { symbol SymEndComment } -- Dashes followed by a name symbol should be parsed as a name. <0,code,bol_,layout_,empty_layout_,imp_dir_> "--" .* / { keepComments .&&. (followedBy '\n' .||. eof) } { confirmLayout `andThen` withInterval TokComment } <0,code,bol_,layout_,empty_layout_,imp_dir_> "--" .* / { followedBy '\n' .||. eof } { confirmLayout `andThen` skip } -- Note: we need to confirm tentative layout columns whenever we meet -- a newline character ('\n'). -- The exception is the newline after a layout keyword. -- We need to check the offside rule for the first token on each line. We -- should not check the offside rule for the end of file token or an -- '\end{code}' <0,code,imp_dir_> \n { begin bol_ } -- Note that @begin@ revisits '\n' in the new state! <bol_> { \n { confirmLayout `andThen` skip } -- ^ \\ "end{code}" { end } () / { not' eof } { offsideRule } } -- After a layout keyword the -- indentation of the first token decides the column of the layout block. <layout_> { \n { confirmLayout `andThen` skip} () { endWith newLayoutBlock } } -- The only rule for the empty_layout state. Generates a close brace. <empty_layout_> () { emptyLayout } -- Keywords <0,code> abstract { keyword KwAbstract } <0,code> codata { keyword KwCoData } <0,code> coinductive { keyword KwCoInductive } <0,code> constructor { keyword KwConstructor } <0,code> data { keyword KwData } <0,code> do { keyword KwDo } <0,code> "eta-equality" { keyword KwEta } <0,code> field { keyword KwField } <0,code> forall { keyword KwForall } <0,code> import { keyword KwImport } <0,code> in { keyword KwIn } <0,code> inductive { keyword KwInductive } <0,code> infix { keyword KwInfix } <0,code> infixl { keyword KwInfixL } <0,code> infixr { keyword KwInfixR } <0,code> instance { keyword KwInstance } <0,code> interleaved { keyword KwInterleaved } <0,code> let { keyword KwLet } <0,code> macro { keyword KwMacro } <0,code> module { keyword KwModule } <0,code> mutual { keyword KwMutual } <0,code> "no-eta-equality" { keyword KwNoEta } <0,code> open { keyword KwOpen } <0,code> overlap { keyword KwOverlap } <0,code> pattern { keyword KwPatternSyn } <0,code> postulate { keyword KwPostulate } <0,code> primitive { keyword KwPrimitive } <0,code> private { keyword KwPrivate } <0,code> quote { keyword KwQuote } <0,code> quoteTerm { keyword KwQuoteTerm } <0,code> record { keyword KwRecord } <0,code> rewrite { keyword KwRewrite } <0,code> syntax { keyword KwSyntax } <0,code> tactic { keyword KwTactic } <0,code> unquote { keyword KwUnquote } <0,code> unquoteDecl { keyword KwUnquoteDecl } <0,code> unquoteDef { keyword KwUnquoteDef } <0,code> variable { keyword KwVariable } <0,code> where { keyword KwWhere } <0,code> with { keyword KwWith } <0,code> opaque { keyword KwOpaque } <0,code> unfolding { keyword KwUnfolding } -- The parser is responsible to put the lexer in the imp_dir_ state when it -- expects an import directive keyword. This means that if you run the -- tokensParser you will never see these keywords. <0,code> using { keyword KwUsing } <0,code> hiding { keyword KwHiding } <0,code> renaming { keyword KwRenaming } <imp_dir_> to { endWith $ keyword KwTo } <0,code> public { keyword KwPublic } -- Holes <0,code> "{!" { hole } -- Special symbols <0,code> "..." { symbol SymEllipsis } <0,code> ".." { symbol SymDotDot } <0,code> "." { symbol SymDot } <0,code> ";" { symbol SymSemi } <0,code> ":" { symbol SymColon } <0,code> "=" { symbol SymEqual } <0,code> "_" { symbol SymUnderscore } <0,code> "?" { symbol SymQuestionMark } <0,code> "|" { symbol SymBar } <0,code> "(|" /[$white] { symbol SymOpenIdiomBracket } <0,code> "|)" { symbol SymCloseIdiomBracket } <0,code> "(|)" { symbol SymEmptyIdiomBracket } <0,code> "(" { symbol SymOpenParen } <0,code> ")" { symbol SymCloseParen } <0,code> "->" { symbol SymArrow } <0,code> "\" { symbol SymLambda } -- " <0,code> "@" { symbol SymAs } <0,code> "{{" /[^[!\-]] { symbol SymDoubleOpenBrace } -- Andreas, 2019-08-08, issue #3962, don't lex '{{' if followed by '-' -- since this will be confused with '{-' (start of comment) by Emacs. -- We don't lex '}}' into a SymDoubleCloseBrace. Instead, we lex it as -- two SymCloseBrace's. When the parser is looking for a double -- closing brace, it will also accept two SymCloseBrace's, after -- verifying that they are immediately next to each other. -- This trick allows us to keep "record { a = record {}}" working -- properly. -- <0,code> "}}" { symbol SymDoubleCloseBrace } <0,code> "{" { symbol SymOpenBrace } -- you can't use braces for layout <0,code> "}" { symbol SymCloseBrace } -- Literals <0,code> \' { litChar } <0,code,pragma_> \" { litString } <0,code> @integer { literal' integer LitNat } <0,code> @float { literal LitFloat } -- Identifiers <0,code,imp_dir_> @q_ident { identifier } -- Andreas, 2013-02-21, added identifiers to the 'imp_dir_' state. -- This is to fix issue 782: 'toz' should not be lexed as 'to' -- (followed by 'z' after leaving imp_dir_). -- With identifiers in state imp_dir_, 'toz' should be lexed as -- identifier 'toz' in imp_dir_ state, leading to a parse error later. { -- | This is the initial state for parsing a regular, non-literate file. normal :: LexState normal = 0 {-| The layout state. Entered when we see a layout keyword ('withLayout') and exited at the next token ('newLayoutBlock'). -} layout :: LexState layout = layout_ {-| The state inside a pragma. -} pragma :: LexState pragma = pragma_ -- | The state inside a FOREIGN pragma. This needs to be different so that we don't -- lex further strings as pragma keywords. fpragma :: LexState fpragma = fpragma_ {-| We enter this state from 'newLayoutBlock' when the token following a layout keyword is to the left of (or at the same column as) the current layout context. Example: > data Empty : Set where > foo : Empty -> Nat Here the second line is not part of the @where@ clause since it is has the same indentation as the @data@ definition. What we have to do is insert an empty layout block @{}@ after the @where@. The only thing that can happen in this state is that 'emptyLayout' is executed, generating the closing brace. The open brace is generated when entering by 'newLayoutBlock'. -} empty_layout :: LexState empty_layout = empty_layout_ -- | This state is entered at the beginning of each line. You can't lex -- anything in this state, and to exit you have to check the layout rule. -- Done with 'offsideRule'. bol :: LexState bol = bol_ -- | This state can only be entered by the parser. In this state you can only -- lex the keywords @using@, @hiding@, @renaming@ and @to@. Moreover they are -- only keywords in this particular state. The lexer will never enter this -- state by itself, that has to be done in the parser. imp_dir :: LexState imp_dir = imp_dir_ -- | Return the next token. This is the function used by Happy in the parser. -- -- @lexer k = 'lexToken' >>= k@ lexer :: (Token -> Parser a) -> Parser a lexer k = lexToken >>= k -- | Do not use this function; it sets the 'ParseFlags' to -- 'undefined'. alexScan :: AlexInput -> Int -> AlexReturn (LexAction Token) -- | This is the main lexing function generated by Alex. alexScanUser :: ([LexState], ParseFlags) -> AlexInput -> Int -> AlexReturn (LexAction Token) }