-- File generated by the BNF Converter.

-- -*- haskell -*-
-- Lexer definition for use with Alex 3.
{
{-# OPTIONS -fno-warn-incomplete-patterns #-}
{-# OPTIONS_GHC -w #-}

{-# LANGUAGE PatternSynonyms #-}

module LexFstStudio where

import Prelude

import qualified Data.Bits
import           Data.Char     (ord)
import           Data.Function (on)
import           Data.Maybe    (fromMaybe)
import qualified Data.Map      as Map
import           Data.Map      (Map)
import           Data.Word     (Word8)
}

-- Predefined character classes

$c = [A-Z\192-\221] # [\215]  -- capital isolatin1 letter (215 = \times)
$s = [a-z\222-\255] # [\247]  -- small   isolatin1 letter (247 = \div  )
$l = [$c $s]         -- letter
$d = [0-9]           -- digit
$i = [$l $d _ ']     -- identifier character
$u = [. \n]          -- universal: any character

-- Symbols and non-identifier-like reserved words

@rsyms = \$ | \& | \( | \) | \* | \+ | \, | \- | \- \> | \. \# \. | \. \. \. | \. "o" \. | \. "x" \. | \/ \/ | "0" | \: | \: \: \= | \; | \< | \= \> | \> | \? | \@ \- \> | \\ | \^ | \_ | \{ | \| | \| \| | \} | \~

:-

-- Line comment "--"
"--" [.]* ;

-- Block comment "{-" "-}"
\{ \- [ $u # \- ]* \- ([ $u # [ \} \- ] ] [ $u # \- ]* \- | \-)* \};

-- Whitespace (skipped)
$white+ ;

-- Symbols
@rsyms
    { tok (eitherResIdent TV) }

-- Keywords and Ident
$l $i*
    { tok (eitherResIdent TV) }

-- String
\" ([$u # [\" \\ \n]] | (\\ (\" | \\ | \' | n | t | r | f)))* \"
    { tok (TL . unescapeInitTail) }

-- Integer
$d+
    { tok TI }

{
-- | Create a token with position.
tok :: (String -> Tok) -> (Posn -> String -> Token)
tok f p = PT p . f

-- | Token without position.
data Tok
  = TK {-# UNPACK #-} !TokSymbol  -- ^ Reserved word or symbol.
  | TL !String                    -- ^ String literal.
  | TI !String                    -- ^ Integer literal.
  | TV !String                    -- ^ Identifier.
  | TD !String                    -- ^ Float literal.
  | TC !String                    -- ^ Character literal.
  deriving (Eq, Show, Ord)

-- | Smart constructor for 'Tok' for the sake of backwards compatibility.
pattern TS :: String -> Int -> Tok
pattern TS t i = TK (TokSymbol t i)

-- | Keyword or symbol tokens have a unique ID.
data TokSymbol = TokSymbol
  { tsText :: String
      -- ^ Keyword or symbol text.
  , tsID   :: !Int
      -- ^ Unique ID.
  } deriving (Show)

-- | Keyword/symbol equality is determined by the unique ID.
instance Eq  TokSymbol where (==)    = (==)    `on` tsID

-- | Keyword/symbol ordering is determined by the unique ID.
instance Ord TokSymbol where compare = compare `on` tsID

-- | Token with position.
data Token
  = PT  Posn Tok
  | Err Posn
  deriving (Eq, Show, Ord)

-- | Pretty print a position.
printPosn :: Posn -> String
printPosn (Pn _ l c) = "line " ++ show l ++ ", column " ++ show c

-- | Pretty print the position of the first token in the list.
tokenPos :: [Token] -> String
tokenPos (t:_) = printPosn (tokenPosn t)
tokenPos []    = "end of file"

-- | Get the position of a token.
tokenPosn :: Token -> Posn
tokenPosn (PT p _) = p
tokenPosn (Err p)  = p

-- | Get line and column of a token.
tokenLineCol :: Token -> (Int, Int)
tokenLineCol = posLineCol . tokenPosn

-- | Get line and column of a position.
posLineCol :: Posn -> (Int, Int)
posLineCol (Pn _ l c) = (l,c)

-- | Convert a token into "position token" form.
mkPosToken :: Token -> ((Int, Int), String)
mkPosToken t = (tokenLineCol t, tokenText t)

-- | Convert a token to its text.
tokenText :: Token -> String
tokenText t = case t of
  PT _ (TS s _) -> s
  PT _ (TL s)   -> show s
  PT _ (TI s)   -> s
  PT _ (TV s)   -> s
  PT _ (TD s)   -> s
  PT _ (TC s)   -> s
  Err _         -> "#error"

-- | Convert a token to a string.
prToken :: Token -> String
prToken t = tokenText t

-- | Convert potential keyword into token or use fallback conversion.
eitherResIdent :: (String -> Tok) -> String -> Tok
eitherResIdent tv s = fromMaybe (tv s) (Map.lookup s resWords)

-- | The keywords and symbols of the language organized as a Map.
resWords :: Map String Tok
resWords = Map.fromAscList
  [ ("$", TS "$" 1)
  , ("&", TS "&" 2)
  , ("(", TS "(" 3)
  , (")", TS ")" 4)
  , ("*", TS "*" 5)
  , ("+", TS "+" 6)
  , (",", TS "," 7)
  , ("-", TS "-" 8)
  , ("->", TS "->" 9)
  , (".#.", TS ".#." 10)
  , ("...", TS "..." 11)
  , (".o.", TS ".o." 12)
  , (".x.", TS ".x." 13)
  , ("//", TS "//" 14)
  , ("0", TS "0" 15)
  , (":", TS ":" 16)
  , ("::=", TS "::=" 17)
  , (";", TS ";" 18)
  , ("<", TS "<" 19)
  , ("=>", TS "=>" 20)
  , (">", TS ">" 21)
  , ("?", TS "?" 22)
  , ("@->", TS "@->" 23)
  , ("\\", TS "\\" 24)
  , ("^", TS "^" 25)
  , ("_", TS "_" 26)
  , ("import", TS "import" 27)
  , ("main", TS "main" 28)
  , ("{", TS "{" 29)
  , ("|", TS "|" 30)
  , ("||", TS "||" 31)
  , ("}", TS "}" 32)
  , ("~", TS "~" 33)
  ]

-- | Unquote string literal.
unescapeInitTail :: String -> String
unescapeInitTail = id . unesc . tail . id
  where
  unesc s = case s of
    '\\':c:cs | elem c ['\"', '\\', '\''] -> c : unesc cs
    '\\':'n':cs  -> '\n' : unesc cs
    '\\':'t':cs  -> '\t' : unesc cs
    '\\':'r':cs  -> '\r' : unesc cs
    '\\':'f':cs  -> '\f' : unesc cs
    '"':[]       -> []
    c:cs         -> c : unesc cs
    _            -> []

-------------------------------------------------------------------
-- Alex wrapper code.
-- A modified "posn" wrapper.
-------------------------------------------------------------------

data Posn = Pn !Int !Int !Int
  deriving (Eq, Show, Ord)

alexStartPos :: Posn
alexStartPos = Pn 0 1 1

alexMove :: Posn -> Char -> Posn
alexMove (Pn a l c) '\t' = Pn (a+1)  l     (((c+7) `div` 8)*8+1)
alexMove (Pn a l c) '\n' = Pn (a+1) (l+1)   1
alexMove (Pn a l c) _    = Pn (a+1)  l     (c+1)

type Byte = Word8

type AlexInput =
  ( Posn     -- current position
  , Char     -- previous char
  , [Byte]   -- pending bytes on the current char
  , String ) -- current input string

tokens :: String -> [Token]
tokens str = go (alexStartPos, '\n', [], str)
    where
      go :: AlexInput -> [Token]
      go inp@(pos, _, _, str) =
        case alexScan inp 0 of
          AlexEOF                   -> []
          AlexError (pos, _, _, _)  -> [Err pos]
          AlexSkip  inp' len        -> go inp'
          AlexToken inp' len act    -> act pos ( take len str) : (go inp')

alexGetByte :: AlexInput -> Maybe (Byte,AlexInput)
alexGetByte (p, c, (b:bs), s) = Just (b, (p, c, bs, s))
alexGetByte (p, _, [], s) =
  case s  of
    [] -> Nothing
    (c:s) ->
      let p'     = alexMove p c
          (b:bs) = utf8Encode c
      in p' `seq` Just (b, (p', c, bs, s))

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (p, c, bs, s) = c

-- | Encode a Haskell String to a list of Word8 values, in UTF8 format.
utf8Encode :: Char -> [Word8]
utf8Encode = map fromIntegral . go . ord
  where
  go oc
   | oc <= 0x7f       = [oc]

   | oc <= 0x7ff      = [ 0xc0 + (oc `Data.Bits.shiftR` 6)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]

   | oc <= 0xffff     = [ 0xe0 + (oc `Data.Bits.shiftR` 12)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
   | otherwise        = [ 0xf0 + (oc `Data.Bits.shiftR` 18)
                        , 0x80 + ((oc `Data.Bits.shiftR` 12) Data.Bits..&. 0x3f)
                        , 0x80 + ((oc `Data.Bits.shiftR` 6) Data.Bits..&. 0x3f)
                        , 0x80 + oc Data.Bits..&. 0x3f
                        ]
}