{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}

module Language.Nix.Identifier
  ( -- * Type-safe Identifiers
    Identifier, ident
  , parseSimpleIdentifier, parseQuotedIdentifier
    -- * String Predicates
  , needsQuoting
    -- * Internals
    -- TODO: only required by the language-nix test suite, unexport?
  , nixKeywords
  , quote
  )
  where

import Control.DeepSeq
import Control.Lens
import Data.Char
import Data.Either
import Data.String
import GHC.Generics ( Generic )
import Test.QuickCheck
import Text.Parsec.Class as P
import Text.PrettyPrint.HughesPJClass as PP

-- | Identifiers in Nix are essentially strings. They can be constructed
-- (and viewed) with the 'ident' isomorphism. For the sake of convenience,
-- @Identifier@s are an instance of the 'IsString' class.
--
-- It is usually wise to only use identifiers of the form
-- @[a-zA-Z_][a-zA-Z0-9_'-]*@, because these don't need quoting.
-- Consequently, they can appear almost anywhere in a Nix expression
-- (whereas quoted identifiers e.g. can't be used in function patterns).
-- The methods of the 'Pretty' class can be used to print an identifier
-- with proper quoting:
--
-- >>> pPrint (ident # "test")
-- test
-- >>> pPrint (ident # "foo.bar")
-- "foo.bar"
--
-- The 'HasParser' class allows parsing rendered identifiers even if they are
-- quoted:
--
-- >>> parseM "Identifier" "hello" :: Maybe Identifier
-- Just (Identifier "hello")
-- >>> parseM "Identifier" "\"3rd party\"" :: Maybe Identifier
-- Just (Identifier "3rd party")
--
-- __Warning__: Identifiers /may not/ contain @\'\\0\'@, but this is not
-- checked during construction!
--
-- See also <https://nix.dev/manual/nix/2.30/language/identifiers.html>.
declareLenses [d| newtype Identifier = Identifier { ident :: String }
                    deriving (Show, Eq, Ord, IsString, Generic)
              |]
-- ^ An isomorphism that allows conversion of 'Identifier' from/to the
-- standard 'String' type via 'review'.
--
-- >>> ident # "hello"
-- Identifier "hello"
-- >>> from ident # fromString "hello"
-- "hello"

instance NFData Identifier where
  rnf :: Identifier -> ()
rnf (Identifier String
str) = String -> ()
forall a. NFData a => a -> ()
rnf String
str

instance Arbitrary Identifier where
  arbitrary :: Gen Identifier
arbitrary = String -> Identifier
Identifier (String -> Identifier) -> Gen String -> Gen Identifier
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Gen String] -> Gen String
forall a. HasCallStack => [Gen a] -> Gen a
oneof
    [ -- almost always needs quoting, unreasonable
      Gen Char -> Gen String
forall a. Gen a -> Gen [a]
listOf1 (Gen Char -> Gen Char
nonNul Gen Char
arbitraryUnicodeChar)
      -- almost always needs quoting, reasonable-ish
    , Gen Char -> Gen String
forall a. Gen a -> Gen [a]
listOf1 (Gen Char -> Gen Char
nonNul Gen Char
arbitraryPrintableChar)
      -- rarely needs quoting
    , Gen Char -> Gen String
forall a. Gen a -> Gen [a]
listOf1 (Gen Char
arbitraryASCIIChar Gen Char -> (Char -> Bool) -> Gen Char
forall a. Gen a -> (a -> Bool) -> Gen a
`suchThat` Char -> Bool
isSimpleChar) ]
    where nonNul :: Gen Char -> Gen Char
nonNul Gen Char
g = Gen Char
g Gen Char -> (Char -> Bool) -> Gen Char
forall a. Gen a -> (a -> Bool) -> Gen a
`suchThat` (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\0')
          isSimpleChar :: Char -> Bool
isSimpleChar Char
c = Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"_-'"
  shrink :: Identifier -> [Identifier]
shrink (Identifier String
i) = (String -> Identifier) -> [String] -> [Identifier]
forall a b. (a -> b) -> [a] -> [b]
map String -> Identifier
Identifier (String -> [String]
forall a. Arbitrary a => a -> [a]
shrink String
i)

instance CoArbitrary Identifier

instance Pretty Identifier where
  pPrint :: Identifier -> Doc
pPrint = Getting Doc Identifier Doc -> Identifier -> Doc
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view ((String -> Const Doc String) -> Identifier -> Const Doc Identifier
Iso' Identifier String
ident ((String -> Const Doc String)
 -> Identifier -> Const Doc Identifier)
-> ((Doc -> Const Doc Doc) -> String -> Const Doc String)
-> Getting Doc Identifier Doc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS -> Optic' (->) (Const Doc) String String
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to ShowS
quote Optic' (->) (Const Doc) String String
-> ((Doc -> Const Doc Doc) -> String -> Const Doc String)
-> (Doc -> Const Doc Doc)
-> String
-> Const Doc String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Doc)
-> (Doc -> Const Doc Doc) -> String -> Const Doc String
forall (p :: * -> * -> *) (f :: * -> *) s a.
(Profunctor p, Contravariant f) =>
(s -> a) -> Optic' p f s a
to String -> Doc
text)

-- | Note that this parser is more lenient than Nix w.r.t. simple identifiers,
--   since it will accept 'nixKeywords'.
--
--   Naturally, it does not support string interpolation, but does not reject
--   strings that contain them. E.g. the string literal @"hello ${world}"@
--   will contain @${world}@ verbatim after parsing. Do not rely on this
--   behavior, as it may be changed in the future.
instance HasParser Identifier where
  parser :: forall st input (m :: * -> *). CharParser st input m Identifier
parser = ParsecT st input m Identifier
forall st input (m :: * -> *). CharParser st input m Identifier
parseQuotedIdentifier ParsecT st input m Identifier
-> ParsecT st input m Identifier -> ParsecT st input m Identifier
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT st input m Identifier
forall st input (m :: * -> *). CharParser st input m Identifier
parseSimpleIdentifier

-- | Parsec parser for simple identifiers, i.e. those that don't need quoting.
--   The parser is equivalent to the regular expression @^[a-zA-Z_][a-zA-Z0-9_'-]*$@
--   which the Nix parser uses.
--
--   Note that this parser will accept keywords which would not be parsed as
--   identifiers by Nix, see 'nixKeywords'.
parseSimpleIdentifier :: CharParser st tok m Identifier
parseSimpleIdentifier :: forall st input (m :: * -> *). CharParser st input m Identifier
parseSimpleIdentifier = do
  Char
c <- (Char -> Bool) -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
x -> Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| (Char -> Bool
isAscii Char
x Bool -> Bool -> Bool
&& Char -> Bool
isAlpha Char
x))
  String
cs <- ParsecT st tok m Char -> ParsecT st tok m String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ((Char -> Bool) -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
x -> Char
x Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"_'-" Bool -> Bool -> Bool
|| (Char -> Bool
isAscii Char
x Bool -> Bool -> Bool
&& Char -> Bool
isAlphaNum Char
x)))
  Identifier -> ParsecT st tok m Identifier
forall a. a -> ParsecT st tok m a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> Identifier
Identifier (Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:String
cs))

-- | 'ReadP' parser for quoted identifiers, i.e. those that /do/ need
-- quoting.
parseQuotedIdentifier :: CharParser st tok m Identifier
parseQuotedIdentifier :: forall st input (m :: * -> *). CharParser st input m Identifier
parseQuotedIdentifier = String -> Identifier
Identifier (String -> Identifier)
-> ParsecT st tok m String -> ParsecT st tok m Identifier
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT st tok m String
forall st tok (m :: * -> *). CharParser st tok m String
qstring
  where
    qstring :: CharParser st tok m String
    qstring :: forall st tok (m :: * -> *). CharParser st tok m String
qstring = ParsecT st tok m Char
-> ParsecT st tok m Char
-> ParsecT st tok m String
-> ParsecT st tok m String
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"') (Char -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'"') (ParsecT st tok m Char -> ParsecT st tok m String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT st tok m Char
forall st tok (m :: * -> *). CharParser st tok m Char
qtext)

    qtext :: CharParser st tok m Char
    qtext :: forall st tok (m :: * -> *). CharParser st tok m Char
qtext = ParsecT st tok m Char
forall st tok (m :: * -> *). CharParser st tok m Char
quotedPair ParsecT st tok m Char
-> ParsecT st tok m Char -> ParsecT st tok m Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> String -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m Char
P.noneOf String
"\\\""

    quotedPair :: CharParser st tok m Char
    quotedPair :: forall st tok (m :: * -> *). CharParser st tok m Char
quotedPair = do
      Char
_ <- Char -> ParsecT st tok m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
P.char Char
'\\'
      Char
c <- ParsecT st tok m Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
anyChar
      -- See https://github.com/NixOS/nix/blob/2d83bc6b83763290e9bbf556209927ba469956aa/src/libexpr/lexer.l#L54-L60
      Char -> ParsecT st tok m Char
forall a. a -> ParsecT st tok m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> ParsecT st tok m Char) -> Char -> ParsecT st tok m Char
forall a b. (a -> b) -> a -> b
$ case Char
c of
                 Char
'n' -> Char
'\n'
                 Char
't' -> Char
'\t'
                 Char
'r' -> Char
'\r'
                 -- Note that this handles actual escapes like \" and \\ and
                 -- bogus cases like \f which Nix doesn't fail on (despite not
                 -- supporting it), but simply maps to plain f
                 Char
_ -> Char
c

-- | Checks whether a given string needs quoting when interpreted as an
-- 'Identifier'.
needsQuoting :: String -> Bool
needsQuoting :: String -> Bool
needsQuoting String
s =
  String
s String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [String]
nixKeywords
  Bool -> Bool -> Bool
|| Either ParseError () -> Bool
forall a b. Either a b -> Bool
isLeft (Parsec String () ()
-> () -> String -> String -> Either ParseError ()
forall s t u a.
Stream s Identity t =>
Parsec s u a -> u -> String -> s -> Either ParseError a
runParser (ParsecT String () Identity Identifier
forall st input (m :: * -> *). CharParser st input m Identifier
parseSimpleIdentifier ParsecT String () Identity Identifier
-> Parsec String () () -> Parsec String () ()
forall a b.
ParsecT String () Identity a
-> ParsecT String () Identity b -> ParsecT String () Identity b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parsec String () ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) () String
"" String
s)

-- | List of strings that are parseable as simple identifiers (see
--   'parseSimpleIdentifier') in isolation, but won't be accepted by Nix because
--   [keywords](https://nix.dev/manual/nix/2.30/language/identifiers.html#keywords)
--   take precedence.
nixKeywords :: [String]
nixKeywords :: [String]
nixKeywords =
  [ String
"assert", String
"with", String
"if", String
"then", String
"else", String
"let", String
"in", String
"rec", String
"inherit", String
"or" ]

-- | Helper function to quote a given identifier string if necessary.
--   Usually, one should use the 'Pretty' instance of 'Identifier' instead.
--
-- >>> putStrLn (quote "abc")
-- abc
-- >>> putStrLn (quote "abc.def")
-- "abc.def"
-- >>> putStrLn (quote "$foo")
-- "$foo"
-- >>> putStrLn (quote "${foo}")
-- "\${foo}"
quote :: String -> String
quote :: ShowS
quote String
s = if String -> Bool
needsQuoting String
s then Char
'"' Char -> ShowS
forall a. a -> [a] -> [a]
: ShowS
quote' String
s else String
s
  where
    quote' :: ShowS
quote' (Char
c1:Char
c2:String
cs) = Char -> Maybe Char -> String
escapeChar Char
c1 (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
c2) String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
quote' (Char
c2Char -> ShowS
forall a. a -> [a] -> [a]
:String
cs)
    quote' (Char
c:String
cs) = Char -> Maybe Char -> String
escapeChar Char
c Maybe Char
forall a. Maybe a
Nothing String -> ShowS
forall a. [a] -> [a] -> [a]
++ ShowS
quote' String
cs
    quote' String
"" = String
"\""

escapeChar :: Char -> Maybe Char -> String
escapeChar :: Char -> Maybe Char -> String
escapeChar Char
c1 Maybe Char
c2 =
  case Char
c1 of
    -- supported escape sequences, see quotedPair above
    -- N.B. technically, we only need to escape \r (since Nix converts raw \r to \n),
    -- but it's nicer to escape what we can.
    Char
'\n' -> String
"\\n"
    Char
'\t' -> String
"\\t"
    Char
'\r' -> String
"\\r"
    -- syntactically significant in doubly quoted strings
    Char
'\\' -> String
"\\\\"
    Char
'"' -> String
"\\\""
    Char
'$' | Maybe Char
c2 Maybe Char -> Maybe Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'{' -> String
"\\$"
    Char
_ -> [Char
c1]