{-
  Copyright (c) Meta Platforms, Inc. and affiliates.
  All rights reserved.

  This source code is licensed under the BSD-style license found in the
  LICENSE file in the root directory of this source tree.
-}

module Glean.Query.UserQuery
  ( userQueryFacts
  , userQuery
  , userQueryWrites
  , schemaVersionForQuery
  ) where

import Control.Applicative
import Control.DeepSeq
import Control.Exception
import Control.Monad
import Control.Monad.Except
import Data.Bifunctor (first)
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.Coerce
import Data.Default
import qualified Data.HashMap.Strict as HashMap
import Data.Hashable (hash)
import Data.Int
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as Text
import Compat.Prettyprinter hiding ((<>))
import Compat.Prettyprinter.Render.Text
import qualified Data.Vector as Vector
import Data.Vector (Vector)
import qualified Data.Vector.Storable as VS
import Data.Word (Word64)
import System.IO
import TextShow

import ServiceData.GlobalStats as Stats
import qualified ServiceData.Types as Stats
import Util.AllocLimit
import Util.Timing
import Util.Log
import Util.STM

import Glean.Display
import qualified Glean.Angle.Parser as Angle
import Glean.Angle.Types hiding (Type, FieldDef, SourcePat_(..))
import qualified Glean.Angle.Types as Angle
import qualified Glean.Internal.Types as Thrift
import qualified Glean.Backend.Types as Backend
import Glean.Schema.Types
  (RefTarget(..), LookupResult(..), NameEnv, resolveRef)
import qualified Glean.Database.Catalog as Catalog
import Glean.Database.Schema.Types
import Glean.Database.Open
import qualified Glean.Database.PredicateStats as PredicateStats
import Glean.Database.Types as Database
import Glean.Database.Writes
import Glean.FFI
import Glean.Query.Codegen
import Glean.Query.Codegen.Types hiding (Match(..))
import Glean.Query.Transform
import Glean.Query.Flatten
import Glean.Query.Opt
import Glean.Query.Reorder
import Glean.Query.Incremental (makeIncremental)
import Glean.RTS as RTS
import Glean.RTS.Bytecode.Disassemble
import qualified Glean.RTS.Bytecode.Gen.Version as Bytecode
import qualified Glean.RTS.Foreign.Bytecode as Bytecode
import Glean.RTS.Foreign.FactSet (FactSet)
import qualified Glean.RTS.Foreign.FactSet as FactSet
import Glean.RTS.Foreign.Lookup
import Glean.RTS.Foreign.Ownership
import Glean.RTS.Foreign.Query
import Glean.RTS.Foreign.Stacked (stacked)
import Glean.RTS.Types (Type, PidRef(..))
import qualified Glean.Types as Thrift
import qualified Glean.ServerConfig.Types as ServerConfig
import Glean.Query.JSON
import Glean.Schema.Resolve
import Glean.Schema.Util
import Glean.Util.Observed as Observed
import Glean.Query.Typecheck
import Glean.Bytecode.SysCalls (userQuerySysCalls)
import Glean.Types (UserQueryCont)
import qualified Glean.Backend.Types as StackedDbOpts
import Glean.Util.Some
import Glean.Query.Typecheck.Types (TcPred)
-- NOTE: We keep the public interface monomorphic, at least for now.

--
-- | Perform a query using the JSON query syntax, and return the results
-- as JSON-encoded Thrift data.
--
userQuery
  :: Database.Env
  -> Thrift.Repo
  -> Thrift.UserQuery
  -> IO Thrift.UserQueryResults
userQuery env repo query = chooseEncoding
  (Thrift.userQuery_options query)
  (Thrift.userQuery_encodings query)
  $ genericUserQuery env repo query

--
-- | Query a list of facts and return the result as JSON-encoded Thrift data.
--
userQueryFacts
  :: Database.Env
  -> Thrift.Repo
  -> Thrift.UserQueryFacts
  -> IO Thrift.UserQueryResults
userQueryFacts env repo query = chooseEncoding
  (Thrift.userQueryFacts_options query)
  (Thrift.userQueryFacts_encodings query)
  $ genericUserQueryFacts env repo query

-- | A generic implementation of scan queries.
genericUserQuery
  :: Encoding e
  => Database.Env
  -> Thrift.Repo
  -> Thrift.UserQuery
  -> e
  -> IO Thrift.UserQueryResults
{-# INLINE genericUserQuery #-}
genericUserQuery env repo query enc = do
  config@ServerConfig.Config{..} <- Observed.get (envServerConfig env)
  readDatabaseWithBoundaries env repo $ \odb bounds lookup ->
    maybe id limitAllocsThrow config_query_alloc_limit
      $ performUserQuery enc (odbSchema odb)
      $ userQueryImpl env odb config NoExtraSteps bounds lookup repo query

-- | A generic implementation of lookup queries.
genericUserQueryFacts
  :: Encoding e
  => Database.Env
  -> Thrift.Repo
  -> Thrift.UserQueryFacts
  -> e
  -> IO Thrift.UserQueryResults
{-# INLINE genericUserQueryFacts #-}
genericUserQueryFacts env repo query enc = do
  config <- Observed.get (envServerConfig env)
  readDatabase env repo $ \odb lookup ->
    performUserQuery enc (odbSchema odb) $
      userQueryFactsImpl (odbSchema odb) config lookup query

-- | Results returned from a query, parametrised of the types of facts and
-- statistics
data Results stats fact = Results
  { -- | Facts matching the query
    resFacts :: [(Fid, fact)]

    -- | Type of the results, if known
  , resPredicate :: Maybe PredicateDetails

    -- | Additional facts satisfying nested parts of the query
  , resNestedFacts :: IntMap fact

    -- | Continuation for paging
  , resCont :: Maybe Thrift.UserQueryCont

    -- | Statistics
  , resStats :: stats

    -- | Diagnostics
  , resDiags :: [Text]

    -- | Write handle for derived facts
  , resWriteHandle :: Maybe Thrift.Handle

    -- | Count of the facts searched per Pid
  , resFactsSearched :: Maybe (Map Int64 Int64)

    -- | Inferred type of the query, for logging
  , resType :: Maybe Text

    -- | Size of compiled bytecode, for logging
  , resBytecodeSize :: Maybe Int

    -- | Query compile time, for logging, in seconds
  , resCompileTime :: Maybe Double

  , resCodegenTime :: Maybe Double

    -- | Query execution time after compilation, for logging, in nanoseconds
  , resExecutionTime :: Maybe Word64

  -- | Size of query results, for logging
  , resResultBytes :: Maybe Int64
  }

class Encoding e where
  type EncodedFact e

  shouldExpand :: e -> Bool
  serializeFact
    :: e
    -> IntMap (EncodedFact e)
    -> Fid
    -> PredicateDetails
    -> Thrift.Fact
    -> IO (EncodedFact e)
  setResults
    :: e
    -> Results stats (EncodedFact e)
    -> Thrift.UserQueryResults
    -> Thrift.UserQueryResults

withEncoding
  :: Thrift.UserQueryEncoding
  -> (forall e. Encoding e => e -> a)
  -> Maybe a
{-# INLINE withEncoding #-}
withEncoding (Thrift.UserQueryEncoding_bin x) f = Just $ f x
withEncoding (Thrift.UserQueryEncoding_json x) f = Just $ f x
withEncoding (Thrift.UserQueryEncoding_compact x) f = Just $ f x
withEncoding (Thrift.UserQueryEncoding_listbin x) f = Just $ f x
withEncoding _ _ = Nothing

chooseEncoding
  :: Maybe Thrift.UserQueryOptions
  -> [Thrift.UserQueryEncoding]
  -> (forall e. Encoding e => e -> IO a)
  -> IO a
{-# INLINE chooseEncoding #-}
chooseEncoding mopts encs f = do
  forM_ (Thrift.userQueryOptions_continuation =<< mopts) checkUserQueryCont
  case encs of
    [] -> f $ LegacyJSONEncoding $ fromMaybe def mopts
    _ | act : _ <- mapMaybe (`withEncoding` f) encs -> act
      | otherwise -> throwIO $ Thrift.BadQuery
          "none of the requested encodings are supported"

newtype LegacyJSONEncoding = LegacyJSONEncoding Thrift.UserQueryOptions

instance Encoding LegacyJSONEncoding where
  type EncodedFact LegacyJSONEncoding = ByteString

  shouldExpand (LegacyJSONEncoding opts) =
    Thrift.userQueryOptions_expand_results opts

  serializeFact
    (LegacyJSONEncoding opts)
    serialized
    fid
    predicateDetails
    Thrift.Fact{..} = factToJSON
      (Thrift.userQueryOptions_no_base64_binary opts)
      serialized
      predicateDetails
      fid
      fact_key
      fact_value

  setResults _ res qres = qres
      { Thrift.userQueryResults_facts = snd <$> resFacts res
      , Thrift.userQueryResults_nestedFacts = Map.fromList
          [ (fromIntegral i, fact)
            | (i,fact) <- IntMap.toList $ resNestedFacts res ]
      }

instance Encoding Thrift.UserQueryEncodingJSON where
  type EncodedFact Thrift.UserQueryEncodingJSON = ByteString

  shouldExpand = Thrift.userQueryEncodingJSON_expand_results

  serializeFact enc serialized fid predicateDetails Thrift.Fact{..} =
    factToJSON
      (Thrift.userQueryEncodingJSON_no_base64_binary enc)
      serialized
      predicateDetails
      fid
      fact_key
      fact_value

  setResults enc res qres = qres
    { Thrift.userQueryResults_results = Thrift.UserQueryEncodedResults_json
        Thrift.UserQueryResultsJSON
          { userQueryResultsJSON_encoding = enc
          , userQueryResultsJSON_facts = snd <$> resFacts res
          , userQueryResultsJSON_nestedFacts = Map.fromList
              [ (fromIntegral i, fact)
                | (i,fact) <- IntMap.toList $ resNestedFacts res ]
          }
    }

instance Encoding Thrift.UserQueryEncodingCompact where
  type EncodedFact Thrift.UserQueryEncodingCompact = ByteString

  shouldExpand = Thrift.userQueryEncodingCompact_expand_results

  serializeFact _ serialized fid predicateDetails Thrift.Fact{..} =
    factToCompact
      serialized
      predicateDetails
      fid
      fact_key
      fact_value

  setResults enc res qres = qres
    { Thrift.userQueryResults_results = Thrift.UserQueryEncodedResults_compact
        Thrift.UserQueryResultsCompact
          { userQueryResultsCompact_encoding = enc
          , userQueryResultsCompact_facts = snd <$> resFacts res
          , userQueryResultsCompact_nestedFacts = Map.fromList
              [ (fromIntegral i, fact)
                | (i,fact) <- IntMap.toList $ resNestedFacts res ]
          }
    }

instance Encoding Thrift.UserQueryEncodingBin where
  type EncodedFact Thrift.UserQueryEncodingBin = Thrift.Fact

  shouldExpand _ = False

  serializeFact _ _ _ _ = return

  setResults enc res qres = qres
    { Thrift.userQueryResults_results = Thrift.UserQueryEncodedResults_bin
        Thrift.UserQueryResultsBin
          { userQueryResultsBin_encoding = enc
          , userQueryResultsBin_facts =
              Map.fromList
              $ coerce
              $ resFacts res
          , userQueryResultsBin_nestedFacts =
              Map.fromList
              $ map (first fromIntegral)
              $ IntMap.toList
              $ resNestedFacts res
          }
    }

instance Encoding Thrift.UserQueryEncodingListBin where
  type EncodedFact Thrift.UserQueryEncodingListBin = Thrift.Fact

  shouldExpand _ = False

  serializeFact _ _ _ _ = return

  setResults enc res qres = qres
    { Thrift.userQueryResults_results = Thrift.UserQueryEncodedResults_listbin
        Thrift.UserQueryResultsListBin
          { userQueryResultsListBin_encoding = enc
          , userQueryResultsListBin_ids =
              coerce $ Vector.fromList (map fst (resFacts res))
          , userQueryResultsListBin_facts =
              Vector.fromList (map snd (resFacts res))
          , userQueryResultsListBin_nestedFacts =
              Map.fromList
              $ map (first fromIntegral)
              $ IntMap.toList
              $ resNestedFacts res
          }
    }

-- | Perform a query and convert the 'Results' to a Thrift result
performUserQuery
  :: Encoding e
  => e
  -> DbSchema
  -> IO (Results Stats Thrift.Fact)
  -> IO Thrift.UserQueryResults
{-# INLINE performUserQuery #-}
performUserQuery encoding schema query = do
  !results <- withStats $ do
    res <- query

    -- Convert nested facts - we do it in the order of their fact ids
    -- which means we can only depend on facts converted earlier.
    nested <- foldM
      (\expanded (fid, fact@Thrift.Fact{..}) -> do
          details <- pidDetails schema fact_type
          encoded <- serializeFact
            encoding
            (if shouldExpand encoding then expanded else IntMap.empty)
            (Fid $ fromIntegral fid)
            details
            fact
          return $ IntMap.insert fid encoded expanded)
      IntMap.empty
      (IntMap.toList $ resNestedFacts res)

    let
      -- avoid lookupPid if we know the result type
      getDetails = case resPredicate res of
        Just d -> const (return d)
        Nothing -> pidDetails schema

    facts <- forM (resFacts res) $ \(i, fact) -> do
      details <- getDetails (Thrift.fact_type fact)
      enc <- serializeFact
        encoding
        (if shouldExpand encoding then nested else IntMap.empty)
        i
        details
        fact
      return (i,enc)

    return res
      { resFacts = facts
      , resNestedFacts = if shouldExpand encoding then IntMap.empty else nested
      }
  return $ setResults encoding results def
    { Thrift.userQueryResults_stats = Just $ resStats results
    , Thrift.userQueryResults_continuation = resCont results
    , Thrift.userQueryResults_diagnostics = resDiags results
    , Thrift.userQueryResults_handle = resWriteHandle results
    , Thrift.userQueryResults_type = resType results
    }

pidDetails :: DbSchema -> Int64 -> IO PredicateDetails
pidDetails schema ty = do
  case lookupPid (Pid (fromIntegral ty)) schema of
    Nothing ->  throwIO $ Thrift.Exception $
      "failed looking up fact type: " <> Text.pack (show ty)
    Just d -> return d

userQueryFactsImpl
  :: DbSchema
  -> ServerConfig.Config
  -> Lookup
  -> Thrift.UserQueryFacts
  -> IO (Results Stats Thrift.Fact)
     -- The length of the result list is guaranteed to be the same
     -- as the userQueryFacts_facts list in the input.
userQueryFactsImpl
    schema@DbSchema{..}
    config
    lookup
    query@Thrift.UserQueryFacts{..} = do
  let opts = fromMaybe def userQueryFacts_options

  schemaSelector <- schemaVersionForQuery schema config userQueryFacts_schema_id

  expandPids <- optsExpandPids opts schemaSelector schema
  let limits = mkQueryRuntimeOptions opts config expandPids

  trans <- transformationsForQuery schema schemaSelector

  vlog 2 $ "userQueryFactsImpl: " <> show (length userQueryFacts_facts)
  (qResults@QueryResults{..}, fullScans) <- do
    nextId <- firstFreeId lookup
    -- executeCompiled needs a Define, even though we won't use it
    bracket (FactSet.new nextId) release $ \derived -> do
    let stack = stacked lookup derived
    bracket
      (compileQueryFacts userQueryFacts_facts)
      (release . compiledQuerySub) $ \sub -> do
        results <- executeCompiled schemaInventory Nothing stack sub limits
        appliedTrans <- either (throwIO . Thrift.BadQuery) return $
          userQueryFactsTransformations trans schemaSelector schema query results
        -- use Pids in result facts to apply a suitable transformation if neded.
        return
          ( transformResultsBack appliedTrans results
          , compiledQueryFullScans sub
          )

  stats <- getStats schema fullScans qResults

  let results = Results
        { resFacts = Vector.toList queryResultsFacts
        , resPredicate = Nothing
        , resNestedFacts = mkNestedFacts queryResultsNestedFacts
        , resCont = Nothing
        , resStats = stats
        , resDiags = []
        , resWriteHandle = Nothing
        , resFactsSearched = Nothing
        , resType = Nothing  -- could be facts of different predicates
        , resBytecodeSize = Nothing
        , resCompileTime = Nothing
        , resCodegenTime = Nothing
        , resExecutionTime = Nothing
        , resResultBytes = Nothing
        }

  return $ if Thrift.userQueryOptions_omit_results opts
     then withoutFacts results
     else results

userQueryFactsTransformations
  :: QueryTransformations
  -> SchemaSelector
  -> DbSchema
  -> Thrift.UserQueryFacts
  -> QueryResults
  -> Either Text ResultTransformations
userQueryFactsTransformations qtrans selector schema query results = do
  nameEnv <-  maybe (Left "invalid schema_id") return $ do
    schemaNameEnv schema selector

  let predRefs :: [PidRef]
      predRefs = mapMaybe (toPidRef nameEnv) sourceRefs

      -- a type containing all types in the response
      allTypes :: Type
      allTypes = Angle.RecordTy
        [ Angle.FieldDef "" (PredicateTy () predId) | predId <- predRefs ]

  -- errors if multiple versions of the same predicate are requested.
  transformationsFor schema qtrans allTypes
  where
    sourceRefs :: [SourceRef]
    sourceRefs = mapMaybe toRef $ Set.toList vset
      where
        vset :: Set (Pid, Maybe Version)
        vset = Vector.foldr insert mempty zipped
        zipped = Vector.zip (queryResultsFacts results) versions
        insert ((_, fact), version) acc =
          Set.insert (Pid $ Thrift.fact_type fact, version) acc

        versions :: Vector (Maybe Version)
        versions = Vector.fromList $
          Thrift.factQuery_predicate_version <$>
            Thrift.userQueryFacts_facts query

        toRef :: (Pid, Maybe Version) -> Maybe SourceRef
        toRef (pid, mversion) = do
          details <- lookupPid pid schema
          let PredicateRef name _ = predicateRef details
          return $ SourceRef name mversion

    toPidRef :: NameEnv RefTargetId -> SourceRef -> Maybe PidRef
    toPidRef nameEnv ref = do
      ResolvesTo (RefPred predId) <- return $ resolveRef nameEnv ref
      details <- lookupPredicateId predId schema
      return (pidRef details)

-- | A version of userQuery where we only care about the resulting writes
-- caused by the query. Used for stored predicate derivation.
userQueryWrites
  :: Database.Env
  -> OpenDB s
  -> ServerConfig.Config
  -> Boundaries
  -> Lookup
  -> Thrift.Repo
  -> PredicateId
  -> Thrift.UserQuery
  -> IO (
       Thrift.UserQueryStats,
       Maybe Thrift.UserQueryCont,
       Maybe Thrift.Handle
     )
userQueryWrites env odb config bounds lookup repo pred q = do
  meta <- atomically $ Catalog.readMeta (envCatalog env) repo
  inc <- shouldDeriveIncrementally meta
  mode <-
    if inc
    then IncrementalDerivation <$> sectionsStats
    else return NoExtraSteps
  Results{..} <- withStats $
    userQueryImpl env odb config mode bounds lookup repo q
  return (resStats, resCont, resWriteHandle)
  where
    -- We derive incrementally if
    --   1. the DB is stacked, and
    --   2. the predicate was derived in the base DB
    --
    -- (2) might be false if we're deriving a predicate in a
    -- stacked DB that we didn't derive on the base DB.
    shouldDeriveIncrementally meta =
      case Thrift.metaDependencies meta of
        Just (Thrift.Dependencies_stacked Thrift.Stacked{..}) ->
          check $ Thrift.Repo stacked_name stacked_hash
        Just (Thrift.Dependencies_pruned pruned) ->
          check (Thrift.pruned_base pruned)
        Nothing -> return False
      where
      check base = do
        meta <- atomically $ Catalog.readMeta (envCatalog env) base
        return $ predicateIdRef pred `elem` Thrift.metaCompletePredicates meta

    -- Here the best we can do is say whether a Pid could have facts in a DB.
    -- Because of ownership slicing it may be that even though
    -- `Backend.predicateStats` gives a positive number all facts could be
    -- filtered out.
    sectionsStats :: IO (SeekSection -> Pid -> Bool)
    sectionsStats = do
      stackStats <- stats Backend.IncludeBase
      topStats <- stats Backend.ExcludeBase
      let baseStats = stackStats `minus` topStats
          pidHasFacts seekWhere (Pid pid) = case seekWhere  of
            SeekOnAllFacts -> hasFacts pid stackStats
            SeekOnBase -> hasFacts pid baseStats
            SeekOnStacked -> hasFacts pid topStats
      return pidHasFacts

    stats opts =
      fmap Thrift.predicateStats_count
      <$> PredicateStats.predicateStats env repo opts
    minus = Map.unionWith (-)
    hasFacts pid m = maybe False (> 0) $ Map.lookup pid m

userQueryImpl
  :: Database.Env
  -> OpenDB s
  -> ServerConfig.Config
  -> CompilationMode
  -> Boundaries
  -> Lookup
  -> Thrift.Repo
  -> Thrift.UserQuery
  -> IO (Results Stats Thrift.Fact)

userQueryImpl
  env
  odb
  config
  mode
  bounds
  lookup
  repo
  query@Thrift.UserQuery{..} = do
    let opts = fromMaybe def userQuery_options

    case Thrift.userQueryOptions_syntax opts of
      Thrift.QuerySyntax_ANGLE -> return ()
      other -> throwIO $ Thrift.Exception $
        "query syntax not supported: " <> Text.pack (show other)

    let
      schema = odbSchema odb
      stored = Thrift.userQueryOptions_store_derived_facts opts
      debug = Thrift.userQueryOptions_debug opts

    schemaVersion <-
      schemaVersionForQuery schema config userQuery_schema_id

    (returnType,compileTime,diag,cont) <-
      case Thrift.userQueryOptions_continuation opts of
        Just ucont
          | Just retTy <- Thrift.userQueryCont_returnType ucont -> do
          (compileTime, _, returnType) <-
            timeIt $ compileType schema schemaVersion retTy
          return (returnType,compileTime,[],Right ucont)

        -- This is either a new query or the continuation of a query
        -- that returns a temporary predicate.
        _ -> do
          (compileTime, _, (query@QueryWithInfo{..}, ty, preds)) <-
            timeIt $ compileAngleQuery
              (envEnableRecursion env)
              schemaVersion
              schema
              mode
              userQuery_query
              stored
              (envDebug env)

          predDiag <- if Thrift.queryDebugOptions_pred_has_facts debug then
              getPredDiags env repo schema preds
              else return []
          let
            irDiag =
              [ "ir:\n" <> Text.pack (show (displayDefault qiQuery))
              | Thrift.queryDebugOptions_ir debug ]

            cont = case Thrift.userQueryOptions_continuation opts of
              Just c -> Right c
              Nothing -> Left query
          return (ty,compileTime,irDiag <> predDiag,cont)

    details <- getReturnPredicateDetails schema returnType

    if Thrift.userQueryOptions_just_check opts then do
      return $ emptyResult {resDiags = diag}
    else do
      let compileInfo = CompileInfo {
        returnType = returnType,
        compileTime = compileTime,
        compileDiag = diag,
        cont = cont
        }
      runQuery env odb config bounds lookup repo details compileInfo query

data CompileInfo = CompileInfo {
  returnType :: Type,
  compileTime :: Double,
  compileDiag :: [Text],
  cont :: Either CodegenQuery UserQueryCont
}

getPredDiags
  :: Database.Env
  -> Thrift.Repo
  -> DbSchema
  -> [TcPred]
  -> IO [Text]
getPredDiags env repo schema preds = do
  predStats <- PredicateStats.predicateStats env repo StackedDbOpts.IncludeBase
  let predsMissing = filter (\p -> not (tcPredExist p predStats)) preds
  return $ map warnDiag predsMissing
    where
      tcPredExist (PidRef pid _, _) predStats =
        derived schema pid || hasFacts predStats pid
      -- we assume derived predicates "exist" since we won't find any facts
      derived schema pid = case lookupPid pid schema of
        Just details -> case predicateDeriving details of
          Derive DeriveOnDemand _ -> True
          Derive DeriveIfEmpty _ -> True
          _ -> False
        Nothing -> error "Unknown Pid in getPredDiags"
      hasFacts predStats pid = case Map.lookup (fromPid pid) predStats of
        Just stat -> Thrift.predicateStats_count stat > 0
        Nothing -> False
      warnDiag :: TcPred -> Text
      warnDiag (PidRef _ (PredicateId predRef _), Some span) = Text.pack $
        "Warning: "
        <> show (pretty span) ++ " "
        <> Text.unpack (predicateRef_name predRef)
        <> Text.unpack " doesn't exist in " ++ show (Thrift.repo_name repo)

getReturnPredicateDetails :: DbSchema -> Type -> IO PredicateDetails
getReturnPredicateDetails schema@DbSchema{..} returnType = do
  case returnType of
    Angle.PredicateTy _ (PidRef pid _) ->
      case IntMap.lookup (fromIntegral (fromPid pid)) predicatesByPid of
        Nothing -> throwIO $ Thrift.Exception "internal: no predicate"
        Just d -> return d

    _not_a_predicate -> do
      return
          -- This is a temporary Pid generated by Flatten.captureKey,
          -- because the query doesn't return facts. In this case we
          -- are pretending the query returns facts for a new predicate,
          -- so we need a PredicateDetails to pass to unfoldFacts below.
          -- (this is temporary, unfoldFacts wil go away in due course
          -- when we compile it to bytecode).
          PredicateDetails
            { predicatePid = tempPid schema
            , predicateKeyType = returnType
            , predicateValueType = unit
            , predicateId = tempPredicateId
            , predicateSchema = error "predicateSchema"
            , predicateTraversal = error "predicateTraversal"
            , predicateTypecheck = error "predicateTypecheck"
            , predicateDeriving = NoDeriving
            , predicateInStoredSchema = False
            }

runQuery
  :: Database.Env
  -> OpenDB s
  -> ServerConfig.Config
  -> Boundaries
  -> Lookup
  -> Thrift.Repo
  -> PredicateDetails
  -> CompileInfo
  -> Thrift.UserQuery
  -> IO (Results Stats Thrift.Fact)
runQuery
  env
  odb
  config
  bounds
  lookup
  repo
  details
  CompileInfo{..}
  Thrift.UserQuery{..} = do
    vlog 2 $ "return type: " <> show (displayDefault returnType)

    let
      schema@DbSchema{..} = odbSchema odb
      opts = fromMaybe def userQuery_options
      stored = Thrift.userQueryOptions_store_derived_facts opts

    schemaVersion <-
        schemaVersionForQuery schema config userQuery_schema_id
    trans <- transformationsForQuery schema schemaVersion

    unless (Text.null userQuery_predicate) $ do
      -- With Angle queries, setting userQuery_predicate is
      -- optional. If the client sets it, we will check it: this
      -- can be a useful way to catch errors in the client.
      -- If the query is not returning whole facts, then the
      -- client should set this field to "".
      let ref = SourceRef userQuery_predicate userQuery_predicate_version
      checkPredicatesMatch schema details ref schemaVersion

    expandPids <- optsExpandPids opts schemaVersion schema
    let limits = mkQueryRuntimeOptions opts config expandPids

    nextId <- case Thrift.userQueryOptions_continuation opts of
      Just Thrift.UserQueryCont{..}
        | userQueryCont_nextId > 0 -> return (Fid userQueryCont_nextId)
      _otherwise -> firstFreeId lookup
    derived <- FactSet.new nextId
    let stack = stacked lookup derived

    defineOwners <- if stored
      then do
        maybeOwnership <- readTVarIO (odbOwnership odb)
        forM maybeOwnership $ \ownership ->
          newDefineOwnership ownership nextId
      else return Nothing

    appliedTrans <- either (throwIO . Thrift.BadQuery) return $
      transformationsFor schema trans returnType

    ( qResults@QueryResults{..}
      , queryDiag
      , bytecodeSize
      , codegenTime
      , fullScans ) <-
      case cont of
        Right ucont -> do
          let binaryCont = Thrift.userQueryCont_continuation ucont
          results <- transformResultsBack appliedTrans <$>
            restartCompiled
              schemaInventory
              defineOwners
              stack
              (Just $ predicatePid details)
              limits
              binaryCont
          return (results, [], B.length binaryCont, 0, [])

        Left query -> do
          let
            debug = Thrift.userQueryOptions_debug opts
            bytecodeDiag sub =
              [ "bytecode:\n" <> Text.unlines
                (disassemble "Query" userQuerySysCalls $ compiledQuerySub sub)
              | Thrift.queryDebugOptions_bytecode debug ]

          bracket
            (timeIt $ compileQuery (envEnableRecursion env) trans bounds query)
            (\(_, _, sub) -> release $ compiledQuerySub sub)
            $ \(codegenTime, _, sub) -> do
              results <- transformResultsBack appliedTrans <$>
                executeCompiled schemaInventory defineOwners stack sub limits

              diags <-
                evaluate $ force (bytecodeDiag sub) -- don't keep sub alive
              sz <- evaluate $ Bytecode.size (compiledQuerySub sub)
              let fullScans = compiledQueryFullScans sub
              return (results, diags, sz, codegenTime, fullScans)

    -- If we're storing derived facts, queue them for writing and
    -- return the handle. We allow querying for stored derived
    -- predicates with stored=True on a read-only DB; this is used
    -- by the regression testing framework to test derived predicates.
    maybeWriteHandle <-
      if stored && (
          isJust (odbWriting odb) ||
          Thrift.userQueryOptions_omit_results opts)
        then writeDerivedFacts env repo nextId derived defineOwners
          queryResultsFacts
        else return Nothing

    userCont <- case queryResultsCont of
      Nothing -> return Nothing
      Just bs -> do
        nextId <- firstFreeId derived
        return $ Just $ mkUserQueryCont (Right returnType) bs nextId

    stats <- getStats schema fullScans qResults

    when (isJust userCont) $
      addStatValueType "glean.query.truncated" 1 Stats.Sum

    let ppType = renderStrict $ layoutPretty defaultLayoutOptions $
          displayDefault returnType

        results = Results
          { resFacts = Vector.toList queryResultsFacts
          , resPredicate = Just details
          , resNestedFacts = mkNestedFacts queryResultsNestedFacts
          , resCont = userCont
          , resStats = stats
          , resDiags = compileDiag ++ queryDiag
          , resWriteHandle = maybeWriteHandle
          , resFactsSearched = queryResultsStats
          , resType = Just ppType
          , resBytecodeSize = Just bytecodeSize
          , resCompileTime = Just compileTime
          , resCodegenTime = Just codegenTime
          , resExecutionTime = Just queryResultsElapsedNs
          , resResultBytes = Just queryResultsResultBytes
          }

    return $ if Thrift.userQueryOptions_omit_results opts
      then withoutFacts results
      else results

transformationsForQuery
  :: DbSchema
  -> SchemaSelector
  -> IO QueryTransformations
transformationsForQuery schema selector = do
  case allSchemaVersion schema selector of
    Nothing -> throwIO $ Thrift.BadQuery "invalid schema_id"
    Just schemaId ->
      let transMap = predicatesTransformations schema in
      case HashMap.lookup schemaId transMap of
        Just trans -> return trans
        Nothing -> throwIO $ Thrift.BadQuery "no transformations for schema_id"

schemaVersionForQuery
  :: DbSchema
  -> ServerConfig.Config
  -> Maybe Thrift.SchemaId  -- ^ SchemaId specified by client
  -> IO SchemaSelector
schemaVersionForQuery schema ServerConfig.Config{..} qid = do
  use <-
    case qid of
      Nothing -> return LatestSchema
      Just id
        | id `Map.member` schemaEnvs schema -> return (SpecificSchemaId id)
        | config_strict_query_schema_id ->
            throwIO (Thrift.UnknownSchemaId id)
        | otherwise -> do
            logWarning $ "schema unavailable: " <> show id
            return LatestSchema

  vlog 1 $ "using schema ID: " <> show (pretty use)
  return use

optsExpandPids
  :: Thrift.UserQueryOptions
  -> SchemaSelector
  -> DbSchema
  -> IO (Set Pid)
optsExpandPids opts schemaVersion dbSchema =
  fmap Set.fromList $ forM (Thrift.userQueryOptions_expand_predicates opts) $
    \(Thrift.SourcePredicate name version) -> do
      let ref = SourceRef name version
      case lookupPredicateSourceRef ref schemaVersion dbSchema of
        Left err -> throwIO $ Thrift.BadQuery err
        Right details -> return (predicatePid details)

data CompilationMode
  = NoExtraSteps
  | IncrementalDerivation (SeekSection -> Pid -> Bool)

compileAngleQuery
  :: EnableRecursion
  -> SchemaSelector
    -- ^ Schema version to resolve unversioned predicates
  -> DbSchema
  -> CompilationMode
    -- ^ only used in predicate derivations on incremental dbs
  -> ByteString
  -> Bool
  -> DebugFlags
  -> IO (CodegenQuery, Type, [TcPred])
compileAngleQuery rec ver dbSchema mode source stored debug = do
  parsed <- checkBadQuery Text.pack $ Angle.parseQuery source
  ifDebug $ "parsed query: " <> show (displayDefault parsed)

  let scope = addTmpPredicate $ fromMaybe HashMap.empty $
        schemaNameEnv dbSchema ver

  resolved <- checkBadQuery id $ runExcept $
    runResolve latestAngleVersion scope $ resolveQuery parsed
  ifDebug $ "resolved query: " <> show (displayDefault resolved)

  (typechecked, preds) <- (checkBadQuery id =<<) $ runExceptT $
    typecheck dbSchema (defaultTcOpts debug latestAngleVersion)
      (dbSchemaRtsType dbSchema) resolved
  ifDebug $ "typechecked query: " <> show (displayDefault (qiQuery typechecked))

  flattened <- checkBadQuery id $ runExcept $
    flatten rec dbSchema latestAngleVersion stored typechecked
  ifDebug $ "flattened query: " <> show (displayDefault (qiQuery flattened))

  optimised <- checkBadQuery id $ runExcept $ optimise flattened
  ifDebug $ "optimised query: " <> show (displayDefault (qiQuery optimised))

  reordered <- checkBadQuery id $ runExcept $ reorder dbSchema optimised
  ifDebug $ "reordered query: " <> show (displayDefault (qiQuery reordered))

  final <- case mode of
    NoExtraSteps -> return reordered
    IncrementalDerivation getStats -> do
      vlog 2 "made incremental"
      return $ makeIncremental getStats reordered

  return (final, qiReturnType typechecked, preds)
  where
  ifDebug = when (queryDebug debug) . hPutStrLn stderr

  checkBadQuery :: (err -> Text) -> Either err a -> IO a
  checkBadQuery txt act = case act of
    Left str -> throwIO $ Thrift.BadQuery $ txt str
    Right a -> return a

-- | Put the nested facts in the right form for the conversion to
-- (JSON, Compact, Bin).
mkNestedFacts :: Vector (Fid, Thrift.Fact) -> IntMap Thrift.Fact
mkNestedFacts facts =
  IntMap.fromList
    [ (fromIntegral (fromFid id), f)
    | (id,f) <- Vector.toList facts ]

withoutFacts :: Results stats fact -> Results stats fact
withoutFacts results = results
    { resFacts = mempty
    , resNestedFacts = mempty
    }

mkQueryRuntimeOptions
  :: Thrift.UserQueryOptions
  -> ServerConfig.Config
  -> Set Pid
  -> QueryRuntimeOptions
mkQueryRuntimeOptions
    Thrift.UserQueryOptions{..} ServerConfig.Config{..} expandPids =
  QueryRuntimeOptions
    { queryMaxResults = userQueryOptions_max_results
        <|> config_default_max_results -- from ServerConfig
    , queryMaxBytes = userQueryOptions_max_bytes
        <|> config_default_max_bytes -- from ServerConfig
    , queryMaxTimeMs = userQueryOptions_max_time_ms
        <|> config_default_max_time_ms -- from ServerConfig
    , queryMaxSetSize = config_max_set_size_bytes
    , queryWantStats = userQueryOptions_collect_facts_searched
    , queryDepth = if
        | userQueryOptions_recursive && not userQueryOptions_omit_results ->
          ExpandRecursive
        | not (null expandPids) -> ExpandPartial expandPids
        | otherwise -> ResultsOnly
    }

emptyResult :: Results Stats fact
emptyResult = Results {
    resFacts = mempty
  , resPredicate = Nothing
  , resNestedFacts = mempty
  , resCont = Nothing
  , resStats = Stats {
      statFactCount = 0
    , statResultCount = 0
    , statFullScans = []
    }
  , resDiags = []
  , resWriteHandle = Nothing
  , resFactsSearched = Nothing
  , resType = Nothing
  , resBytecodeSize = Nothing
  , resCompileTime = Nothing
  , resCodegenTime = Nothing
  , resExecutionTime = Nothing
  , resResultBytes = Nothing
  }


{- Note [Writing derived facts]

When deriving, we
  (1) Run a query that produces all the derived facts
  (2) Serialize the FactSet
  (3) Write this to the DB

As the query runs, it writes derived facts into a FactSet and also
produces a set of results. The results are correct, however the
FactSet may contain some additional invalid facts. For example, this
can happen if the derived predicate is something like

predicate P : string
  X where
    Q X;
    X != "foo"

when we compile the query, it can look something like

F where
  Q X;
  F = P<- X;    -- (*)
  X != "foo"

The statement labelled (*) produces the derived fact. Note that this
is just a regular statement; it can get arbitrarily reordered relative
to the other statements. In this case the compiler chose to put it
before the final filter X != "foo", which means that we'll create some
derived facts that aren't actually true. There's nothing wrong with
this: the result of the query is still correct, but we have to be
careful to use the query results and not just write the whole contents
of the FactSet to the DB.

It would be difficult to ensure the compiler always ordered F = P<- X
after the filter, there's nothing requiring it to do this. We can't
use statement ordering for this, because statement ordering is for
optimisation and shouldn't change sementics. The compiler can override
ordering hints if it wants.

The right thing to do is to use the query results to filter the
FactSet, and fortunately there's a good place to do this: we already
reorder the results when we serialize the FactSet (for ownership
reasons), so we can do the filtering there.
-}

writeDerivedFacts
  :: Env
  -> Thrift.Repo
  -> Fid
  -> FactSet
  -> Maybe DefineOwnership
  -> Vector (Fid, Thrift.Fact)
  -> IO (Maybe Thrift.Handle)
writeDerivedFacts env repo firstId derived owned results = do
  -- See Note [Writing Derived Facts] for why we filter here
  let order = VS.fromList
         [ fromFid fid
         | (fid,_) <- Vector.toList results
         , fid >= firstId  -- ignore existing facts
         ]
  batch <- case owned of
    Nothing -> FactSet.serializeReorder derived order
    Just define -> do
      nextId <- firstFreeId derived
      let count = fromIntegral (fromFid nextId - fromFid firstId)
      sorted <- defineOwnershipSortByOwner define count order
      FactSet.serializeReorder derived sorted
  -- If the batch is empty, we may still have new ownership data about
  -- existing facts, so we have to write that
  if Thrift.batch_count batch == 0 && isNothing owned
    then return Nothing
    else do
      resp <- enqueueBatch env Thrift.ComputedBatch
        { computedBatch_repo = repo
        , computedBatch_remember = True
        , computedBatch_batch = batch }
        owned
      case resp of
        Thrift.SendResponse_handle handle -> return (Just handle)
        Thrift.SendResponse_retry (Thrift.BatchRetry s) ->
          throwIO $ Thrift.Retry s


-- | Check that the predicate declared in the UserQuery request
-- matches the actual type of the query as determined by the
-- typechecker. This adds some rudimentary type safety: the predicate
-- declared in the UserQuery is typically the one expected at the call
-- site.
checkPredicatesMatch
  :: DbSchema
  -> PredicateDetails
  -> SourceRef
  -> SchemaSelector
  -> IO ()
checkPredicatesMatch dbSchema details predicate schemaVer = do
  case lookupPredicateSourceRef predicate schemaVer dbSchema of
    Left err -> throwIO $ Thrift.BadQuery err
    Right details' ->
      if predicatePid details == predicatePid details'
        then return ()
        else throwIO $ Thrift.BadQuery $ mconcat
          [ "predicate " <> showRef predicate <>
            " does not match type of query: "
          <> predicateRef_name (predicateRef details) <> "."
          <> showt (predicateRef_version (predicateRef details)) ]


data Stats = Stats
  { statFactCount :: {-# UNPACK #-} !Int
  , statResultCount :: {-# UNPACK #-} !Int
  , statFullScans :: [PredicateRef]
  }

getStats :: DbSchema -> [Pid] -> QueryResults -> IO Stats
getStats schema fullScans QueryResults{..} = do
  let
    results =
      Vector.length queryResultsFacts

    facts =
      Vector.length queryResultsFacts +
      Vector.length queryResultsNestedFacts

    pref pid = case lookupPid pid schema of
      Nothing -> error "Unknown Pid in getStats"
      Just details -> predicateIdRef $ predicateId details

  addStatValueType "glean.query.facts" facts Stats.Sum
  addStatValueType "glean.query.results" results Stats.Sum
  return $ Stats
    { statFactCount = facts
    , statResultCount  = results
    , statFullScans = map pref fullScans
    }

withStats :: IO (Results Stats fact) -> IO (Results Thrift.UserQueryStats fact)
withStats io = do
  (secs, bytes, res) <- timeIt io
  let stats = Thrift.UserQueryStats
        { Thrift.userQueryStats_num_facts =
            fromIntegral $ statFactCount $ resStats res
        , Thrift.userQueryStats_result_count =
            fromIntegral $ statResultCount $ resStats res
        , Thrift.userQueryStats_elapsed_ns = truncate (secs * 1000000000)
        , Thrift.userQueryStats_allocated_bytes = fromIntegral bytes
        , Thrift.userQueryStats_facts_searched = resFactsSearched res
        , Thrift.userQueryStats_bytecode_size =
            fromIntegral <$> resBytecodeSize res
        , Thrift.userQueryStats_compile_time_ns =
            fmap (round . (* 1000000000)) (resCompileTime res )
        , Thrift.userQueryStats_codegen_time_ns =
            fmap (round . (* 1000000000)) (resCodegenTime res )
        , Thrift.userQueryStats_execute_time_ns =
            fromIntegral <$> resExecutionTime res
        , Thrift.userQueryStats_full_scans = statFullScans $ resStats res
        , Thrift.userQueryStats_result_bytes =
            fromIntegral <$> resResultBytes res
        }
  return res{ resStats = stats }

mkUserQueryCont
  :: Either (Set Pid) Type
  -> ByteString
  -> Fid
  -> Thrift.UserQueryCont
mkUserQueryCont contInfo cont nextId =
  hashUserQueryCont $ Thrift.UserQueryCont
  { userQueryCont_continuation = cont
  , userQueryCont_nextId = fromFid nextId
  , userQueryCont_version = fromIntegral Bytecode.version
  , userQueryCont_hash = 0
  , userQueryCont_returnType = returnType
  , userQueryCont_pids = pids
  }
  where
    returnType = case contInfo of
      Right ty -> Just $ serializeType ty
      Left _ -> Nothing
    pids = case contInfo of
      Right _ -> []
      Left ps -> map fromPid $ Set.elems ps

checkUserQueryCont :: Thrift.UserQueryCont -> IO ()
checkUserQueryCont cont@Thrift.UserQueryCont{..} = do
  when
    (userQueryCont_version < fromIntegral Bytecode.lowestSupportedVersion
      || userQueryCont_version > fromIntegral Bytecode.version)
    $ throwIO $ Thrift.BadQuery $
        "unsupported query continuation version "
          <> Text.pack (show userQueryCont_version)
  when
    (userQueryCont_hash /= Thrift.userQueryCont_hash (hashUserQueryCont cont))
    $ throwIO $ Thrift.BadQuery "invalid query continuation hash"

hashUserQueryCont :: Thrift.UserQueryCont -> Thrift.UserQueryCont
hashUserQueryCont Thrift.UserQueryCont{..} = Thrift.UserQueryCont
  -- NOTE: The hash is really just a checksum to detect accidental corruption
  -- so a 64 bit non-crypto hash should work fine. Hashable currently uses a
  -- slightly broken (https://github.com/tibbe/hashable/issues/190) version of
  -- FNV-1 but should still be good enough.
  { userQueryCont_hash = fromIntegral $ hash
      ( userQueryCont_continuation
      , userQueryCont_nextId
      , userQueryCont_version
      , userQueryCont_returnType
      )
  , ..
  }

serializeType :: Type -> ByteString
serializeType = Text.encodeUtf8 . renderStrict . layoutCompact .
  displayDefault
    -- Note that we have to print PredicateIds as PredicateRefs, because
    -- the parser will parse it as a source type. The default display options
    -- do exactly that.

compileType :: DbSchema -> SchemaSelector -> ByteString -> IO Type
compileType schema version src = do
  parsed <- checkParsed $ Angle.parseType src
  let scope = addTmpPredicate $ fromMaybe HashMap.empty $
        schemaNameEnv schema version
  resolved <- checkResolved $ runResolve latestAngleVersion scope $
    resolveType parsed
  checkConverted $ dbSchemaRtsType schema resolved
  where
    checkParsed = either (badQuery . Text.pack) return
    checkResolved = either badQuery return . runExcept
    checkConverted = maybe (badQuery "type not present in schema") return

    badQuery :: Text -> IO a
    badQuery err = throwIO $ Thrift.BadQuery $ Text.unlines
      ["unable to compile type: ", err, "type was: ", Text.decodeUtf8 src]