{-
  Copyright (c) Meta Platforms, Inc. and affiliates.
  All rights reserved.

  This source code is licensed under the BSD-style license found in the
  LICENSE file in the root directory of this source tree.
-}

{-

This file implements a set of benchmarks for FactSets which can use real-world
data. It copies all facts from a DB (selected as usual via `--db-root` and
`--db`) and then runs the benchmarks on FactSets initialised with these facts.

For best results, the DB shouldn't be too small but also shouldn't be huge
(since all facts will be stored in RAM multiple times).

For comparison, it can also run the same benchmarks on the DB it obtained the
facts from.

Run each benchmark once:
  factset-bench --db-root=... --db=... --schema=dir

Run only FactSet (but not DB) benchmarks:
  factset-bench ... --only-factset

Run specific benchmark 6 times:
  factset-bench ... --repeat=6 redefine

Run specific benchmark forever (useful for `perf` etc.):
  factset-bench ... --forever redefine

-}

{-# LANGUAGE ApplicativeDo #-}
module FactSetBench (main) where

import Control.Monad
import qualified Data.Vector.Storable as V
import Options.Applicative
import System.Exit (die)

import Glean (Repo, parseRepo)
import qualified Glean.Database.Config as Database (Config, options)
import Glean.Database.Env (withDatabases)
import Glean.Database.Open (readDatabase)
import Glean.Impl.ConfigProvider
import qualified Glean.RTS.Foreign.Benchmarking as B
import qualified Glean.RTS.Foreign.FactSet as FactSet
import qualified Glean.RTS.Foreign.Lookup as Lookup

import Glean.Util.ConfigProvider
import Util.EventBase (withEventBaseDataplane)
import Util.Timing

data Config = Config
  { configOptions :: Database.Config
  , configRepo :: Repo
  , configBenchmarks :: [[Benchmark]]
  , configRunner :: Runner
  , configOnlyFactSet :: Bool
  }

data Ctx = Ctx
  { ctxReport :: String -> IO ()
  , ctxFail :: IO ()
  , ctxDB :: Lookup.Lookup
  , ctxFactBlock :: B.FactBlock
  , ctxFactSet :: FactSet.FactSet
  , ctxRunner :: Runner
  }

type Runner = (String -> IO ()) -> IO Double -> IO ()

runOnce :: Runner
runOnce report action = do
  t <- action
  report $ showTime t

runRepeat :: Int -> Runner
runRepeat n report action = when (n /= 0) $ do
  ts <- replicateM n action
  let min_time = minimum ts
      max_time = maximum ts
      avg_time = sum ts / fromIntegral n
  report $ unwords
    [ "min:", showTime min_time
    , "avg:", showTime avg_time
    , "max:", showTime max_time
    ]

runForever :: Runner
runForever _ = forever

time :: (Ctx -> IO Bool) -> Ctx -> IO ()
time action ctx = ctxRunner ctx (ctxReport ctx) $ do
  (time, _, ok) <- timeIt $ action ctx
  when (not ok) $ ctxFail ctx
  return time

data Benchmark = Benchmark
  { bName :: String
  , bEnabled :: Config -> Bool
  , bRun :: Ctx -> IO ()
  }

bench :: String -> (Ctx -> IO ()) -> [Benchmark]
bench name f = [Benchmark name (const True) f]

benchBoth
  :: String
  -> (forall l. Lookup.CanLookup l => l -> Ctx -> IO ())
  -> [Benchmark]
benchBoth name f =
  [ Benchmark (name ++ "-db") (not . configOnlyFactSet) $ \ctx -> f (ctxDB ctx) ctx
  , Benchmark name (const True) $ \ctx -> f (ctxFactSet ctx) ctx ]

lookupB
  :: Lookup.CanLookup l => (l -> B.FactBlock -> IO Bool) -> l -> Ctx -> IO ()
lookupB f l = time $ f l <$> ctxFactBlock

benchmarks :: [Benchmark]
benchmarks = concat
  [ bench "define" $ \ctx@Ctx{..} -> do
      id <- Lookup.startingId ctxDB
      time (const $  do
        factset <- FactSet.new id
        B.defineEach factset ctxFactBlock) ctx

  , bench "redefine" $ time $ B.defineEach <$> ctxFactSet <*> ctxFactBlock

  , benchBoth "type" $ lookupB B.lookupEachType
  , benchBoth "byId" $ lookupB B.lookupEachById
  , benchBoth "byKey" $ lookupB B.lookupEachByKey
  , benchBoth "seekToEach" $ lookupB B.seekToEach
  -- The first seekToEach initialised things in the FactSet so lets' measure
  -- again
  , benchBoth "seekToEach2" $ lookupB B.seekToEach
  , benchBoth "seekAll" $ \l ctx@Ctx{..} -> do
      count <- FactSet.factCount ctxFactSet
      pids <- V.fromList . map fst <$> FactSet.predicateStats ctxFactSet
      time (const $ (count == ) <$> B.seekCount l pids) ctx
  ]

options :: ParserInfo Config
options = info (parser <**> helper)
  (fullDesc <> progDesc "Generate a random batch of facts")
  where
    parser = do
      configOptions <- Database.options
      configRepo <- option (maybeReader Glean.parseRepo)
        (  long "db"
        <> metavar "NAME/INSTANCE"
        <> help "identifies the database"
        )
      configBenchmarks <- many $ argument
        (maybeReader $ \s -> case filter (\b -> bName b == s) benchmarks of
          [] -> Nothing
          xs -> Just xs)
        ( metavar "BENCHMARK"
        <> help "selects benchmark"
        )
      configRunner <-
        fmap runRepeat (option auto
          (  long "repeat"
          <> metavar "N"
          <> help "repeat each benchmark N times" ))
        <|> flag' runForever (long "forever")
        <|> pure runOnce
      configOnlyFactSet <- switch
        ( long "only-factset"
        <> help "skip DB benchmarks"
        )
      return Config{..}

main :: IO ()
main =
  withConfigOptions options $ \(config@Config{..}, cfg) ->
  withEventBaseDataplane $ \evb ->
  withConfigProvider cfg $ \(cfgAPI :: ConfigAPI) ->
  withDatabases evb configOptions cfgAPI $ \env ->
  readDatabase env configRepo $ \_ db -> do
    block <- B.createFactBlock db
    startingId <- Lookup.startingId db
    factset <- FactSet.new startingId
    ok <- B.defineEach factset block
    when (not ok) $ die "FactSet creation failed"

    block_count <- B.factCount block
    block_memory <- B.factMemory block
    putStrLn $ unwords
      [ "FactBlock facts:", show block_count
      , "factmem:", showAllocs $ fromIntegral block_memory
      ]
    count <- FactSet.factCount factset
    mem <- FactSet.allocatedMemory factset
    factmem <- FactSet.factMemory factset
    putStrLn $ unwords
      [ "FactSet facts:", show count
      , "mem:", showAllocs $ fromIntegral mem
      , "factmem:", showAllocs $ fromIntegral factmem
      ]

    let bs = case configBenchmarks of
          [] -> benchmarks
          _ -> concat configBenchmarks

        width = maximum $ map (length . bName) bs

    forM_ bs $ \Benchmark{..} -> when (bEnabled config) $ bRun Ctx
      { ctxReport = \s ->
          putStrLn $ bName ++ replicate (width - length bName + 1) ' ' ++ s
      , ctxFail = die $ bName ++ " failed"
      , ctxDB = db
      , ctxFactBlock = block
      , ctxFactSet = factset
      , ctxRunner = configRunner
      }