{- Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. -} {-# LANGUAGE CPP #-} module Glean.Database.Create ( kickOffDatabase, updateProperties, ) where import Control.Applicative import qualified Control.Concurrent.Async as Async import Control.Exception hiding(handle) import Control.Monad.Catch (handle) import Control.Monad.Extra import qualified Data.HashMap.Strict as HashMap import qualified Data.Map as Map import Data.Default import Data.Maybe import Data.Text (Text) import qualified Data.Text as Text import qualified Data.UUID as Guid ( toText ) import qualified Data.UUID.V4 as Guid ( nextRandom ) import Text.Printf #ifdef GLEAN_FACEBOOK import Facebook.Process #endif import Util.Log import Util.STM import Glean.Backend.Types (StackedDbOpts(..)) import Glean.BuildInfo import qualified Glean.Database.Catalog as Catalog import Glean.Database.Config import Glean.Database.Exception import Glean.Database.Meta import Glean.Database.Repo import qualified Glean.Database.Storage as Storage import Glean.Database.Open import Glean.Database.PredicateStats import Glean.Database.Types import Glean.Database.Schema ( toStoredSchema, compareSchemaPredicates, renderSchemaSource, toStoredVersions) import Glean.Database.Schema.ComputeIds import Glean.Database.Schema.Types import Glean.Internal.Types hiding (SchemaIndex) import Glean.RTS.Foreign.Lookup (firstFreeId) import Glean.RTS.Types (lowestFid, fromPid) import qualified Glean.ServerConfig.Types as ServerConfig import Glean.Types hiding (Database) import qualified Glean.Types as Thrift import Glean.Util.Observed as Observed -- | Kick off a specifc database, scheduling its tasks as necessary. kickOffDatabase :: Env -> Thrift.KickOff -> IO Thrift.KickOffResponse kickOffDatabase env@Env{..} kickOff@Thrift.KickOff{..} | envReadOnly = dbError kickOff_repo "can't create database in read only mode" | Just err <- validateDbName kickOff_repo = dbError kickOff_repo $ "Can't create database: " <> err | otherwise = do let schemaToUse = case HashMap.lookup "glean.schema_id" kickOff_properties of Just id -> Storage.UseSpecificSchema (SchemaId id) Nothing -> Storage.UseDefaultSchema (mode, kickOff_dependencies') <- case kickOff_dependencies of Nothing -> return (Storage.Create lowestFid Nothing schemaToUse , kickOff_dependencies) Just (Dependencies_stacked stacked) -> do let Thrift.Stacked{..} = stacked repo = Thrift.Repo stacked_name stacked_hash (mode, guid) <- stackedCreate env repo kickOff schemaToUse return ( mode, Just (Dependencies_stacked stacked{stacked_guid=stacked_guid <|> guid})) Just (Dependencies_pruned update) -> do (mode, guid) <- stackedCreate env (pruned_base update) kickOff schemaToUse return ( mode, Just (Dependencies_pruned update{pruned_guid=pruned_guid update <|> guid})) creationTime <- envGetCurrentTime serverProps <- serverProperties fbServerProps <- facebookServerProperties guidProps <- guidProperties let allProps = mconcat [ kickOff_properties , serverProps , fbServerProps , guidProps ] time = DBTimestamp { timestampCreated = creationTime , timestampRepoHash = posixEpochTimeToUTCTime <$> kickOff_repo_hash_time } version <- fromMaybe Storage.currentVersion . ServerConfig.config_db_create_version <$> Observed.get envServerConfig when (not $ Storage.canOpenVersion Storage.ReadWrite version) $ dbError kickOff_repo "can't create databases (unsupported binary version)" db <- atomically $ newDB kickOff_repo handle (\Catalog.EntryAlreadyExists{} -> return $ Thrift.KickOffResponse True) $ mask $ \unmask -> -- FIXME: There is a tiny race here where we might fail in a weird way -- if kick off a DB that is being deleted after it got removed from -- the Catalog but before it got removed from the storage. The entire -- concept of deleting DBs will change with the new metadata handling so -- it's not worth fixing at this point, especially since we aren't -- supposed to be kicking off DBs we've previously deleted. bracket_ (Catalog.create envCatalog kickOff_repo (newMeta version time (Incomplete def) allProps (lightDeps kickOff_dependencies')) $ do modifyTVar' envActive $ HashMap.insert kickOff_repo db writeTVar (dbState db) Opening acquireDB db) (atomically $ releaseDB envCatalog envActive db) $ do -- Open the new db in Create mode which will create the -- physical storage. This might fail - in that case, we -- mark the db as failed. NB. pass the full dependencies -- here, not lightDeps. opener <- asyncOpenDB env envStorage db version mode kickOff_dependencies' (do logInfo $ inRepo kickOff_repo "created") (\exc -> atomically $ void $ -- If opening the db fails for any reason, mark the db as -- failed. Catalog.modifyMeta envCatalog kickOff_repo $ \meta -> return meta { metaCompleteness = Broken DatabaseBroken { databaseBroken_task = "" , databaseBroken_reason = "couldn't create: " <> Text.pack (show exc) } }) OpenDB{..} <- unmask $ Async.wait opener addSchemaIdProperty envCatalog kickOff_repo (schemaId odbSchema) return $ Thrift.KickOffResponse False where addSchemaIdProperty :: Catalog.Catalog -> Repo -> SchemaId -> IO () addSchemaIdProperty catalog repo hash = void $ atomically $ Catalog.modifyMeta catalog repo $ \meta -> return meta { metaProperties = HashMap.insertWith (\_ old -> old) -- if one was provided already, keep it "glean.schema_id" (unSchemaId hash) (metaProperties meta) } guidProperties = do guid <- Guid.toText <$> Guid.nextRandom return $ HashMap.fromList [("glean.guid", guid)] -- The dependencies that we keep in the Meta have the units -- removed, because the units can be large and the Meta has a size -- limit. The units are stored separately in the DB; see -- Glean.Database.Data.storeUnits. lightDeps kickOff_deps = case kickOff_deps of Just (Thrift.Dependencies_pruned pruned) -> Just (Thrift.Dependencies_pruned pruned { pruned_units = [] }) _other -> _other -- | Returns Just an error, or Nothing if valid validateDbName :: Repo -> Maybe String validateDbName Repo {repo_hash} | Just c <- Text.find (`elem` ("\\/. " :: String)) repo_hash = Just $ printf "DB instance contains illegal character '%c'" c | otherwise = Nothing stackedCreate :: Env -> Repo -> KickOff -> Storage.CreateSchema -> IO (Storage.Mode, Maybe Text {- GUID -}) stackedCreate env@Env{..} base KickOff{..} schemaToUse = readDatabase env base $ \OpenDB{..} lookup -> do guid <- atomically $ do meta <- Catalog.readMeta envCatalog base case metaCompleteness meta of Complete{} -> return $ HashMap.lookup "glean.guid" $ metaProperties meta c -> throwSTM $ InvalidDependency kickOff_repo base $ "database is " <> showCompleteness c start <- firstFreeId lookup index <- Observed.get envSchemaSource let storedSchema = toStoredSchema odbSchema ownership <- readTVarIO odbOwnership if | Storage.UseDefaultSchema <- schemaToUse, not kickOff_update_schema_for_stacked -> do return ( Storage.Create start ownership (Storage.UseThisSchema storedSchema), guid ) | otherwise -> do stats <- predicateStats env base IncludeBase -- If update_schema_for_stacked is enabled or the client -- specified glean.schema_id, then we need to check that the -- specified schema agrees with the stored schema in the base -- DB about the definitions of predicates and types. We can do -- a fast check using the hashes, and throw an exception if -- there are any differences. let DbSchema{..} = odbSchema proc = case schemaToUse of Storage.UseSpecificSchema id | Just proc <- schemaForSchemaId index id -> proc _otherwise -> schemaIndexCurrent index hasFacts pred = case HashMap.lookup pred predicatesById of Just PredicateDetails{..} | Just stat <- Map.lookup (fromPid predicatePid) stats -> predicateStats_count stat > 0 _otherwise -> False HashedSchema{..} = procSchemaHashed proc errors = compareSchemaPredicates (filter hasFacts (HashMap.keys predicatesById)) (HashMap.keys hashedPreds) chooseSchema <- if null errors then return $ Storage.UseThisSchema (StoredSchema (renderSchemaSource (procSchemaSource proc)) (storedSchema_predicateIds storedSchema) -- Note: we *must* use the Pids from the base DB (toStoredVersions hashedSchemaAllVersion hashedSchemaId)) else throwIO $ Thrift.Exception $ "update_schema_for_stacked specified, but schemas are " <> "incompatible: " <> Text.intercalate ", " errors return (Storage.Create start ownership chooseSchema, guid) serverProperties :: IO DatabaseProperties serverProperties = return (HashMap.fromList rev) where rev | Text.null buildRevision = [] | otherwise = [ ("glean.server.build_revision", buildRevision) ] facebookServerProperties :: IO DatabaseProperties facebookServerProperties = do #if GLEAN_FACEBOOK twJob <- getTupperwareJob return $ HashMap.fromList (case twJob of Nothing -> [] Just job -> [ ("glean.server.tw_job", job) ]) #else return HashMap.empty #endif updateProperties :: Env -> Repo -> DatabaseProperties -> [Text] -> IO () updateProperties env repo set unset = do when (envReadOnly env) $ throwIO $ Thrift.Exception "updateProperties: server in read-only mode" atomically $ void $ Catalog.modifyMeta (envCatalog env) repo $ \meta -> case metaCompleteness meta of Incomplete{} -> return meta { metaProperties = HashMap.union set $ foldr HashMap.delete (metaProperties meta) unset } c -> throwSTM $ Thrift.Exception $ "updateProperties: database is " <> showCompleteness c