{- Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved. This source code is licensed under the BSD-style license found in the LICENSE file in the root directory of this source tree. -} {- | Generate thrift/Haskell/C++/Python headers from a Glean schema This script is run automatically via Buck custom_rule()s, see glean/schema/TARGETS. buck run @mode/opt //glean/schema/gen:gen-schema -- --help Usage: gen-schema ([--cpp ARG] | [--thrift ARG] | [--hs ARG] | [--py ARG]) (-i|--input FILE) [-d|--install_dir DIR] Available options: -i,--input FILE materialized JSON input file -h,--help Show this help text -} {-# LANGUAGE ApplicativeDo #-} module Glean.Schema.Gen.Main ( main ) where import Control.Exception import Control.Monad import Control.Monad.Except import Data.Bifoldable (bifoldMap) import qualified Data.ByteString.Char8 as BC import Data.Default import qualified Data.HashMap.Strict as HashMap import qualified Data.IntMap.Strict as IntMap import Data.Graph import Data.List import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) import Data.Maybe import qualified Data.Set as Set import Data.Text (Text, splitOn) import qualified Data.Text as Text import qualified Data.Text.IO as Text import qualified Data.Tree as Tree import Options.Applicative import System.Directory import System.FilePath import System.IO import qualified Text.Fuzzy as Fuzzy import Text.Printf import Thrift.Util import Util.IO import Util.Timing import Util.OptParse (commandParser) import Glean.Query.Typecheck (tcQueryDeps) import Glean.Query.Codegen.Types (QueryWithInfo(..)) import Glean.Angle.Types import Glean.Angle.Parser import Glean.Database.Config hiding (options) import Glean.Database.Schema import Glean.Database.Schema.ComputeIds import Glean.Database.Schema.Types import Glean.Display import qualified Glean.Internal.Types as Internal import Glean.RTS.Types (PidRef(..), ExpandedType(..)) import Glean.Schema.Resolve (resolveSchemaRefs) import Glean.Schema.Util (showRef) import Glean.Schema.Gen.Thrift import Glean.Schema.Gen.Cpp ( genSchemaCpp ) import Glean.Schema.Gen.HackJson ( genSchemaHackJson ) import Glean.Schema.Gen.Haskell ( genSchemaHS ) import Glean.Schema.Gen.Python ( genSchemaPy ) import Glean.Schema.Gen.OCaml ( genSchemaOCaml ) import Glean.Schema.Gen.Rust ( genSchemaRust ) import Glean.Schema.Gen.Utils ( NameSpaces, Oncall ) import Glean.Schema.Types import Glean.Types (SchemaId(..)) data Options = Options { input :: Either FilePath FilePath , omitArchive :: Bool , actOptions :: Either GraphOptions GenOptions } data GraphOptions = GraphOptions { target :: Maybe Text , maxDepth :: Maybe Int , reverseDeps :: Bool , topologicalSort :: Bool , graphType :: GraphType , ignoreDerivations :: Bool } data GraphType = SchemaGraph | PredicateGraph data GenOptions = GenOptions { thrift :: Maybe FilePath , cpp :: Maybe FilePath , hackjson :: Maybe FilePath , hs :: Maybe FilePath , py :: Maybe FilePath , ocaml :: Maybe FilePath , rust :: Maybe FilePath , source :: Maybe FilePath , updateIndex :: Maybe FilePath , install_dir :: FilePath , restrictSchemas :: Maybe [NameSpaces] , oncall :: Maybe Oncall } toNameSpaces :: [Text] -> [NameSpaces] toNameSpaces names = splitOn "." <$> names optSchemas :: Parser (Maybe [NameSpaces]) optSchemas = optional ( toNameSpaces . splitOn "," <$> strOption ( long "schemas" <> metavar "SCHEMAS" <> help "Restrict schemas to generate" )) options :: Parser Options options = do omitArchive <- switch $ long "omit-archive" <> help ("ignore .angle files under archive/ directories. " <> "Non-archived schemas should not depend on archived schemas; this " <> "flag is for testing that that property holds.") let dir = strOption (long "dir" <> metavar "DIR") oneFile = strOption (long "input" <> metavar "FILE") input <- (Left <$> oneFile) <|> (Right <$> dir) actOptions <- fmap Left graphOptions <|> fmap Right genOptions return Options{..} where graphOptions = do commandParser "graph" (progDesc "View the dependency graph of a local schema") $ do reverseDeps <- switch ( long "reverse-deps" <> help "view reverse dependencies instead" ) topologicalSort <- switch ( long "topsort" <> help "print a topologically sorted list of dependencies" ) target <- optional $ strOption ( long "root" <> metavar "NODE" <> help "reference schema/predicate" ) maxDepth <- optional $ option auto ( long "depth" <> metavar "INT" <> help "restrict how many levels deep the tree should go" ) graphType <- flag SchemaGraph PredicateGraph ( long "predicates" <> help "show a graph of predicates rather than of schemas" ) ignoreDerivations <- switch ( long "ignore-derivations" <> help "in a predicate graph ignore dependencies \ \from derivation queries." ) return GraphOptions{..} genOptions = do thrift <- optional $ strOption $ long "thrift" <> metavar "FILE" cpp <- optional $ strOption $ long "cpp" <> metavar "FILE" hackjson <- optional $ strOption $ long "hackjson" <> metavar "FILE" hs <- optional $ strOption $ long "hs" <> metavar "FILE" py <- optional $ strOption $ long "py" <> metavar "FILE" ocaml <- optional $ strOption $ long "ocaml" <> metavar "FILE" rust <- optional $ strOption $ long "rust" <> metavar "FILE" source <- optional $ strOption $ long "source" <> metavar "FILE" updateIndex <- optional $ strOption $ long "update-index" <> metavar "FILE" <> help ("Add the schema to the index at FILE, making it the new " <> "current schema") install_dir <- strOption $ long "install_dir" <> short 'd' <> metavar "DIR" <> value "" oncall <- optional $ strOption ( long "oncall" <> value "" <> metavar "NAME" <> help "Adds oncall annotation with oncall NAME to generated files " ) restrictSchemas <- optSchemas return GenOptions{..} main :: IO () main = do Options{..} <- execParser (info (options <**> helper) fullDesc) (src, schema, dbschema) <- reportTime "parse/resolve/typecheck" $ do str <- case input of Left one -> BC.readFile one Right dir -> do files <- listDirectoryRecursive dir catSchemaFiles $ if omitArchive then filter (not . ("/archive/" `isInfixOf`)) files else files schema <- case processSchema Nothing str of Left err -> throwIO $ ErrorCall err Right schema -> return schema -- Just check that parseSchemaCached works, because the server -- will be using it. case processSchemaCached Nothing HashMap.empty str of Left err -> throwIO $ ErrorCall $ err Right{} -> return () -- for typechecking dbschema <- newDbSchema Nothing (SchemaIndex schema []) LatestSchema readWriteContent def return (str, schema, dbschema) let ProcessedSchema sourceSchemas resolved _ = schema reportTime "checking schema roundtrip" $ do let pp = show (displayDefault sourceSchemas) case parseSchema (BC.pack pp) of Left err -> throwIO $ ErrorCall $ "schema roundtrip error: " <> err ++ "\n" ++ pp Right schemasRoundTrip -> when (rmLocSchemas sourceSchemas /= rmLocSchemas schemasRoundTrip) $ throwIO $ ErrorCall "schema did not roundtrip successfully" refsResolved <- either (die 1 . Text.unpack) return $ runExcept $ resolveSchemaRefs sourceSchemas let -- We have to ensure the types and predicates exported by each -- "all" schema is transitively closed, otherwise either the code -- generator will fail, or the generated code will fail to compile. allSchemas = [ schema { resolvedSchemaReExportedPredicates = preds, resolvedSchemaReExportedTypes = types } | schema <- schemasResolved resolved , resolvedSchemaName schema == "all" , let deps :: [ResolvedSchemaRef] deps = [ s | Just v <- [toVertex (schemaSrcRef (schemaRef schema))] , (_, n, _) <- reachableFrom v , Just s <- [HashMap.lookup n schemaMap] ] types = HashMap.unions (map resolvedSchemaTypes deps) preds = HashMap.unions (map resolvedSchemaPredicates deps) ] schemaSrcRef (SchemaRef n v) = SourceRef n (Just v) (depGraph, fromVertex, toVertex) = graphFromEdges edges reachableFrom v = map fromVertex $ concatMap Tree.flatten (dfs depGraph [v]) edges = [ (schema, schemaName schema, schemaDependencies schema) | schema <- srcSchemas refsResolved ] schemaMap = HashMap.fromList [ (schemaSrcRef (schemaRef s), s) | s <- schemasResolved resolved ] findVersion v = listToMaybe [ s | s@ResolvedSchema{..} <- allSchemas , resolvedSchemaVersion == v ] versions <- case schemasHighestVersion resolved of Just ver | Just schema <- findVersion ver, ver == schemaAllVersion dbschema -> return [(ver, schemaId dbschema, schema, Nothing)] _otherwise -> fail "missing 'all' schema" case actOptions of Left opts -> graph opts dbschema refsResolved [ v | (v,_,_,_) <- versions ] Right opts -> do forM_ (source opts) $ \f -> BC.writeFile f src forM_ (updateIndex opts) (doUpdateIndex src schema) reportTime "gen" $ gen opts versions graph :: GraphOptions -> DbSchema -> SourceSchemas -> [Version] -> IO () graph opts dbschema sourceSchemas versions = Text.putStrLn $ drawGraph opts graph roots where predicatesGraph = predicateGraph (ignoreDerivations opts) dbschema (roots, graph) = case graphType opts of PredicateGraph -> case target opts of Nothing -> (Map.keys predicatesGraph, predicatesGraph) Just t -> case Map.lookup t predicatesGraph of Nothing | cand:_ <- Fuzzy.simpleFilter t (Map.keys predicatesGraph) -> errorWithoutStackTrace $ Text.unpack $ "did you mean " <> cand <> " ?" Nothing -> errorWithoutStackTrace $ Text.unpack $ "predicate not found in graph: " <> t Just _ -> (pure t, predicatesGraph) SchemaGraph -> let everyAllSchema = map (\v -> "all." <> Text.pack (show v)) versions roots = maybe everyAllSchema pure (target opts) in (roots, schemaGraph sourceSchemas) schemaGraph :: SourceSchemas -> Map Text [Text] schemaGraph sourceSchemas = Map.fromList [ (showRef (schemaName s), map showRef (schemaDependencies s)) | s <- srcSchemas sourceSchemas ] schemaDependencies :: SourceSchema -> [SourceRef] schemaDependencies SourceSchema{..} = schemaInherits ++ [ name | SourceImport name _ <- schemaDecls ] predicateGraph :: Bool -> DbSchema -> Map Text [Text] predicateGraph ignoreDerivations dbschema = Map.fromList [ (ref, deps) | details <- IntMap.elems (predicatesByPid dbschema) , let ref = showRef (predicateRef details) deps = showRef <$> Set.toList (dependencies details) ] where dependencies details = asRefs $ keyValueDeps <> derivationDeps where keyValueDeps = typeDeps (predicateKeyType details) <> typeDeps (predicateValueType details) derivationDeps | not ignoreDerivations , Derive _ (QueryWithInfo query _ _ _) <- predicateDeriving details = tcQueryDeps query | otherwise = mempty typeDeps = bifoldMap overPidRef overExpanded where overExpanded (ExpandedType _ ty) = typeDeps ty overPidRef (PidRef _ ref) = Set.singleton ref asRefs = Set.fromList . fmap predicateIdRef . Set.toList drawGraph :: GraphOptions -> Map Text [Text] -> [Text] -> Text drawGraph GraphOptions{..} deps' roots = Text.unlines $ if topologicalSort then topsort forest else drawTree <$> forest where forest = map (mkTree deps mempty 1) roots deps | reverseDeps = Map.fromListWith (<>) $ concatMap (\(s,ss) -> (,[s]) <$> ss) $ Map.toList deps' | otherwise = deps' mkTree graph seen depth root = Tree.Node root subtree where subtree = map (mkTree graph (Set.insert root seen) (depth + 1)) children continue = notRecursing && canGoDeeper where notRecursing = not (root `Set.member` seen) canGoDeeper = maybe True (depth <=) maxDepth children | continue = Map.findWithDefault [] root graph | otherwise = [] drawTree :: Tree Text -> Text drawTree (Node root children) = Text.unlines $ root : draw children where draw [] = [] draw (x:xs) = drawNode (null xs) x <> draw xs drawNode isLast (Node name children) = (pre <> name) : map (spacer <>) (draw children) where (spacer, pre) = if isLast then (" ", "└── ") else ("│   ", "├── ") topsort :: Ord a => [Tree a] -> [a] topsort forest = reverse postorder where postorder = go mempty forest (const []) where go seen [] cont = cont seen go seen (Node node children : rest) cont | node `Set.member` seen = go seen rest cont | otherwise = go (Set.insert node seen) children $ \seen' -> node : go seen' rest cont gen :: GenOptions -> [(Version, SchemaId, ResolvedSchemaRef, Maybe FilePath)] -> IO () gen GenOptions{..} versions = mapM_ genFor versions where genFor :: (Version, SchemaId, ResolvedSchemaRef, Maybe FilePath) -> IO () genFor (_, hash, ResolvedSchema{..}, dir) = do let ts = HashMap.elems resolvedSchemaReExportedTypes ps = HashMap.elems resolvedSchemaReExportedPredicates doGen _ Nothing = return () doGen gen (Just output) = do let odir = install_dir fromMaybe "" dir output forM_ (gen resolvedSchemaVersion ps ts oncall) $ \(file,text) -> do let path = odir file createDirectoryIfMissing True (takeDirectory path) Text.writeFile path (text <> "\n") doGen (genSchemaCpp hash) cpp doGen genSchemaHackJson hackjson doGen genSchemaHS hs doGen genSchemaPy py doGen (genSchemaOCaml restrictSchemas) ocaml doGen (genSchemaRust hash) rust doGen (genSchemaThrift dir hash) thrift -- ----------------------------------------------------------------------------- -- Working with the SchemaIndex -- | Add a new schema to an existing SchemaIndex. doUpdateIndex :: BC.ByteString -> ProcessedSchema -> FilePath -> IO () doUpdateIndex src new@(ProcessedSchema _ _ hashed) indexFile = do hPutStrLn stderr $ "Updating index in " <> indexFile -- check that the new schema instance is compatible with the -- existing ones exists <- doesFileExist indexFile when exists $ do current <- parseSchemaIndex indexFile validateNewSchemaInstance $ current { schemaIndexCurrent = new, schemaIndexOlder = schemaIndexCurrent current : schemaIndexOlder current } let -- store the new schema in the file "instance/" relative -- to the index file, where is the SchemaId of the -- latest "all" schema file = "instance/" <> unSchemaId (hashedSchemaId hashed) versions = Map.fromList [( unSchemaId (hashedSchemaId hashed), fromIntegral (hashedSchemaAllVersion hashed) )] baseIndex = def { Internal.schemaIndex_current = def { Internal.schemaInstance_versions = versions, Internal.schemaInstance_file = file } } -- If this exact schema is present already (perhaps as an older -- instance) then promote it to be the current schema. If the new -- schema is identical to the current one, this will leave -- everything as it was. newIndex <- do exists <- doesFileExist indexFile if exists then do oldIndex <- loadJSON indexFile let existingInstances = filter (((/=) versions) . Internal.schemaInstance_versions) $ Internal.schemaIndex_current oldIndex : Internal.schemaIndex_older oldIndex hPutStrLn stderr $ printf "Old index contains %d instance(s)" (1 + length (Internal.schemaIndex_older oldIndex)) return baseIndex { Internal.schemaIndex_older = existingInstances } else return baseIndex hPutStrLn stderr $ printf "New index contains %d instance(s)" (1 + length (Internal.schemaIndex_older newIndex)) saveJSON indexFile newIndex let fileToWrite = takeDirectory indexFile Text.unpack file createDirectoryIfMissing True (takeDirectory fileToWrite) BC.writeFile fileToWrite src