{-
  Copyright (c) Meta Platforms, Inc. and affiliates.
  All rights reserved.

  This source code is licensed under the BSD-style license found in the
  LICENSE file in the root directory of this source tree.
-}

{-
Convert Data.LSIF into glean/schema/lsif.angle-compatible data via JSON.

Note: this module generates Angle but has no dependency on the lsif schema, to
make developer iteration quicker.

-}

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}

module Data.LSIF.Angle (
    factToAngle,

    Predicate,
    PredicateMap,
    generateJSON,
    insertPredicateMap,
    emitFileFactSets,

    -- parse state
    Env(..),
    emptyEnv

  ) where

import Control.Monad
import Control.Monad.Extra ( concatMapM )
import Control.Monad.State.Strict
import Data.Aeson
import Data.Aeson.Types ( Pair )
import Data.Maybe ( catMaybes, fromMaybe, listToMaybe, mapMaybe )
import qualified Data.IntMap.Strict as IMap
import Data.Text ( Text )
import qualified Data.Text as Text
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U

import Data.LSIF.Types
import Data.LSIF.Moniker
import Data.LSIF.Env
import Data.LSIF.Gen
import Data.LSIF.JSON ({- instances -})

-- Turn a document URI into a repo-relative path for Glean.
--
-- Every root recorded in the parse state is tried in order as a "<root>/"
-- prefix; the remainder after the first root that matches is returned.
-- Some indexers generate uris outside of the repo, so when no root matches
-- the uri is handed back unchanged.
filterRoot :: Text -> Parse Text
filterRoot path = do
  roots <- gets root
  let stripped =
        [ rest
        | r <- roots
        , Just rest <- [Text.stripPrefix (r <> "/") path]
        ]
  pure $ case stripped of
    rest : _ -> rest
    [] -> path

--
-- | Convert LSIF json to Glean json
--
-- Process each key/fact pair
--
-- We map over the statements, in order, generating 0 or more facts for Glean
-- LSIF guarantees things are defined before references to them are used, and
-- uses unique ids for facts.
-- We can use that to simplify processing
--
-- vertexes correspond to facts holding new data
-- edges relate existing vertex facts to each other (with new facts)
--
factToAngle :: KeyFact -> Parse [Predicate]

-- Metadata: register the project root with the parse state, then emit the
-- version and position encoding. Tool information is optional, so its field
-- is only present when the indexer supplied it.
factToAngle (KeyFact _ MetaData{..}) = do
  appendRoot projectRoot
  let coreFields =
        [ "lsifVersion" .= version
        , "positionEncoding" .= positionEncoding
        ]
      toolFields = case toolInfo of
        Just ToolInfo{..} ->
          [ "toolInfo" .= object
              [ "toolName" .= toolName
              , "toolArgs" .= toolArgs
              , "version" .= version -- the tool's version, not the lsif one
              ]
          ]
        Nothing -> []
  predicate "lsif.Metadata" (coreFields ++ toolFields)

-- Projects: record the id as a project, emit its kind as an enum index
factToAngle (KeyFact n Project{..}) =
  insertType n ProjectType >>
    predicateId "lsif.Project" n [ "kind" .= fromEnum kind ]

factToAngle (KeyFact _ PackageInformation{..}) =
  predicate "lsif.PackageInformation"
    [ "name" .= name
    , "manager" .= manager
    , "version" .= version
    ]

-- LSIF Documents are uri/language pairs
-- Rather than key on src.File, we use lsif.Document to keep the language of
-- the symbol accessible, for mixed-language lsif dbs.
--
factToAngle (KeyFact n Document{..}) = do
  insertType n FileType
  relPath <- filterRoot uri
  predicateId "lsif.Document" n
    [ "file" .= string relPath -- n.b. anonymous src.File fact
    , "language" .= fromEnum language
    ]

-- Events push a document or project identifier onto the open stack, i.e. the
-- list of documents or projects for which facts may still be emitted.
-- We don't currently use these event markers for anything.
factToAngle (KeyFact _ Event{}) = return []

-- Associate a range fact with a result set.
-- We use this to track chains of ref -> resultset -> def for later generation
-- note: inV targets are always resultSets, outVs may be ranges or resultSets
factToAngle (KeyFact _ (Edge EdgeNext outV inV)) =
  addToResultSet outV (tagResultSet inV) >> pure []

-- textDocument/hover edge: remember which result set the hover text
-- belongs to
factToAngle (KeyFact _ (Edge EdgeTextDocumentHover outV inV)) =
  addHoverToResultSet inV outV >> pure []

-- moniker edge: remember which result set the moniker points at
factToAngle (KeyFact _ (Edge EdgeMoniker outV inV)) =
  addMonikerToResultSet inV outV >> pure []

-- edge:item for property:references associates an inV range with a
-- textDocument/references result. Mark each of those ranges as a reference.
-- n.b. the spec doesn't guarantee property:references will be set.
factToAngle (KeyFact _ (Item _outV inVs _fileId (Just References))) =
  insertTypes inVs ReferenceType >> pure []

-- items : these add ranges to definition results
-- we check the result set of this definitionResult, then record
-- that the result set points at this definition ranges / document pair
--
-- other ranges may point at the same result set, but they will (implicitly)
-- be reference ranges.
--
factToAngle (KeyFact _ (Item outV inVs{-ranges-} fileId Nothing)) = do
  -- the ranges inherit the kind of the result node the item hangs off:
  -- a definitionResult makes them definitions, a referenceResult makes them
  -- references, a declarationResult makes them declarations. Any other
  -- (or unknown) source type leaves the ranges untyped.
  mTy <- getTypeOf outV
  case mTy of
    Just ty
      | ty `elem` [DefinitionType, ReferenceType, DeclarationType] ->
          insertTypes inVs ty
    _ -> pure ()

  -- a textDocument/definition edge may already have told us the result set
  -- of this definitionResult. If not, file the ranges under the
  -- definitionResult id itself so they can be found when that edge arrives.
  mResultSet <- getResultSetOf outV
  let owner = fromMaybe (tagResultSet outV {- definitionResult -}) mResultSet
  [] <$ addToDefinitionFile owner (tagFile fileId) (tagDefinitions inVs)

-- collect textDocument/definition and textDocument/hover edges to resultsets
-- If we have seen the definitionResult inV already, then just add an entry
-- from the result set. Otherwise, record the edge from definition to result
factToAngle (KeyFact _ (Edge EdgeTextDocumentDefinition outV inV)) = do
  knownFileDefs <- getDefinitionFile (tagResultSet inV)
  forM_ knownFileDefs (shareDefinitionFile (tagResultSet outV))
  -- always log that this definitionResult is member of resultset
  [] <$ addToResultSet inV (tagResultSet outV)

-- Range facts.
-- These are range spans, 0-indexed, and may have optional
-- tag/labels indicating what kind of span they are
factToAngle (KeyFact n (SymbolRange range mtag)) = do
  -- the tag, when present, tells us the type of the range directly.
  -- That is convenient, but the specification does not require it.
  case mtag of
    Just Definition{} -> insertType n DefinitionType
    Just Declaration{} -> insertType n DeclarationType
    Just Reference{} -> insertType n ReferenceType
    _ -> pure ()
  -- only tags for which tagToKind yields a value carry a symbol kind
  forM_ (mtag >>= tagToKind) (insertSymbolKind n)
  -- emit a range fact for this id
  let nameField = maybe (string "" {- better to use nothing? -}) toName mtag
      fullRangeField = fromMaybe [] (mtag >>= tagToRange)
  predicateId "lsif.Range" n
    ([ "range" .= toRange range
     , "text" .= nameField
     ] ++ fullRangeField)

-- Hover text: one lsif.HoverContent predicate per entry, all under the id of
-- the hover result. Plain text entries are given UnknownLanguage.
factToAngle (KeyFact n (HoverResult contents)) =
  concat . V.toList <$> V.mapM hoverFact contents
  where
    hoverFact (HoverSignature language str) =
      hoverContent (fromEnum language) str
    hoverFact (HoverText str) =
      hoverContent (fromEnum UnknownLanguage) str

    hoverContent languageCode str = predicateId "lsif.HoverContent" n
      [ "text" .= object [ "key" .= str ] -- bare lsif.HoverText
      , "language" .= languageCode
      ]

-- Moniker payloads. When processMoniker yields a result, its identifier
-- replaces the raw one and an additional lsif.MonikerSymbolKind fact is
-- emitted for the kind it returned.
factToAngle (KeyFact n (Moniker kind scheme ident)) =
  case processMoniker kind scheme ident of
    Nothing -> monikerFact ident
    Just (ident', kindLiteral) ->
      (<>) <$> monikerFact ident' <*> symbolKindFact kindLiteral
  where
    monikerFact identText = predicateId "lsif.Moniker" n
      [ "kind" .= fromEnum kind
      , "scheme" .= string scheme
      , "ident" .= string identText
      ]

    symbolKindFact kindLiteral = predicate "lsif.MonikerSymbolKind"
      [ "moniker" .= n
      , "kind" .= fromEnum kindLiteral
      ]

-- These are output nodes.
-- We generally need to track the type of the id,
-- as untyped item edges will point at these, and it may be the only way to
-- find the type of the underlying range
factToAngle (KeyFact n DefinitionResult) =
  insertType n DefinitionType >> pure []
factToAngle (KeyFact n DeclarationResult) =
  insertType n DeclarationType >> pure []
factToAngle (KeyFact n ReferenceResult) =
  insertType n ReferenceType >> pure []
factToAngle (KeyFact _ ResultSet) = pure []

-- contains edges: remember the range ids held by a file id
-- (or the file ids held by a project id).
factToAngle (KeyFact _ (Contains fileId inVs)) =
  addFileContainsIds (tagFile fileId) inVs >> pure []

-- anything else produces no facts
factToAngle _ = pure []

--
-- After consuming the LSIF graph, we should have the full set of
-- {def,decl,ref} -> resultset -> definitionResult data
--
-- Walks every entry of the fileContains map in the parse state and
-- concatenates the predicates produced for each one.
--
emitFileFactSets :: Parse [Predicate]
emitFileFactSets = do
  perFile <- gets (IMap.toList . fileContains)
  concatMapM
    (\(k, vectors) -> emitFileFacts (tagFile (Id (fromIntegral k))) vectors)
    perFile

-- Ids split up by the type recorded for them in the env
data IdSet = IdSet
  { defIds :: !IdVector
  , declIds :: !IdVector
  , refIds :: !IdVector
  , fileIds :: !IdVector
  }

-- Look up the recorded type of every id, then select the ids of each type we
-- care about. Ids with no recorded type, or any other type, end up in none of
-- the four vectors.
partitionByType :: IdVector -> Parse IdSet
partitionByType ids = do
  -- tys is index-aligned with ids
  tys <- V.generateM (U.length ids) $ \i ->
    getTypeOf (Id (U.unsafeIndex ids i))
  -- one filter pass per type, we don't care about ordering
  let ofType ty = U.ifilter (\i _ -> V.unsafeIndex tys i == Just ty) ids
  pure IdSet
    { defIds = ofType DefinitionType
    , declIds = ofType DeclarationType
    , refIds = ofType ReferenceType
    , fileIds = ofType FileType
    }

-- Emit the facts for every id vector recorded against one file id
emitFileFacts :: Id_ FileTy -> [IdVector] -> Parse [Predicate]
emitFileFacts fileId vectors =
  concat <$> mapM (emitFileFacts_ fileId) vectors

-- We delay this until the post-processing phase to ensure all
-- - item, textDocument/definition, etc..
-- nodes are added
--
-- Partition the ids by recorded type, then build each family of facts.
-- Empty families are dropped from the result.
emitFileFacts_ :: Id_ FileTy -> IdVector -> Parse [Predicate]
emitFileFacts_ fileId rawIds = do
  IdSet{ defIds = defs, declIds = decls, refIds = refs, fileIds = files }
    <- partitionByType rawIds
  xrefFacts <- emitReferences fileId refs
  useFacts <- emitTargetUses fileId refs
  hoverFacts <- emitHovers fileId defs
  monikerFacts <- emitMonikers fileId defs
  symbolKindFacts <- emitSymbolKinds fileId defs
  pure $ catMaybes
    [ emitDefinitions fileId defs
    , emitDeclarations fileId decls
    , xrefFacts
    , useFacts
    , hoverFacts
    , emitProjects fileId files {- from projectId to files -}
    , monikerFacts
    , symbolKindFacts
    ]

-- One lsif.ProjectDocument fact per contained id, or Nothing for no ids
emitProjects :: Id -> IdVector -> Maybe Predicate
emitProjects projId ids
  | U.null ids = Nothing
  | otherwise = Just (Predicate "lsif.ProjectDocument" facts)
  where
    facts =
      [ object
          [ key
              [ "file" .= docId
              , "project" .= projId
              ]
          ]
      | docId <- U.toList ids
      ]

-- lsif.DefinitionKind facts for the ids that have a recorded symbol kind
emitSymbolKinds :: Id -> IdVector -> Parse (Maybe Predicate)
emitSymbolKinds fileId ids = do
  results <- forM (U.toList ids) (generateSymbolKindFacts fileId . Id)
  pure $ case catMaybes results of
    [] -> Nothing
    factBodies -> Just (Predicate "lsif.DefinitionKind" factBodies)

-- lsif.DefinitionMoniker facts for the given ids
emitMonikers :: Id -> IdVector -> Parse (Maybe Predicate)
emitMonikers fileId ids = do
  results <- forM (U.toList ids) (generateMonikerFacts fileId . Id)
  pure $ case catMaybes results of
    [] -> Nothing
    factBodies -> Just (Predicate "lsif.DefinitionMoniker" factBodies)

-- lsif.DefinitionHover facts for the ids whose result set has hover text
emitHovers :: Id -> IdVector -> Parse (Maybe Predicate)
emitHovers fileId ids = do
  results <- forM (U.toList ids) (generateHoverFacts fileId . Id)
  pure $ case catMaybes results of
    [] -> Nothing
    hoverBodies -> Just (Predicate "lsif.DefinitionHover" hoverBodies)

-- lsif.Reference facts: one per (reference range, target definition) pair
emitReferences :: Id -> IdVector -> Parse (Maybe Predicate)
emitReferences fileId ids = do
  xrefBodies <- concatMapM (generateFileReferences fileId . Id) (U.toList ids)
  pure $ case xrefBodies of
    [] -> Nothing
    _ -> Just $ Predicate "lsif.Reference"
      [ object [ key body ] | body <- xrefBodies ]

-- lsif.DefinitionUse facts: one per (target definition, reference range) pair
emitTargetUses :: Id -> IdVector -> Parse (Maybe Predicate)
emitTargetUses fileId ids = do
  useBodies <- concatMapM (generateTargetUses fileId . Id) (U.toList ids)
  pure $ case useBodies of
    [] -> Nothing
    _ -> Just $ Predicate "lsif.DefinitionUse"
      [ object [ key body ] | body <- useBodies ]

-- For each refId, look up the result set, find the result set's file and defs
-- and generate a single flat file -> ref -> targetDef fact per target range
generateFileReferences :: Id -> Id -> Parse [[Pair]]
generateFileReferences fileId refRangeId =
  withResultSet refRangeId getDefinitionFile $
    \(FileDefs targetFileId targetRanges) ->
      pure (map (referenceTo targetFileId) (U.toList targetRanges))
  where
    referenceTo targetFileId targetRange =
      [ "file" .= fileId
      , "range" .= refRangeId
      , "target" .= object -- inner key to lsif.Definition
          [ key
              [ "file" .= targetFileId
              , "range" .= targetRange
              ]
          ]
      ]

-- inverse of file references, emit them here since they're handy
-- maybe later we want to use textDocument/reference edges
generateTargetUses :: Id -> Id -> Parse [[Pair]]
generateTargetUses fileId refRangeId =
  withResultSet refRangeId getDefinitionFile $
    \(FileDefs targetFileId targetRanges) ->
      pure (map (useOf targetFileId) (U.toList targetRanges))
  where
    useOf targetFileId targetRange =
      [ "target" .= object -- inner key to lsif.Definition
          [ key
              [ "file" .= targetFileId
              , "range" .= targetRange
              ]
          ]
      , "file" .= fileId
      , "range" .= refRangeId
      ]

emitDefinitions :: Id_ FileTy -> IdVector -> Maybe Predicate
emitDefinitions fileId ids = emitDeclDefs "lsif.Definition" fileId ids

emitDeclarations :: Id_ FileTy -> IdVector -> Maybe Predicate
emitDeclarations fileId ids = emitDeclDefs "lsif.Declaration" fileId ids

-- One file/range fact of the named predicate per id, or Nothing for no ids
emitDeclDefs :: Text -> Id_ FileTy -> IdVector -> Maybe Predicate
emitDeclDefs name fileId ids
  | U.null ids = Nothing
  | otherwise = Just (Predicate name facts)
  where
    facts =
      [ object
          [ key
              [ "file" .= fileId
              , "range" .= rangeId
              ]
          ]
      | rangeId <- U.toList ids
      ]

-- Pair a definition with the hover fact of its result set; yields Nothing
-- when the range has no result set or the result set has no hover text
generateHoverFacts :: Id -> Id -> Parse (Maybe Value)
generateHoverFacts fileId defRangeId =
  withResultSet defRangeId getHoverTextId $ \hoverFactId ->
    pure $ Just $ object
      [ key
          [ "defn" .= object
              [ key
                  [ "file" .= fileId
                  , "range" .= defRangeId
                  ]
              ]
          , "hover" .= hoverFactId
          ]
      ]

-- Monikers are the symbol ids of LSIF. But they are optional.
-- A fact is always produced: definitions that are missing a moniker simply
-- get no "moniker" field, so they will still be useful in entity lookups
-- as keys
generateMonikerFacts :: Id -> Id -> Parse (Maybe Value)
generateMonikerFacts fileId defRangeId = do
  mId <- withResultSet defRangeId getMonikerId (pure . Just)
  let defnField = "defn" .= object
        [ key
            [ "file" .= fileId
            , "range" .= defRangeId
            ]
        ]
      monikerField = maybe [] (\monikerId -> [ "moniker" .= monikerId ]) mId
  pure (Just (object [ key (defnField : monikerField) ]))

-- Pair a definition with its recorded symbol kind, if there is one
generateSymbolKindFacts :: Id -> Id -> Parse (Maybe Value)
generateSymbolKindFacts fileId defRangeId =
  fmap kindFact <$> getSymbolKind defRangeId
  where
    kindFact kindLiteral = object
      [ key
          [ "defn" .= object
              [ key
                  [ "file" .= fileId
                  , "range" .= defRangeId
                  ]
              ]
          , "kind" .= fromEnum kindLiteral
          ]
      ]

-- Jump from A to B via a [resultset] node: find the result set of an id,
-- use the result set id for a second lookup, then run a continuation on what
-- that lookup found. If either lookup comes back empty the result is mzero.
withResultSet
  :: (MonadPlus m)
  => Id
  -> (Id_ ResultSetTy -> Parse (Maybe t))
  -> (t -> Parse (m a))
  -> Parse (m a)
withResultSet rangeId lookupVia onFound =
  getResultSetOf rangeId >>= maybe (pure mzero) viaResultSet
  where
    viaResultSet resultSetId =
      lookupVia resultSetId >>= maybe (pure mzero) onFound

--
-- JSON-generating utilities
--

-- | Identifier text string
toName :: Tag -> Value
toName tag = string (tagText tag)

-- The "fullRange" field of definition and declaration tags; other tags
-- have none
#if MIN_VERSION_aeson(2,2,0)
tagToRange :: KeyValue e a => Tag -> Maybe [a]
#else
tagToRange :: KeyValue a => Tag -> Maybe [a]
#endif
tagToRange tag = case tag of
  Definition{fullRange = extent} -> Just ["fullRange" .= toRange extent]
  Declaration{fullRange = extent} -> Just ["fullRange" .= toRange extent]
  _ -> Nothing

-- The symbol kind of definition and declaration tags; other tags have none
tagToKind :: Tag -> Maybe SymbolKind
tagToKind tag = case tag of
  Definition{tagKind = symbolKind} -> Just symbolKind
  Declaration{tagKind = symbolKind} -> Just symbolKind
  _ -> Nothing