module HsBindgen.Frontend.ProcessIncludes ( GetMainHeadersAndInclude , processIncludes , GetMainHeaders , toGetMainHeaders -- * Auxiliary , getIncludeTo ) where import Data.DynGraph.Labelled qualified as DynGraph import Data.List qualified as List import Data.List.Compat (unsnoc) import Data.List.NonEmpty qualified as NonEmpty import Data.Map.Strict qualified as Map import Data.Text qualified as Text import Clang.Enum.Simple import Clang.HighLevel qualified as HighLevel import Clang.HighLevel.Types import Clang.LowLevel.Core import Clang.Paths import HsBindgen.Errors import HsBindgen.Frontend.Analysis.IncludeGraph (Include, IncludeGraph) import HsBindgen.Frontend.Analysis.IncludeGraph qualified as IncludeGraph import HsBindgen.Frontend.Predicate import HsBindgen.Frontend.RootHeader (HashIncludeArg) import HsBindgen.Frontend.RootHeader qualified as RootHeader import HsBindgen.Imports {------------------------------------------------------------------------------- Process includes. == Context When the user invokes @hs-bindgen@, they provide us with one or more headers to process; we refer to these as the "main headers". In order to process these, we construct a new "root header" (see "HsBindgen.Frontend.RootHeader"), which has one @#include@ per user-specified main header. For example, the root header might look like > #include > #include These paths must be interpreted with respect to the @C_INCLUDE_PATH@, the @-I@ command line options, etc.; we use 'HashIncludeArg' for this concept. == Selecting declarations When we see a declaration in the @clang@ AST, we might need to check if this declaration is from one of these main headers (as opposed to a header /included by/ one of the main headers). Unfortunately, @clang@ does not give us a 'HashIncludeArg' for the declaration, but rather a 'SourcePath'. The exact nature of this 'SourcePath' is a @clang@ internal detail, but it might for example be @/the/full/path/to/b.h@. In this example, we then /know/ that the set of main headers is @@ and @@, and we need to check if the 'SourcePath' @/the/full/path/to/b.h@ happens to correspond to one of these main headers. Simply comparing the @basename@ is insufficient: it's entirely possible that for example both @b.h@ and @internal/b.h@ exist in the library (or indeed, this particular @b.h@ might be from a different library altogether). Therefore we need a /mapping/ from 'HashIncludeArg' to 'SourcePath', at least for the includes in the root header. The only reliable way that we found to get this mapping is by looking at how @clang@ resolves these headers as it parses the root header (there is an API specifically for resolving header paths, but it does subtly different things than the @clang@ parser does). Unfortunately, this means that we need to process all includes /prior/ to processing the rest of the AST, because we are not guaranteed to see the include of @b.h@ from the root header prior to processing it: this will /usually/ be the case, but not if @@ /itself/ also includes @@. == Setting the current main header When we see a function declaration, we must associate that function declaration with one of the main headers (so that we can generate the correct @#include@ when producing code for that function). It's not entirely obvious if we should use a 'HashIncludeArg' or a 'SourcePath' for this purpose; we currently choose the former, so that we can generate the somewhat cleaner lookling > foreign import capi "b.h foo" .. rather than > foreign import capi "/full/path/to/b.h foo" .. Arguably, the lattter is more correct, but the former is perhaps a little more portable (although @hs-bindgen@ is in general not intended to produce portable code anyway). When we see the @#include@ in the root header, @clang@ again only gives us a 'SourcePath' for the file-to-be-included. We ignore this, and instead use its /location/ as an index into the root header. (Note that we cannot really build a map from 'SourcePath' to 'HashIncludeArg': multiple 'HashIncludeArg's in the root header could in principle resolve to the /same/ 'SourcePath.') -------------------------------------------------------------------------------} -- | Function to get the main headers that (transitively) include a source path, -- as well as the @#include@ argument used to include the source path type GetMainHeadersAndInclude = SourcePath -> Either String (NonEmpty HashIncludeArg, HashIncludeArg) -- | Process includes -- -- We do this as separate pass over the clang AST; this should be relatively -- cheap, as we can reuse the same 'CXTranslationUnit'. processIncludes :: CXTranslationUnit -> IO (IncludeGraph, IsMainHeader, IsInMainHeaderDir, GetMainHeadersAndInclude) processIncludes unit = do root <- clang_getTranslationUnitCursor unit includes <- HighLevel.clang_visitChildren root $ simpleFold $ \curr -> do mKind <- fromSimpleEnum <$> clang_getCursorKind curr case mKind of Right CXCursor_InclusionDirective -> do include <- processInclude unit curr foldContinueWith include _otherwise -> foldContinue let includeGraph :: IncludeGraph includeGraph = IncludeGraph.fromList $ map (\incDir -> (incDir.from, incDir.include, incDir.to)) includes mainPathPairs :: [(SourcePath, HashIncludeArg)] mainPathPairs = [ (incDir.to, IncludeGraph.includeArg incDir.include) | incDir <- includes , incDir.inRoot ] mainPathMap :: Map SourcePath HashIncludeArg mainPathMap = Map.fromList mainPathPairs mainPaths :: Set SourcePath mainPaths = Map.keysSet mainPathMap isMainHeader :: IsMainHeader isMainHeader = mkIsMainHeader mainPaths isInMainHeaderDir :: IsInMainHeaderDir isInMainHeaderDir = mkIsInMainHeaderDir mainPaths getMainHeadersAndInclude :: GetMainHeadersAndInclude getMainHeadersAndInclude path = let error' msg = Left $ "getMainHeadersAndInclude failed for " ++ show path ++ ": " ++ msg in case IncludeGraph.getIncludes includeGraph path of DynGraph.FindEdgesFound startIncludes termIncludes -> Right $ ( IncludeGraph.includeArg <$> termIncludes , IncludeGraph.includeArg (NonEmpty.head startIncludes) ) DynGraph.FindEdgesNone -> error' "none" DynGraph.FindEdgesInvalid -> error' "invalid" return ( includeGraph , isMainHeader , isInMainHeaderDir , getMainHeadersAndInclude ) -- | Function to get the main headers that (transitively) include a source path type GetMainHeaders = SourcePath -> Either String (NonEmpty HashIncludeArg) toGetMainHeaders :: GetMainHeadersAndInclude -> GetMainHeaders toGetMainHeaders f = fmap fst . f {------------------------------------------------------------------------------- Process inclusion directives -------------------------------------------------------------------------------} -- | Include directive -- -- Suppose we have file @a.h@ containing -- -- > #include "b.h" -- -- Then -- -- * 'from' will be @/full/path/to/a.h@ -- * 'include' will be @#include "b.h"@ (exact path as in source) -- * 'to' will be @/full/path/to/b.h@ -- * 'inRoot' will be 'True' if the include is in the root header -- -- The full 'SourcePath's are constructed by @libclang@, and depend on factors -- such as @-I@ command line arguments, environment variables such as -- @C_INCLUDE_PATH@, etc. data IncDir = IncDir { from :: SourcePath , include :: Include , to :: SourcePath , inRoot :: Bool } processInclude :: CXTranslationUnit -> CXCursor -> IO IncDir processInclude unit curr = do incDirFrom <- getIncludeFrom curr incDirInclude <- getInclude unit curr incDirTo <- getIncludeTo curr incDirInRoot <- clang_Location_isFromMainFile =<< clang_getCursorLocation curr return IncDir{ from = incDirFrom , include = incDirInclude , to = incDirTo , inRoot = incDirInRoot } {------------------------------------------------------------------------------- Internal auxiliary -------------------------------------------------------------------------------} getIncludeFrom :: MonadIO m => CXCursor -> m SourcePath getIncludeFrom curr = singleLocPath <$> HighLevel.clang_getCursorLocation' curr getIncludeTo :: MonadIO m => CXCursor -> m SourcePath getIncludeTo curr = do file <- clang_getIncludedFile curr SourcePath <$> clang_getFileName file getInclude :: CXTranslationUnit -> CXCursor -> IO Include getInclude unit curr = do tokens <- HighLevel.clang_tokenize unit . fmap multiLocExpansion =<< HighLevel.clang_getCursorExtent curr let err = "unable to parse #include: " ++ show tokens maybe (panicIO err) return $ parseInclude tokens parseInclude :: [Token TokenSpelling] -> Maybe Include parseInclude = \case t0 : t1 : ts2 -> do guard $ isPunctuation t0 && t0 `hasSpelling` "#" guard $ isIdentifier t1 let isIncludeNext = t1 `hasSpelling` "include_next" unless isIncludeNext $ guard (t1 `hasSpelling` "include") case ts2 of -- quote include arguments are parsed as literals [t] -> do guard $ isLiteral t let s = Text.unpack $ getTokenSpelling (tokenSpelling t) (cL, s1) <- List.uncons s guard $ cL == '"' (s', cR) <- unsnoc s1 guard $ cR == '"' let (_, arg) = RootHeader.hashIncludeArg s' return $ if isIncludeNext then IncludeGraph.QuoteIncludeNext arg else IncludeGraph.QuoteInclude arg -- bracket include arguments are parsed using punctuation t2 : ts3 -> do guard $ isPunctuation t2 && t2 `hasSpelling` "<" (ts, tR) <- unsnoc ts3 guard $ isPunctuation tR && tR `hasSpelling` ">" -- ts may contain many token kinds, not just identifier/punctuation let (_, arg) = RootHeader.hashIncludeArg $ concatMap (Text.unpack . getTokenSpelling . tokenSpelling) ts return $ if isIncludeNext then IncludeGraph.BracketIncludeNext arg else IncludeGraph.BracketInclude arg [] -> Nothing _otherwise -> Nothing where isIdentifier, isLiteral, isPunctuation :: Token a -> Bool isIdentifier = (== Right CXToken_Identifier) . fromSimpleEnum . tokenKind isLiteral = (== Right CXToken_Literal) . fromSimpleEnum . tokenKind isPunctuation = (== Right CXToken_Punctuation) . fromSimpleEnum . tokenKind hasSpelling :: Token TokenSpelling -> Text -> Bool hasSpelling = (==) . (getTokenSpelling . tokenSpelling)