# Copyright (c) Meta Platforms, Inc. and affiliates.

schema src.1 {

# Path to the source file relative to the repository root
predicate File : string

# Source code digests produced by unspecified hash function
predicate FileDigest : File -> string

# Common source code location type
type Loc = {
  file : File,
  line : nat,
  column : nat,
    # Each Unicode code point occupies one column, even if it spans multiple
    # bytes. Combining characters are not taken into account.
    # A tab character occupies a single column.
}

# Common source code range type
# Expect: 1-based line and column numbers, Begin and End are inclusive.
type Range = {
  file : File,
  lineBegin : nat,
  columnBegin : nat,
  lineEnd : nat,
  columnEnd : nat,
}

# DEPRECATED
type ByteRange = {
  begin : nat,
  end : nat,
}

# Per-file line layout: the byte length of every line plus two summary flags.
predicate FileLines : {
  file : File,
  lengths : [nat],
    # length in bytes of each line, including the terminating newline (if any)
    # NOTE: we store length rather than offset because it is shorter;
    # offsets can be recovered via scanl' (+) 0
  endsInNewline : bool,
    # does the last line end in a newline?
  hasUnicodeOrTabs : bool,
    # does the file have any multibyte UTF-8 code points or tabs
    # i.e. char > 0x7F || c == '\t'
}

# A span of bytes within a file
# NB. Byte spans don't assume any conversion of the source code, so CRLF counts
# as two bytes
type ByteSpan = {
  start : nat,
  length : nat,
}

# A span of bytes where the start is given as an offset from the start of
# the previous span
type RelByteSpan = {
  offset : nat,
  length : nat,
}

# Predicate to check if one byte span contains another. Two equal spans
# also count as containing each other.
#
# `byteSpan` is the outer span and `contains` is the inner one. The check
# compares the start offsets and the computed ends (start + length) with
# non-strict inequalities.
predicate ByteSpanContains : {
  byteSpan: ByteSpan,
  contains: ByteSpan
}
  {S1, S2} where
    {Start1, Len1} = S1;
    {Start2, Len2} = S2;
    End1 = Start1 + Len1;
    End2 = Start2 + Len2;
    Start1 <= Start2;
    End2 <= End1

# Predicate to check if one range contains another. Two equal ranges also
# count as containing each other.
#
# The first field is the outer range and `contains` is the inner one. Both
# ranges must refer to the same file (the shared variable F). Columns are only
# compared when the corresponding begin (resp. end) lines coincide.
#
# NOTE(review): the outer range field is named `fileLines` although its type
# is Range; the name is kept as-is so existing queries keep working.
predicate RangeContains : {
  fileLines : Range,
  contains: Range
}
  {S1, S2} where
    {F, LB1, CB1, LE1, CE1} = S1;
    {F, LB2, CB2, LE2, CE2} = S2;
    LB1 <= LB2;
    LE2 <= LE1;
    (LB1 < LB2 | CB1 <= CB2);
    (LE2 < LE1 | CE2 <= CE1)

# Packed representation for a list of byte spans
#
# This is represented as a list of relative offsets grouped by length.
# Each group represents a sequence of byte spans of the same length.
#
# Offsets are relative to the previous offset, to the last offset of
# the previous group if this is the first offset of a group, or to
# the start of the file if this is the first offset of the first group.
#
# Note that we can only group spans of the same length if they are adjacent
# in the sequence; each span that differs in length from the previous span
# will result in a new group.
#
# For example, given a list of offsets like:
#
#   [
#     { length: 2, offsets: [1] },
#     { length: 3, offsets: [3, 5] },
#     { length: 2, offsets: [4] },
#   ]
#
# The full, expanded list of (absolute offset, length) is:
#
#   [ (1, 2), (4, 3), (9, 3), (13, 2) ]
#
# Use the primitive `prim.unpackByteSpans` to convert this to `[ByteSpan]`.
type PackedByteSpansGroup = {
  length : nat,
  offsets : [nat],
}

# A sequence of groups, in the order described above.
type PackedByteSpans = [PackedByteSpansGroup]

# Kind of language supported by Glean
type Language = enum {
  Buck | C | Cpp | Hack | Haskell | ObjC | ObjCpp | Python | Thrift | Java |
  GraphQL
}

# Maps a file into the kind of language
predicate FileLanguage : {
  file : File,
  language : Language
}

# A byte span paired with the file it belongs to
type FileLocation = {
  file : File,
  span : ByteSpan,
}

# Why didn't Glean Indexer index a particular file?
type IndexFailureReason = enum {
  CompileError | BuildSystemError | Unclassified | DiscoveryError
}

# It is a good practice to add all errors directly into db
# so it is easy to classify and investigate errors in future
predicate IndexFailure : {
  file : File,
  reason : IndexFailureReason,
  details : string,
}

}