# Copyright (c) Meta Platforms, Inc. and affiliates.

schema src.1 {

# A source file, identified by its path relative to the repository root.
# The fact key is the path string itself.
predicate File : string

# Digest of a file's source code: maps a File (key) to its digest string
# (value). The hash function used to produce the digest is unspecified.
predicate FileDigest : File -> string

# Common source code location type: a single (line, column) position
# within a file.
#
# Column counting: each Unicode code point occupies one column, even if it
# spans multiple bytes. Combining characters are not taken into account.
# A tab character occupies a single column.
#
# NOTE(review): whether `line` and `column` are 0- or 1-based is not stated
# for this type — confirm against the indexers that produce it.
type Loc = {
  file : File,
  line : nat,
  column : nat,
}

# Common source code range type: a region of a file delimited by a begin
# and an end (line, column) position.
# Expectation: line and column numbers are 1-based, and both the Begin and
# the End position are inclusive.
type Range = {
  file : File,
  lineBegin : nat,
  columnBegin : nat,
  lineEnd : nat,
  columnEnd : nat,
}

# DEPRECATED: do not use in new schemas.
# NOTE(review): presumably superseded by ByteSpan (start + length); whether
# `end` is inclusive or exclusive is not specified here — confirm before
# relying on it.
type ByteRange = {
  begin : nat,
  end : nat,
}

# Line structure of a file: the byte length of every line, plus flags
# describing the file's contents.
predicate FileLines : {
  file : File,
  lengths : [nat],
    # Length in bytes of each line, including the terminating newline (if any).
    # NOTE: we store lengths rather than offsets because they are shorter;
    # the offset of each line can be recovered with a running sum,
    # e.g. Haskell's scanl' (+) 0.
  endsInNewline : bool,
    # Does the last line end in a newline?
  hasUnicodeOrTabs : bool,
    # Does the file contain any multibyte UTF-8 code points or tabs,
    # i.e. any character c with c > 0x7F || c == '\t'?
}

# A span of bytes within a file, given as a start offset and a length in
# bytes.
# NB. Byte spans don't assume any conversion of the source code, so a CRLF
# line ending counts as two bytes.
type ByteSpan = {
  start : nat,
  length : nat,
}

# A span of bytes whose start is not absolute: `offset` is the distance
# from the start of the previous span to the start of this one.
# `length` is the length of this span in bytes.
type RelByteSpan = {
  offset : nat,
  length : nat,
}

# Holds when `byteSpan` fully encloses `contains`: the enclosed span must
# start no earlier and end no later than the enclosing one. Two equal
# spans therefore contain each other.
predicate ByteSpanContains :
  {
    byteSpan: ByteSpan,
    contains: ByteSpan
  }
  { Outer, Inner } where
    { start = OuterStart, length = OuterLen } = Outer;
    { start = InnerStart, length = InnerLen } = Inner;
    # End offsets (start + length) of both spans
    OuterEnd = OuterStart + OuterLen;
    InnerEnd = InnerStart + InnerLen;
    OuterStart <= InnerStart;
    InnerEnd <= OuterEnd

# Holds when the range `fileLines` fully encloses the range `contains`.
# Both ranges must refer to the same file. Begin and end positions are
# compared as (line, column) pairs: lines first, columns only when the
# lines are equal. Two equal ranges therefore contain each other.
predicate RangeContains :
  {
    fileLines : Range,
    contains: Range
  }
  { Outer, Inner } where
    {
      file = CommonFile,
      lineBegin = OuterLineBegin,
      columnBegin = OuterColBegin,
      lineEnd = OuterLineEnd,
      columnEnd = OuterColEnd
    } = Outer;
    {
      file = CommonFile,
      lineBegin = InnerLineBegin,
      columnBegin = InnerColBegin,
      lineEnd = InnerLineEnd,
      columnEnd = InnerColEnd
    } = Inner;
    OuterLineBegin <= InnerLineBegin;
    InnerLineEnd <= OuterLineEnd;
    # On a shared first line the outer range must begin at or before the inner
    ( OuterLineBegin < InnerLineBegin | OuterColBegin <= InnerColBegin );
    # On a shared last line the inner range must end at or before the outer
    ( InnerLineEnd < OuterLineEnd | InnerColEnd <= OuterColEnd )

# Packed representation for a list of byte spans
#
# This is represented as a list of relative offsets grouped by length.
# Each group represents a sequence of byte spans of the same length.
#
# Each offset is relative to the previous offset; to the last offset of
# the previous group if it is the first offset of a group; or to the
# start of the file if it is the first offset of the first group.
#
# Note that we can only group spans of the same length if they are adjacent
# in the sequence; each span that differs in length from the previous span
# will result in a new group.
#
# For example, given a packed list like:
#
#   [
#     { length: 2, offsets: [1] },
#     { length: 3, offsets: [3, 5] },
#     { length: 2, offsets: [4] },
#   ]
#
# The full, expanded list of (absolute offset, length) is:
#
#   [ (1, 2), (4, 3), (9, 3), (13, 2) ]
#
# i.e. the absolute offsets are the running sums 1, 1+3, 4+5, 9+4.
#
# Use the primitive `prim.unpackByteSpans` to convert this to `[ByteSpan]`.
type PackedByteSpansGroup = { length : nat, offsets : [nat], }
type PackedByteSpans = [PackedByteSpansGroup]

# Kinds of language supported by Glean. Used by FileLanguage to tag a file
# with its language.
type Language = enum {
  Buck | C | Cpp | Hack | Haskell  |
  ObjC | ObjCpp | Python | Thrift | Java | GraphQL
}

# Associates a file with the kind of language it is written in.
predicate FileLanguage : {
  file : File,
  language : Language
}

# A location expressed as a byte span within a particular file.
type FileLocation = {
  file : File,
  span : ByteSpan,
}

# Reason why the Glean indexer did not index a particular file.
type IndexFailureReason = enum {
  CompileError |
  BuildSystemError |
  Unclassified |
  DiscoveryError
}

# Records that a file failed to index, with the reason and a details
# string.
# It is good practice to write all errors directly into the db, so that
# they are easy to classify and investigate in the future.
predicate IndexFailure : {
  file : File,
  reason : IndexFailureReason,
  details : string,
}

}