/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ /* * A very simple types C++ interface to Glean batches. * */ #pragma once #include #include #include #include #include #include "glean/rts/binary.h" #include "glean/rts/cache.h" #include "glean/rts/id.h" #include "glean/rts/inventory.h" #include "glean/rts/stacked.h" #include "glean/rts/substitution.h" namespace facebook { namespace glean { namespace cpp { using rts::Id; using rts::Pid; // There are two kinds of types: // * Representation types // * Value types // // Representation Types // -------------------- // // These aren't real values, they're just tags which can be filled // by different types. For instance, a Tuple might be filled by // std::tuple and an Array might be filled by // std::string, fbstring etc. // // The following types are supported: // // Byte bytes // // Nat packed natural numbers (up to 64 bits) // // Array arrays // // Tuple<...> tuples // Unit = Tuple<> // // Sum<...> sum types // Maybe = Sum // // Enum enums with range N // (map to sums underneath) // Bool = Enum<2> // // Predicates, e.g.: // struct Foo : Predicate { // static const char *name() { return "Foo"; } // }; // NOTE: KeyType and ValueType are *value* types. This is to // increase type safety when constructing facts: in fact

(x), // x must have the right type, not just the right shape. // // // Value Types // ----------- // // Value types are inhabited instances of representation types. // A value type has a single representation type, however there // are multiple possible value types for a given representation. // e.g. for the representation Array, value types might be // std::string or std::vector. // // std::vector // value type corresponding to Array> // // std::tuple // value type corresponding to Tuple...> // // facts // Fact

is a reference to a fact of predicate P // // enumerated types // enum class Foo { thing1, thing2 }; // template<> struct Repr_ { // using Type = Enum<2>; // } // // user-defined arbitrary types which map to any representation type // struct Foo { // uint64_t x,y; // void outputRepr(Output> output) const { // outputValue(output, std::make_pair(x,y)); // } // }; // template<> struct Repr_ { // using Type = Tuple; // } // // // The Repr template // -------------------- // // Repr is the representation type corresponding to the // value type V. // // Repr is defined for: // * user-defined structs // * enumerated types template struct Repr_; // Convenience: template using Repr = typename Repr_::Type; struct Byte; template <> struct Repr_ { using Type = Byte; }; struct Nat; template <> struct Repr_ { using Type = Nat; }; template <> struct Repr_ { using Type = Nat; }; struct String; template <> struct Repr_ { using Type = String; }; template struct Array; template struct Repr_> { using Type = Array>; }; template struct Set; template struct Repr_> { using Type = Set>; }; template struct Tuple; using Unit = Tuple<>; template struct Repr_> { using Type = Tuple...>; }; template struct Sum; template using Maybe = Sum; template struct Alt : private boost::operators> { T value; explicit Alt(const T& x) : value(x) {} explicit Alt(T&& x) : value(std::forward(x)) {} bool operator==(const Alt& other) const { return value == other.value; } bool operator<(const Alt& other) const { return value < other.value; } }; template struct Repr_...>> { using Type = Sum...>; }; template Alt> alt(T&& x) { return Alt>(std::forward(x)); } inline Alt<0, std::tuple<>> nothing() { return alt<0>(std::make_tuple()); } template Alt<1, std::decay_t> just(T&& x) { return alt<1>(std::forward(x)); } template using maybe_type = boost::variant>, Alt<1, T>>; template maybe_type maybe(const folly::Optional& x) { if (x) { return alt<1>(x.value()); } else { return alt<0>(std::make_tuple()); } } // Enums - a special case of sums // // T is the actual value type, N is the range of the enum template struct Enum {}; using Bool = Enum<2>; template <> struct Repr_ { using Type = Bool; }; // Predicates // // Predicates are type tags. Fact

(see below) is the corresponding value. // // A predicate is reflected into C++ as follows: // // struct Foo : Predicate> { // static const char *name() { return "foo"; } // }; // // struct Bar : Predicate> { // static const char *name() { return "bar"; } // }; // template > struct Predicate { using KeyType = Key; using ValueType = Value; }; // Facts template struct Fact : private boost::operators> { Id getId() const { return id; } bool operator==(const Fact& other) const { return id == other.id; } bool operator<(const Fact& other) const { return id < other.id; } private: template friend class Batch; explicit Fact(Id i) : id(i) {} Id id; }; // The representation type of a Fact is its predicate: template struct Repr_> { using Type = P; }; // Typed value output buffers template struct Output { binary::Output& output; explicit Output(binary::Output& o) : output(o) {} }; template Output unsafeAs(Output o) { return Output(o.output); } inline void outputValue(Output o, uint8_t x) { o.output.fixed(x); } inline void outputValue(Output o, uint64_t x) { o.output.packed(x); } inline void outputValue(Output o, const std::string& s) { // TODO: Validate UTF-8? o.output.mangleString(binary::byteRange(s)); } template void outputValue(Output> o, std::initializer_list xs) { o.output.packed(xs.size()); for (const auto& x : xs) { outputValue(unsafeAs(o), x); } } template void outputValue(Output> o, const std::vector& xs) { o.output.packed(xs.size()); for (const auto& x : xs) { outputValue(unsafeAs(o), x); } } template void outputValue(Output> o, const folly::Range& range) { o.output.packed(range.size()); for (const auto& x : range) { outputValue(unsafeAs(o), x); } } inline void outputValue(Output> o, folly::ByteRange r) { o.output.packed(r.size()); o.output.put(r); } inline void outputValue(Output> o, const std::string& s) { outputValue(o, binary::byteRange(s)); } inline void outputValue(Output> o, const folly::fbstring& s) { outputValue(o, binary::byteRange(s)); } template inline void outputValue(Output> o, const std::set& xs) { o.output.packed(xs.size()); for (const auto& x : xs) { outputValue(unsafeAs(o), x); } } namespace detail { template void outputValues(binary::Output&, const U&) {} template void outputValues(binary::Output& output, const U& u) { outputValue(Output(output), std::get(u)); outputValues(output, u); } } // namespace detail template < typename... Ts, typename... Us, typename = std::enable_if_t> void outputValue(Output> o, const std::tuple& xs) { detail::outputValues<0, std::tuple, Ts...>(o.output, xs); } template < size_t i, typename U, typename... Ts, typename = std::enable_if_t> void outputValue(Output> o, const Alt& alt) { o.output.packed(i); outputValue( unsafeAs>::type>(o), alt.value); } namespace { template struct OutputAlt : public boost::static_visitor { explicit OutputAlt(Output> o) : out(o) {} template void operator()(const Alt& alt) const { outputValue(out, alt); } Output> out; }; } // namespace template < typename... Ts, typename... Us, typename = std::enable_if_t> void outputValue(Output> o, const boost::variant& v) { boost::apply_visitor(OutputAlt(o), v); } template void outputValue(Output> o, const folly::Optional& x) { if (x) { o.output.packed(1); outputValue(unsafeAs(o), x.value()); } else { o.output.packed(0); } } template void outputValue(Output> o, folly::None) { o.output.packed(0); } template void outputValue(Output> o, T x) { uint64_t n = static_cast(x); assert(n < N); outputValue(unsafeAs(o), n); } template void outputValue(Output

o, Fact

fact) { o.output.packed(fact.getId()); } template void outputValue(Output> o, const T& x) { x.outputRepr(o); } struct FactStats { size_t memory; size_t count; }; struct SchemaInventory { rts::Inventory inventory; /// Mapping from the autogenerated Schema::index

::value to the Id of P /// in the inventory (if it exists there). std::vector predicates; }; class BatchBase { public: explicit BatchBase( const SchemaInventory* inventory, size_t cache_capacity, std::string schemaId); BatchBase(const BatchBase&) = delete; BatchBase(BatchBase&&) = delete; BatchBase& operator=(const BatchBase&) = delete; BatchBase& operator=(BatchBase&&) = delete; const rts::Predicate* FOLLY_NULLABLE predicate(size_t i) const { return inventory->predicates[i]; } Id define(Pid ty, rts::Fact::Clause clause); void rebase(const rts::Substitution&); rts::FactSet::Serialized serialize() const; std::map> serializeOwnership() const; void clearOwnership(); FactStats bufferStats() const { return FactStats{buffer.factMemory(), buffer.size()}; } // TODO: This is a temporary hack for backwards compatibility, add proper // stats reporting struct CacheStats { FactStats facts; size_t hits = 0; size_t misses = 0; }; CacheStats cacheStats(); Id firstFreeId() const { return facts.firstFreeId(); } const std::string& getSchemaId() const { return schemaId; } void beginUnit(std::string); void endUnit(); void logEnd() const; private: const SchemaInventory* inventory; std::shared_ptr stats; rts::LookupCache cache; rts::LookupCache::Anchor anchor; rts::FactSet buffer; rts::Stacked facts; std::string schemaId; struct Owned { std::string unit; rts::closed_interval_set facts; Id start; Id finish; }; std::deque owned; Owned* current = nullptr; size_t seen_units = 0; mutable size_t last_serialized_units = 0; mutable size_t total_serialized_units = 0; std::set unique_units; }; /// A typed instantiation of an Inventory for a particular Schema. template struct DbSchema { SchemaInventory inventory; explicit DbSchema(rts::Inventory inv) : inventory{std::move(inv), {}} { inventory.predicates = getPredicates(inventory.inventory); } private: using ref_t = std::pair; template using pred_t = typename Schema::template predicate::type; template static std::vector getRefs(std::index_sequence) { return std::vector{ ref_t{pred_t::GLEAN_name(), pred_t::GLEAN_version()}...}; } using pred_map_t = std::unordered_map, size_t>; static std::vector getPredicates( const rts::Inventory& inventory) { const auto seq = std::make_index_sequence(); auto refs = getRefs(seq); std::unordered_map indices; for (size_t i = 0; i < refs.size(); ++i) { indices.insert({refs[i], i}); } const auto inventory_preds = inventory.predicates(); std::vector preds( refs.size(), nullptr); for (auto p : inventory_preds) { auto i = indices.find({p->name, p->version}); if (i != indices.end()) { preds[i->second] = p; } } // TODO: verify that predicates have the expected types return preds; } }; // A Batch encapsulates a set of cached facts which is initially empty and a // set of facts local to the Batch. Local facts can reference cached ones but // not vice versa. New local facts can be added to the Batch. // // A Batch can be serialized (typically for sending to the server) and rebased // based on a Substitution (typically received from the server). Local facts // that are in range of the substitution are moved to the cache; the remaining // local facts are assigned new Ids which don't clash which cached ones. // template class Batch : private BatchBase { public: Batch(const DbSchema* schema, size_t cache_capacity) : BatchBase(&(schema->inventory), cache_capacity, Schema::schemaId) {} template const rts::Predicate* predicate() const { if (auto p = base().predicate(Schema::template index

::value)) { return p; } else { throw std::runtime_error( std::string("unknown predicate ") + P::GLEAN_name() + "[" + folly::to(P::GLEAN_version()) + "]"); } } template < typename P, typename = std::enable_if_t< std::is_same>::value>> Fact

fact(const typename P::KeyType& x) { binary::Output output; outputValue(Output>(output), x); auto fact = base().define( predicate

()->id, rts::Fact::Clause::fromKey(output.bytes())); assert(fact); return Fact

(fact); } template < typename P, typename... Ts, typename = std::enable_if_t<1 < sizeof...(Ts)>> Fact

fact(Ts&&... xs) { return fact

(std::make_tuple(std::forward(xs)...)); } template Fact

factV(const typename P::KeyType& k, const typename P::ValueType& v) { binary::Output clause; outputValue(Output>(clause), k); const auto key_size = clause.size(); outputValue(Output>(clause), v); auto fact = base().define( predicate

()->id, rts::Fact::Clause::from(clause.bytes(), key_size)); assert(fact); return Fact

(fact); } BatchBase& base() { return *this; } const BatchBase& base() const { return *this; } using BatchBase::beginUnit; using BatchBase::bufferStats; using BatchBase::CacheStats; using BatchBase::cacheStats; using BatchBase::endUnit; using BatchBase::logEnd; using BatchBase::rebase; }; } // namespace cpp } // namespace glean } // namespace facebook