-- |
-- SPDX-License-Identifier: BSD-3-Clause
module Swarm.Game.Scenario.Topography.Structure.Recognition.Prep (
  mkEntityLookup,
  binTuplesHM,
) where

import Control.Arrow ((&&&))
import Control.Lens.Indexed (imap)
import Data.HashMap.Strict qualified as HM
import Data.HashSet qualified as HS
import Data.Hashable (Hashable)
import Data.Int (Int32)
import Data.List.NonEmpty (NonEmpty)
import Data.List.NonEmpty qualified as NE
import Data.List.Split (wordsBy)
import Data.Maybe (catMaybes, mapMaybe)
import Data.Semigroup (sconcat)
import Data.Tuple (swap)
import Swarm.Game.Scenario.Topography.Grid
import Swarm.Game.Scenario.Topography.Structure.Recognition.Type
import Text.AhoCorasick (makeStateMachine)

-- | Given all candidate structures, explode them into annotated rows.
-- These annotations entail both the row index with the original structure
-- and a reference to the original structure definition.
--
-- This operation may result in multiple entries that contain the same contents
-- (but different annotations), either because the same contents appear
-- in multiple rows within the same structure, or occur across structures.
allStructureRows :: [StructureWithGrid b a] -> [StructureRow b a]
allStructureRows :: forall b a. [StructureWithGrid b a] -> [StructureRow b a]
allStructureRows =
  (StructureWithGrid b a -> [StructureRow b a])
-> [StructureWithGrid b a] -> [StructureRow b a]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((StructureWithGrid b a -> [StructureRow b a])
 -> [StructureWithGrid b a] -> [StructureRow b a])
-> (StructureWithGrid b a -> [StructureRow b a])
-> [StructureWithGrid b a]
-> [StructureRow b a]
forall a b. (a -> b) -> a -> b
$ NonEmpty (StructureRow b a) -> [StructureRow b a]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty (StructureRow b a) -> [StructureRow b a])
-> (StructureWithGrid b a -> NonEmpty (StructureRow b a))
-> StructureWithGrid b a
-> [StructureRow b a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. StructureWithGrid b a -> NonEmpty (StructureRow b a)
forall {b} {a}.
StructureWithGrid b a -> NonEmpty (StructureRow b a)
transformRows
 where
  transformRows :: StructureWithGrid b a -> NonEmpty (StructureRow b a)
transformRows StructureWithGrid b a
g = (Int -> NonEmpty (AtomicKeySymbol a) -> StructureRow b a)
-> NonEmpty (NonEmpty (AtomicKeySymbol a))
-> NonEmpty (StructureRow b a)
forall a b. (Int -> a -> b) -> NonEmpty a -> NonEmpty b
forall i (f :: * -> *) a b.
FunctorWithIndex i f =>
(i -> a -> b) -> f a -> f b
imap (StructureWithGrid b a
-> Int32 -> NonEmpty (AtomicKeySymbol a) -> StructureRow b a
forall b a.
StructureWithGrid b a
-> Int32 -> NonEmpty (AtomicKeySymbol a) -> StructureRow b a
StructureRow StructureWithGrid b a
g (Int32 -> NonEmpty (AtomicKeySymbol a) -> StructureRow b a)
-> (Int -> Int32)
-> Int
-> NonEmpty (AtomicKeySymbol a)
-> StructureRow b a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral) NonEmpty (NonEmpty (AtomicKeySymbol a))
rows
   where
    NonEmptyGrid NonEmpty (NonEmpty (AtomicKeySymbol a))
rows = ExtractedArea b a -> NonEmptyGrid (AtomicKeySymbol a)
forall b a. ExtractedArea b a -> NonEmptyGrid (AtomicKeySymbol a)
extractedGrid (ExtractedArea b a -> NonEmptyGrid (AtomicKeySymbol a))
-> ExtractedArea b a -> NonEmptyGrid (AtomicKeySymbol a)
forall a b. (a -> b) -> a -> b
$ StructureWithGrid b a -> ExtractedArea b a
forall b a. StructureWithGrid b a -> ExtractedArea b a
entityGrid StructureWithGrid b a
g

-- | If this entity is encountered in the world,
-- how far left of it and how far right of it do we need to
-- scan the world row to ensure we can recognize every possible
-- structure that features this entity?
mkOffsets :: Int32 -> RowWidth -> InspectionOffsets
mkOffsets :: Int32 -> RowWidth -> InspectionOffsets
mkOffsets Int32
pos (RowWidth Int32
w) =
  Min Int32 -> Max Int32 -> InspectionOffsets
InspectionOffsets
    (Int32 -> Min Int32
forall {f :: * -> *}. Applicative f => Int32 -> f Int32
subtractPosFrom Int32
0)
    (Int32 -> Max Int32
forall {f :: * -> *}. Applicative f => Int32 -> f Int32
subtractPosFrom Int32
rightMostShapeRowIndex)
 where
  subtractPosFrom :: Int32 -> f Int32
subtractPosFrom Int32
minuend = Int32 -> f Int32
forall a. a -> f a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int32 -> f Int32) -> Int32 -> f Int32
forall a b. (a -> b) -> a -> b
$ Int32
minuend Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- Int32
pos
  rightMostShapeRowIndex :: Int32
rightMostShapeRowIndex = Int32
w Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- Int32
1

-- | Make the first-phase lookup map, keyed by 'Entity',
-- along with automatons whose key symbols are "Maybe Entity".
--
-- Each automaton in this first layer will attempt to match the
-- underlying world row against all rows within all structures
-- (so long as they contain the keyed entity).
--
-- = Preparation steps
--
-- 1. Consolidate all identical rows across all structures into a map
-- 2. Consolidate all entities across these rows into an entity-keyed lookup map
-- 3. Extract the contiguous chunks from each unique row
-- 4. Put the expected indices of these chunks into a lookup structure
-- 5. Prepare Aho-Corasick state machines for recognizing these chunks
mkEntityLookup ::
  (Hashable a, Eq a) =>
  [StructureWithGrid b a] ->
  HM.HashMap a (AutomatonInfo b a)
mkEntityLookup :: forall a b.
(Hashable a, Eq a) =>
[StructureWithGrid b a] -> HashMap a (AutomatonInfo b a)
mkEntityLookup [StructureWithGrid b a]
grids =
  (NonEmpty (SingleRowEntityOccurrences b a) -> AutomatonInfo b a)
-> HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
-> HashMap a (AutomatonInfo b a)
forall v1 v2 k. (v1 -> v2) -> HashMap k v1 -> HashMap k v2
HM.map NonEmpty (SingleRowEntityOccurrences b a) -> AutomatonInfo b a
forall {k} {v}.
Hashable k =>
NonEmpty (SingleRowEntityOccurrences v k) -> AutomatonInfo v k
mkRowAutomatons HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
rowsByEntityParticipation
 where
  -- Produces an automaton to evaluate whenever a given entity
  -- is encountered.
  mkRowAutomatons :: NonEmpty (SingleRowEntityOccurrences v k) -> AutomatonInfo v k
mkRowAutomatons NonEmpty (SingleRowEntityOccurrences v k)
neList =
    InspectionOffsets -> PiecewiseRecognition v k -> AutomatonInfo v k
forall v k.
InspectionOffsets -> PiecewiseRecognition v k -> AutomatonInfo v k
AutomatonInfo InspectionOffsets
bounds (PiecewiseRecognition v k -> AutomatonInfo v k)
-> PiecewiseRecognition v k -> AutomatonInfo v k
forall a b. (a -> b) -> a -> b
$
      StateMachine (AtomicKeySymbol k) (NonEmpty k)
-> NonEmpty (RowChunkMatchingReference v k)
-> PiecewiseRecognition v k
forall b a.
StateMachine (AtomicKeySymbol a) (NonEmpty a)
-> NonEmpty (RowChunkMatchingReference b a)
-> PiecewiseRecognition b a
PiecewiseRecognition StateMachine (AtomicKeySymbol k) (NonEmpty k)
chunksStateMachine NonEmpty (RowChunkMatchingReference v k)
extractedChunksForLookup
   where
    bounds :: InspectionOffsets
bounds = NonEmpty InspectionOffsets -> InspectionOffsets
forall a. Semigroup a => NonEmpty a -> a
sconcat (NonEmpty InspectionOffsets -> InspectionOffsets)
-> NonEmpty InspectionOffsets -> InspectionOffsets
forall a b. (a -> b) -> a -> b
$ (SingleRowEntityOccurrences v k -> InspectionOffsets)
-> NonEmpty (SingleRowEntityOccurrences v k)
-> NonEmpty InspectionOffsets
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map SingleRowEntityOccurrences v k -> InspectionOffsets
forall b a. SingleRowEntityOccurrences b a -> InspectionOffsets
expandedOffsets NonEmpty (SingleRowEntityOccurrences v k)
neList

    -- Prepare lookup structure for use with results of the
    -- Aho-Corasick matcher.
    extractedChunksForLookup :: NonEmpty (RowChunkMatchingReference v k)
extractedChunksForLookup = (SingleRowEntityOccurrences v k -> RowChunkMatchingReference v k)
-> NonEmpty (SingleRowEntityOccurrences v k)
-> NonEmpty (RowChunkMatchingReference v k)
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map SingleRowEntityOccurrences v k -> RowChunkMatchingReference v k
forall {b}.
SingleRowEntityOccurrences b k -> RowChunkMatchingReference b k
f NonEmpty (SingleRowEntityOccurrences v k)
neList
     where
      f :: SingleRowEntityOccurrences b k -> RowChunkMatchingReference b k
f SingleRowEntityOccurrences b k
x = ConsolidatedRowReferences b k
-> HashMap (NonEmpty k) (NonEmpty Int)
-> RowChunkMatchingReference b k
forall b a.
ConsolidatedRowReferences b a
-> HashMap (NonEmpty a) (NonEmpty Int)
-> RowChunkMatchingReference b a
RowChunkMatchingReference (SingleRowEntityOccurrences b k -> ConsolidatedRowReferences b k
forall b a.
SingleRowEntityOccurrences b a -> ConsolidatedRowReferences b a
myRow SingleRowEntityOccurrences b k
x) (SingleRowEntityOccurrences b k
-> HashMap (NonEmpty k) (NonEmpty Int)
forall {b}.
SingleRowEntityOccurrences b k
-> HashMap (NonEmpty k) (NonEmpty Int)
mkRightMap SingleRowEntityOccurrences b k
x)
      mkRightMap :: SingleRowEntityOccurrences b k
-> HashMap (NonEmpty k) (NonEmpty Int)
mkRightMap = [(NonEmpty k, Int)] -> HashMap (NonEmpty k) (NonEmpty Int)
forall (t :: * -> *) a b.
(Foldable t, Hashable a, Eq a) =>
t (a, b) -> HashMap a (NonEmpty b)
binTuplesHM ([(NonEmpty k, Int)] -> HashMap (NonEmpty k) (NonEmpty Int))
-> (SingleRowEntityOccurrences b k -> [(NonEmpty k, Int)])
-> SingleRowEntityOccurrences b k
-> HashMap (NonEmpty k) (NonEmpty Int)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (PositionedChunk k -> (NonEmpty k, Int))
-> [PositionedChunk k] -> [(NonEmpty k, Int)]
forall a b. (a -> b) -> [a] -> [b]
map (PositionedChunk k -> NonEmpty k
forall a. PositionedChunk a -> NonEmpty a
chunkContents (PositionedChunk k -> NonEmpty k)
-> (PositionedChunk k -> Int)
-> PositionedChunk k
-> (NonEmpty k, Int)
forall b c c'. (b -> c) -> (b -> c') -> b -> (c, c')
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& PositionedChunk k -> Int
forall a. PositionedChunk a -> Int
chunkStartPos) ([PositionedChunk k] -> [(NonEmpty k, Int)])
-> (SingleRowEntityOccurrences b k -> [PositionedChunk k])
-> SingleRowEntityOccurrences b k
-> [(NonEmpty k, Int)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SingleRowEntityOccurrences b k -> [PositionedChunk k]
forall b a. SingleRowEntityOccurrences b a -> [PositionedChunk a]
contiguousChunks

    extractedChunksForStateMachine :: HashSet (NonEmpty k)
extractedChunksForStateMachine =
      [NonEmpty k] -> HashSet (NonEmpty k)
forall a. (Eq a, Hashable a) => [a] -> HashSet a
HS.fromList ([NonEmpty k] -> HashSet (NonEmpty k))
-> (NonEmpty [NonEmpty k] -> [NonEmpty k])
-> NonEmpty [NonEmpty k]
-> HashSet (NonEmpty k)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[NonEmpty k]] -> [NonEmpty k]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[NonEmpty k]] -> [NonEmpty k])
-> (NonEmpty [NonEmpty k] -> [[NonEmpty k]])
-> NonEmpty [NonEmpty k]
-> [NonEmpty k]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty [NonEmpty k] -> [[NonEmpty k]]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty [NonEmpty k] -> HashSet (NonEmpty k))
-> NonEmpty [NonEmpty k] -> HashSet (NonEmpty k)
forall a b. (a -> b) -> a -> b
$
        (SingleRowEntityOccurrences v k -> [NonEmpty k])
-> NonEmpty (SingleRowEntityOccurrences v k)
-> NonEmpty [NonEmpty k]
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map ((PositionedChunk k -> NonEmpty k)
-> [PositionedChunk k] -> [NonEmpty k]
forall a b. (a -> b) -> [a] -> [b]
map PositionedChunk k -> NonEmpty k
forall a. PositionedChunk a -> NonEmpty a
chunkContents ([PositionedChunk k] -> [NonEmpty k])
-> (SingleRowEntityOccurrences v k -> [PositionedChunk k])
-> SingleRowEntityOccurrences v k
-> [NonEmpty k]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SingleRowEntityOccurrences v k -> [PositionedChunk k]
forall b a. SingleRowEntityOccurrences b a -> [PositionedChunk a]
contiguousChunks) NonEmpty (SingleRowEntityOccurrences v k)
neList

    -- We wrap the entities with 'Just' since the Aho-Corasick
    -- matcher needs to compare against world cells, which are of 'Maybe' type.
    chunksStateMachine :: StateMachine (AtomicKeySymbol k) (NonEmpty k)
chunksStateMachine =
      [([AtomicKeySymbol k], NonEmpty k)]
-> StateMachine (AtomicKeySymbol k) (NonEmpty k)
forall keySymb val.
(Eq keySymb, Hashable keySymb) =>
[([keySymb], val)] -> StateMachine keySymb val
makeStateMachine ([([AtomicKeySymbol k], NonEmpty k)]
 -> StateMachine (AtomicKeySymbol k) (NonEmpty k))
-> [([AtomicKeySymbol k], NonEmpty k)]
-> StateMachine (AtomicKeySymbol k) (NonEmpty k)
forall a b. (a -> b) -> a -> b
$
        (NonEmpty k -> ([AtomicKeySymbol k], NonEmpty k))
-> [NonEmpty k] -> [([AtomicKeySymbol k], NonEmpty k)]
forall a b. (a -> b) -> [a] -> [b]
map (NonEmpty (AtomicKeySymbol k) -> [AtomicKeySymbol k]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty (AtomicKeySymbol k) -> [AtomicKeySymbol k])
-> (NonEmpty k -> NonEmpty (AtomicKeySymbol k))
-> NonEmpty k
-> [AtomicKeySymbol k]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (k -> AtomicKeySymbol k)
-> NonEmpty k -> NonEmpty (AtomicKeySymbol k)
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap k -> AtomicKeySymbol k
forall a. a -> Maybe a
Just (NonEmpty k -> [AtomicKeySymbol k])
-> (NonEmpty k -> NonEmpty k)
-> NonEmpty k
-> ([AtomicKeySymbol k], NonEmpty k)
forall b c c'. (b -> c) -> (b -> c') -> b -> (c, c')
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& NonEmpty k -> NonEmpty k
forall a. a -> a
id) ([NonEmpty k] -> [([AtomicKeySymbol k], NonEmpty k)])
-> [NonEmpty k] -> [([AtomicKeySymbol k], NonEmpty k)]
forall a b. (a -> b) -> a -> b
$
          HashSet (NonEmpty k) -> [NonEmpty k]
forall a. HashSet a -> [a]
HS.toList HashSet (NonEmpty k)
extractedChunksForStateMachine

  -- The values of this map are guaranteed to contain only one
  -- entry per row of each structure, even if some of those
  -- rows contain repetition of the same entity.
  rowsByEntityParticipation :: HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
rowsByEntityParticipation =
    [(a, SingleRowEntityOccurrences b a)]
-> HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
forall (t :: * -> *) a b.
(Foldable t, Hashable a, Eq a) =>
t (a, b) -> HashMap a (NonEmpty b)
binTuplesHM
      ([(a, SingleRowEntityOccurrences b a)]
 -> HashMap a (NonEmpty (SingleRowEntityOccurrences b a)))
-> ([ConsolidatedRowReferences b a]
    -> [(a, SingleRowEntityOccurrences b a)])
-> [ConsolidatedRowReferences b a]
-> HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (SingleRowEntityOccurrences b a
 -> (a, SingleRowEntityOccurrences b a))
-> [SingleRowEntityOccurrences b a]
-> [(a, SingleRowEntityOccurrences b a)]
forall a b. (a -> b) -> [a] -> [b]
map (SingleRowEntityOccurrences b a -> a
forall b a. SingleRowEntityOccurrences b a -> a
myEntity (SingleRowEntityOccurrences b a -> a)
-> (SingleRowEntityOccurrences b a
    -> SingleRowEntityOccurrences b a)
-> SingleRowEntityOccurrences b a
-> (a, SingleRowEntityOccurrences b a)
forall b c c'. (b -> c) -> (b -> c') -> b -> (c, c')
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& SingleRowEntityOccurrences b a -> SingleRowEntityOccurrences b a
forall a. a -> a
id)
      ([SingleRowEntityOccurrences b a]
 -> [(a, SingleRowEntityOccurrences b a)])
-> ([ConsolidatedRowReferences b a]
    -> [SingleRowEntityOccurrences b a])
-> [ConsolidatedRowReferences b a]
-> [(a, SingleRowEntityOccurrences b a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ConsolidatedRowReferences b a -> [SingleRowEntityOccurrences b a])
-> [ConsolidatedRowReferences b a]
-> [SingleRowEntityOccurrences b a]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ConsolidatedRowReferences b a -> [SingleRowEntityOccurrences b a]
forall a b.
(Hashable a, Eq a) =>
ConsolidatedRowReferences b a -> [SingleRowEntityOccurrences b a]
explodeRowEntities
      ([ConsolidatedRowReferences b a]
 -> HashMap a (NonEmpty (SingleRowEntityOccurrences b a)))
-> [ConsolidatedRowReferences b a]
-> HashMap a (NonEmpty (SingleRowEntityOccurrences b a))
forall a b. (a -> b) -> a -> b
$ [ConsolidatedRowReferences b a]
structureRowsByContent

  -- Consolidate all identical rows, whether those rows appear in
  -- same structure or a different structures.
  structureRowsByContent :: [ConsolidatedRowReferences b a]
structureRowsByContent =
    ((NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))
 -> ConsolidatedRowReferences b a)
-> [(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))]
-> [ConsolidatedRowReferences b a]
forall a b. (a -> b) -> [a] -> [b]
map (\(NonEmpty (AtomicKeySymbol a)
x, NonEmpty (StructureRow b a)
y) -> NonEmpty (AtomicKeySymbol a)
-> NonEmpty (StructureRow b a)
-> RowWidth
-> ConsolidatedRowReferences b a
forall b a.
NonEmpty (AtomicKeySymbol a)
-> NonEmpty (StructureRow b a)
-> RowWidth
-> ConsolidatedRowReferences b a
ConsolidatedRowReferences NonEmpty (AtomicKeySymbol a)
x NonEmpty (StructureRow b a)
y (RowWidth -> ConsolidatedRowReferences b a)
-> (StructureRow b a -> RowWidth)
-> StructureRow b a
-> ConsolidatedRowReferences b a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. StructureWithGrid b a -> RowWidth
forall b a. StructureWithGrid b a -> RowWidth
gridWidth (StructureWithGrid b a -> RowWidth)
-> (StructureRow b a -> StructureWithGrid b a)
-> StructureRow b a
-> RowWidth
forall b c a. (b -> c) -> (a -> b) -> a -> c
. StructureRow b a -> StructureWithGrid b a
forall b a. StructureRow b a -> StructureWithGrid b a
wholeStructure (StructureRow b a -> ConsolidatedRowReferences b a)
-> StructureRow b a -> ConsolidatedRowReferences b a
forall a b. (a -> b) -> a -> b
$ NonEmpty (StructureRow b a) -> StructureRow b a
forall a. NonEmpty a -> a
NE.head NonEmpty (StructureRow b a)
y)
      ([(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))]
 -> [ConsolidatedRowReferences b a])
-> ([StructureRow b a]
    -> [(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))])
-> [StructureRow b a]
-> [ConsolidatedRowReferences b a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HashMap
  (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a))
-> [(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))]
forall k v. HashMap k v -> [(k, v)]
HM.toList
      (HashMap
   (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a))
 -> [(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))])
-> ([StructureRow b a]
    -> HashMap
         (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a)))
-> [StructureRow b a]
-> [(NonEmpty (AtomicKeySymbol a), NonEmpty (StructureRow b a))]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(NonEmpty (AtomicKeySymbol a), StructureRow b a)]
-> HashMap
     (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a))
forall (t :: * -> *) a b.
(Foldable t, Hashable a, Eq a) =>
t (a, b) -> HashMap a (NonEmpty b)
binTuplesHM
      ([(NonEmpty (AtomicKeySymbol a), StructureRow b a)]
 -> HashMap
      (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a)))
-> ([StructureRow b a]
    -> [(NonEmpty (AtomicKeySymbol a), StructureRow b a)])
-> [StructureRow b a]
-> HashMap
     (NonEmpty (AtomicKeySymbol a)) (NonEmpty (StructureRow b a))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (StructureRow b a
 -> (NonEmpty (AtomicKeySymbol a), StructureRow b a))
-> [StructureRow b a]
-> [(NonEmpty (AtomicKeySymbol a), StructureRow b a)]
forall a b. (a -> b) -> [a] -> [b]
map (StructureRow b a -> NonEmpty (AtomicKeySymbol a)
forall b a. StructureRow b a -> NonEmpty (AtomicKeySymbol a)
rowContent (StructureRow b a -> NonEmpty (AtomicKeySymbol a))
-> (StructureRow b a -> StructureRow b a)
-> StructureRow b a
-> (NonEmpty (AtomicKeySymbol a), StructureRow b a)
forall b c c'. (b -> c) -> (b -> c') -> b -> (c, c')
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& StructureRow b a -> StructureRow b a
forall a. a -> a
id)
      ([StructureRow b a] -> [ConsolidatedRowReferences b a])
-> [StructureRow b a] -> [ConsolidatedRowReferences b a]
forall a b. (a -> b) -> a -> b
$ [StructureWithGrid b a] -> [StructureRow b a]
forall b a. [StructureWithGrid b a] -> [StructureRow b a]
allStructureRows [StructureWithGrid b a]
grids

-- | Utilizes the convenient 'wordsBy' function
-- from the "split" package.
getContiguousChunks :: SymbolSequence a -> [PositionedChunk a]
getContiguousChunks :: forall a. SymbolSequence a -> [PositionedChunk a]
getContiguousChunks SymbolSequence a
rowMembers =
  (NonEmpty (Int, a) -> PositionedChunk a)
-> [NonEmpty (Int, a)] -> [PositionedChunk a]
forall a b. (a -> b) -> [a] -> [b]
map NonEmpty (Int, a) -> PositionedChunk a
forall {a}. NonEmpty (Int, a) -> PositionedChunk a
mkChunk
    ([NonEmpty (Int, a)] -> [PositionedChunk a])
-> ([(Int, Maybe a)] -> [NonEmpty (Int, a)])
-> [(Int, Maybe a)]
-> [PositionedChunk a]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([(Int, Maybe a)] -> Maybe (NonEmpty (Int, a)))
-> [[(Int, Maybe a)]] -> [NonEmpty (Int, a)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ([(Int, a)] -> Maybe (NonEmpty (Int, a))
forall a. [a] -> Maybe (NonEmpty a)
NE.nonEmpty ([(Int, a)] -> Maybe (NonEmpty (Int, a)))
-> ([(Int, Maybe a)] -> [(Int, a)])
-> [(Int, Maybe a)]
-> Maybe (NonEmpty (Int, a))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Maybe a) -> Maybe (Int, a))
-> [(Int, Maybe a)] -> [(Int, a)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe (Int, Maybe a) -> Maybe (Int, a)
forall (t :: * -> *) (f :: * -> *) a.
(Traversable t, Applicative f) =>
t (f a) -> f (t a)
forall (f :: * -> *) a. Applicative f => (Int, f a) -> f (Int, a)
sequenceA)
    ([[(Int, Maybe a)]] -> [NonEmpty (Int, a)])
-> ([(Int, Maybe a)] -> [[(Int, Maybe a)]])
-> [(Int, Maybe a)]
-> [NonEmpty (Int, a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Maybe a) -> Bool) -> [(Int, Maybe a)] -> [[(Int, Maybe a)]]
forall a. (a -> Bool) -> [a] -> [[a]]
wordsBy (Maybe a -> Bool
forall a. Maybe a -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (Maybe a -> Bool)
-> ((Int, Maybe a) -> Maybe a) -> (Int, Maybe a) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Maybe a) -> Maybe a
forall a b. (a, b) -> b
snd)
    ([(Int, Maybe a)] -> [PositionedChunk a])
-> [(Int, Maybe a)] -> [PositionedChunk a]
forall a b. (a -> b) -> a -> b
$ [Int] -> SymbolSequence a -> [(Int, Maybe a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0 :: Int ..] SymbolSequence a
rowMembers
 where
  mkChunk :: NonEmpty (Int, a) -> PositionedChunk a
mkChunk NonEmpty (Int, a)
xs = Int -> NonEmpty a -> PositionedChunk a
forall a. Int -> NonEmpty a -> PositionedChunk a
PositionedChunk ((Int, a) -> Int
forall a b. (a, b) -> a
fst ((Int, a) -> Int) -> (Int, a) -> Int
forall a b. (a -> b) -> a -> b
$ NonEmpty (Int, a) -> (Int, a)
forall a. NonEmpty a -> a
NE.head NonEmpty (Int, a)
xs) (((Int, a) -> a) -> NonEmpty (Int, a) -> NonEmpty a
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map (Int, a) -> a
forall a b. (a, b) -> b
snd NonEmpty (Int, a)
xs)

-- | All of the occurrences of each unique entity within a row
-- are consolidated into one record, in which the repetitions are noted.
--
-- The members of "rowMembers" are of 'Maybe' type; the 'Nothing's
-- are dropped but accounted for positionally when indexing the columns.
explodeRowEntities ::
  (Hashable a, Eq a) =>
  ConsolidatedRowReferences b a ->
  [SingleRowEntityOccurrences b a]
explodeRowEntities :: forall a b.
(Hashable a, Eq a) =>
ConsolidatedRowReferences b a -> [SingleRowEntityOccurrences b a]
explodeRowEntities annotatedRow :: ConsolidatedRowReferences b a
annotatedRow@(ConsolidatedRowReferences NonEmpty (AtomicKeySymbol a)
rowMembers NonEmpty (StructureRow b a)
_ RowWidth
width) =
  ((a, NonEmpty (PositionWithinRow b a))
 -> SingleRowEntityOccurrences b a)
-> [(a, NonEmpty (PositionWithinRow b a))]
-> [SingleRowEntityOccurrences b a]
forall a b. (a -> b) -> [a] -> [b]
map (a, NonEmpty (PositionWithinRow b a))
-> SingleRowEntityOccurrences b a
forall {b} {a}.
(a, NonEmpty (PositionWithinRow b a))
-> SingleRowEntityOccurrences b a
f ([(a, NonEmpty (PositionWithinRow b a))]
 -> [SingleRowEntityOccurrences b a])
-> [(a, NonEmpty (PositionWithinRow b a))]
-> [SingleRowEntityOccurrences b a]
forall a b. (a -> b) -> a -> b
$ HashMap a (NonEmpty (PositionWithinRow b a))
-> [(a, NonEmpty (PositionWithinRow b a))]
forall k v. HashMap k v -> [(k, v)]
HM.toList (HashMap a (NonEmpty (PositionWithinRow b a))
 -> [(a, NonEmpty (PositionWithinRow b a))])
-> HashMap a (NonEmpty (PositionWithinRow b a))
-> [(a, NonEmpty (PositionWithinRow b a))]
forall a b. (a -> b) -> a -> b
$ [(a, PositionWithinRow b a)]
-> HashMap a (NonEmpty (PositionWithinRow b a))
forall (t :: * -> *) a b.
(Foldable t, Hashable a, Eq a) =>
t (a, b) -> HashMap a (NonEmpty b)
binTuplesHM [(a, PositionWithinRow b a)]
unconsolidatedEntityOccurrences
 where
  chunks :: [PositionedChunk a]
chunks = SymbolSequence a -> [PositionedChunk a]
forall a. SymbolSequence a -> [PositionedChunk a]
getContiguousChunks (SymbolSequence a -> [PositionedChunk a])
-> SymbolSequence a -> [PositionedChunk a]
forall a b. (a -> b) -> a -> b
$ NonEmpty (AtomicKeySymbol a) -> SymbolSequence a
forall a. NonEmpty a -> [a]
NE.toList NonEmpty (AtomicKeySymbol a)
rowMembers

  f :: (a, NonEmpty (PositionWithinRow b a))
-> SingleRowEntityOccurrences b a
f (a
e, NonEmpty (PositionWithinRow b a)
occurrences) =
    ConsolidatedRowReferences b a
-> a
-> [PositionedChunk a]
-> InspectionOffsets
-> SingleRowEntityOccurrences b a
forall b a.
ConsolidatedRowReferences b a
-> a
-> [PositionedChunk a]
-> InspectionOffsets
-> SingleRowEntityOccurrences b a
SingleRowEntityOccurrences ConsolidatedRowReferences b a
annotatedRow a
e [PositionedChunk a]
chunks (InspectionOffsets -> SingleRowEntityOccurrences b a)
-> InspectionOffsets -> SingleRowEntityOccurrences b a
forall a b. (a -> b) -> a -> b
$
      NonEmpty InspectionOffsets -> InspectionOffsets
forall a. Semigroup a => NonEmpty a -> a
sconcat (NonEmpty InspectionOffsets -> InspectionOffsets)
-> NonEmpty InspectionOffsets -> InspectionOffsets
forall a b. (a -> b) -> a -> b
$
        (PositionWithinRow b a -> InspectionOffsets)
-> NonEmpty (PositionWithinRow b a) -> NonEmpty InspectionOffsets
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
NE.map PositionWithinRow b a -> InspectionOffsets
forall b a. PositionWithinRow b a -> InspectionOffsets
deriveEntityOffsets NonEmpty (PositionWithinRow b a)
occurrences

  -- Tuples of (entity, rowOccurrenceOfEntity).
  -- Only row members for which an entity exists (is not Nothing)
  -- are retained here.
  unconsolidatedEntityOccurrences :: [(a, PositionWithinRow b a)]
unconsolidatedEntityOccurrences =
    ((PositionWithinRow b a, a) -> (a, PositionWithinRow b a))
-> [(PositionWithinRow b a, a)] -> [(a, PositionWithinRow b a)]
forall a b. (a -> b) -> [a] -> [b]
map (PositionWithinRow b a, a) -> (a, PositionWithinRow b a)
forall a b. (a, b) -> (b, a)
swap
      ([(PositionWithinRow b a, a)] -> [(a, PositionWithinRow b a)])
-> (NonEmpty (Maybe (PositionWithinRow b a, a))
    -> [(PositionWithinRow b a, a)])
-> NonEmpty (Maybe (PositionWithinRow b a, a))
-> [(a, PositionWithinRow b a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (PositionWithinRow b a, a)] -> [(PositionWithinRow b a, a)]
forall a. [Maybe a] -> [a]
catMaybes
      ([Maybe (PositionWithinRow b a, a)]
 -> [(PositionWithinRow b a, a)])
-> (NonEmpty (Maybe (PositionWithinRow b a, a))
    -> [Maybe (PositionWithinRow b a, a)])
-> NonEmpty (Maybe (PositionWithinRow b a, a))
-> [(PositionWithinRow b a, a)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NonEmpty (Maybe (PositionWithinRow b a, a))
-> [Maybe (PositionWithinRow b a, a)]
forall a. NonEmpty a -> [a]
NE.toList
      (NonEmpty (Maybe (PositionWithinRow b a, a))
 -> [(a, PositionWithinRow b a)])
-> NonEmpty (Maybe (PositionWithinRow b a, a))
-> [(a, PositionWithinRow b a)]
forall a b. (a -> b) -> a -> b
$ (Int -> AtomicKeySymbol a -> Maybe (PositionWithinRow b a, a))
-> NonEmpty (AtomicKeySymbol a)
-> NonEmpty (Maybe (PositionWithinRow b a, a))
forall a b. (Int -> a -> b) -> NonEmpty a -> NonEmpty b
forall i (f :: * -> *) a b.
FunctorWithIndex i f =>
(i -> a -> b) -> f a -> f b
imap (\Int
idx -> (a -> (PositionWithinRow b a, a))
-> AtomicKeySymbol a -> Maybe (PositionWithinRow b a, a)
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int32 -> ConsolidatedRowReferences b a -> PositionWithinRow b a
forall b a.
Int32 -> ConsolidatedRowReferences b a -> PositionWithinRow b a
PositionWithinRow (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
idx) ConsolidatedRowReferences b a
annotatedRow,)) NonEmpty (AtomicKeySymbol a)
rowMembers

  deriveEntityOffsets :: PositionWithinRow b a -> InspectionOffsets
  deriveEntityOffsets :: forall b a. PositionWithinRow b a -> InspectionOffsets
deriveEntityOffsets (PositionWithinRow Int32
pos ConsolidatedRowReferences b a
_) = Int32 -> RowWidth -> InspectionOffsets
mkOffsets Int32
pos RowWidth
width

-- * Util

-- | Place the second element of the tuples into bins by
-- the value of the first element.
binTuplesHM ::
  (Foldable t, Hashable a, Eq a) =>
  t (a, b) ->
  HM.HashMap a (NonEmpty b)
binTuplesHM :: forall (t :: * -> *) a b.
(Foldable t, Hashable a, Eq a) =>
t (a, b) -> HashMap a (NonEmpty b)
binTuplesHM = ((a, b) -> HashMap a (NonEmpty b) -> HashMap a (NonEmpty b))
-> HashMap a (NonEmpty b) -> t (a, b) -> HashMap a (NonEmpty b)
forall a b. (a -> b -> b) -> b -> t a -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (a, b) -> HashMap a (NonEmpty b) -> HashMap a (NonEmpty b)
f HashMap a (NonEmpty b)
forall a. Monoid a => a
mempty
 where
  f :: (a, b) -> HashMap a (NonEmpty b) -> HashMap a (NonEmpty b)
f = (a
 -> NonEmpty b -> HashMap a (NonEmpty b) -> HashMap a (NonEmpty b))
-> (a, NonEmpty b)
-> HashMap a (NonEmpty b)
-> HashMap a (NonEmpty b)
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry ((NonEmpty b -> NonEmpty b -> NonEmpty b)
-> a
-> NonEmpty b
-> HashMap a (NonEmpty b)
-> HashMap a (NonEmpty b)
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
HM.insertWith NonEmpty b -> NonEmpty b -> NonEmpty b
forall a. Semigroup a => a -> a -> a
(<>)) ((a, NonEmpty b)
 -> HashMap a (NonEmpty b) -> HashMap a (NonEmpty b))
-> ((a, b) -> (a, NonEmpty b))
-> (a, b)
-> HashMap a (NonEmpty b)
-> HashMap a (NonEmpty b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (b -> NonEmpty b) -> (a, b) -> (a, NonEmpty b)
forall a b. (a -> b) -> (a, a) -> (a, b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap b -> NonEmpty b
forall a. a -> NonEmpty a
forall (f :: * -> *) a. Applicative f => a -> f a
pure