{-|
Fuzzy string search in Haskell.
Uses 'TextualMonoid' to be able to run on different types of strings.
-}
module Text.Fuzzily where

import Protolude (
  Bool (True),
  Char,
  Down (Down),
  Eq ((==)),
  Int,
  Maybe (..),
  Monoid (mempty),
  Num ((*), (+)),
  Ord ((>)),
  Semigroup ((<>)),
  Show,
  const,
  identity,
  isJust,
  map,
  mapMaybe,
  not,
  otherwise,
  sortOn,
  toLower,
  (.),
 )

import Data.Monoid.Textual qualified as T


{-|
Included in the return type of `match` and `filter`.
Contains the original value given, the rendered string
and the matching score.
-}
data Fuzzy val prettyText = Fuzzy
  { forall val prettyText. Fuzzy val prettyText -> val
original :: val
  , forall val prettyText. Fuzzy val prettyText -> prettyText
rendered :: prettyText
  , forall val prettyText. Fuzzy val prettyText -> Int
score :: Int
  }
  deriving (Int -> Fuzzy val prettyText -> ShowS
[Fuzzy val prettyText] -> ShowS
Fuzzy val prettyText -> String
(Int -> Fuzzy val prettyText -> ShowS)
-> (Fuzzy val prettyText -> String)
-> ([Fuzzy val prettyText] -> ShowS)
-> Show (Fuzzy val prettyText)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall val prettyText.
(Show val, Show prettyText) =>
Int -> Fuzzy val prettyText -> ShowS
forall val prettyText.
(Show val, Show prettyText) =>
[Fuzzy val prettyText] -> ShowS
forall val prettyText.
(Show val, Show prettyText) =>
Fuzzy val prettyText -> String
$cshowsPrec :: forall val prettyText.
(Show val, Show prettyText) =>
Int -> Fuzzy val prettyText -> ShowS
showsPrec :: Int -> Fuzzy val prettyText -> ShowS
$cshow :: forall val prettyText.
(Show val, Show prettyText) =>
Fuzzy val prettyText -> String
show :: Fuzzy val prettyText -> String
$cshowList :: forall val prettyText.
(Show val, Show prettyText) =>
[Fuzzy val prettyText] -> ShowS
showList :: [Fuzzy val prettyText] -> ShowS
Show, Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
(Fuzzy val prettyText -> Fuzzy val prettyText -> Bool)
-> (Fuzzy val prettyText -> Fuzzy val prettyText -> Bool)
-> Eq (Fuzzy val prettyText)
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
forall val prettyText.
(Eq val, Eq prettyText) =>
Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
$c== :: forall val prettyText.
(Eq val, Eq prettyText) =>
Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
== :: Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
$c/= :: forall val prettyText.
(Eq val, Eq prettyText) =>
Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
/= :: Fuzzy val prettyText -> Fuzzy val prettyText -> Bool
Eq)


data CaseSensitivity
  = IgnoreCase
  | HandleCase
  deriving (Int -> CaseSensitivity -> ShowS
[CaseSensitivity] -> ShowS
CaseSensitivity -> String
(Int -> CaseSensitivity -> ShowS)
-> (CaseSensitivity -> String)
-> ([CaseSensitivity] -> ShowS)
-> Show CaseSensitivity
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CaseSensitivity -> ShowS
showsPrec :: Int -> CaseSensitivity -> ShowS
$cshow :: CaseSensitivity -> String
show :: CaseSensitivity -> String
$cshowList :: [CaseSensitivity] -> ShowS
showList :: [CaseSensitivity] -> ShowS
Show, CaseSensitivity -> CaseSensitivity -> Bool
(CaseSensitivity -> CaseSensitivity -> Bool)
-> (CaseSensitivity -> CaseSensitivity -> Bool)
-> Eq CaseSensitivity
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: CaseSensitivity -> CaseSensitivity -> Bool
== :: CaseSensitivity -> CaseSensitivity -> Bool
$c/= :: CaseSensitivity -> CaseSensitivity -> Bool
/= :: CaseSensitivity -> CaseSensitivity -> Bool
Eq)


null :: (T.TextualMonoid s) => s -> Bool
null :: forall s. TextualMonoid s => s -> Bool
null =
  Bool -> Bool
not (Bool -> Bool) -> (s -> Bool) -> s -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> s -> Bool
forall t. TextualMonoid t => (Char -> Bool) -> t -> Bool
T.any (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True)


{-|
Run one-pass algorithm on the given search text.
Returns (rendered, score) if the whole pattern was consumed.
-}
matchOnce
  :: (T.TextualMonoid text)
  => (Char -> Char)
  -- ^ normalisation function
  -> (text, text)
  -- ^ (pre, post)
  -> text
  -- ^ pattern
  -> text
  -- ^ search text
  -> Maybe (text, Int)
  -- ^ (rendered, score)
matchOnce :: forall text.
TextualMonoid text =>
(Char -> Char) -> (text, text) -> text -> text -> Maybe (text, Int)
matchOnce Char -> Char
norm (text
pre, text
post) text
pat text
txt = do
  let
    (Int
tot, Int
_, text
res, text
restPat) =
      ((Int, Int, text, text) -> Char -> (Int, Int, text, text))
-> (Int, Int, text, text) -> text -> (Int, Int, text, text)
forall t a. TextualMonoid t => (a -> Char -> a) -> a -> t -> a
forall a. (a -> Char -> a) -> a -> text -> a
T.foldl_'
        ( \(Int
tot_, Int
cur, text
acc, text
p) Char
c -> case text -> Maybe (Char, text)
forall t. TextualMonoid t => t -> Maybe (Char, t)
T.splitCharacterPrefix text
p of
            Maybe (Char, text)
Nothing -> (Int
tot_, Int
0, text
acc text -> text -> text
forall a. Semigroup a => a -> a -> a
<> Char -> text
forall t. TextualMonoid t => Char -> t
T.singleton Char
c, text
p)
            Just (Char
x, text
xs)
              | Char -> Char
norm Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Char
norm Char
c ->
                  let cur' :: Int
cur' = Int
cur Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
                  in  ( Int
tot_ Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
cur'
                      , Int
cur'
                      , text
acc text -> text -> text
forall a. Semigroup a => a -> a -> a
<> text
pre text -> text -> text
forall a. Semigroup a => a -> a -> a
<> Char -> text
forall t. TextualMonoid t => Char -> t
T.singleton Char
c text -> text -> text
forall a. Semigroup a => a -> a -> a
<> text
post
                      , text
xs
                      )
              | Bool
otherwise -> (Int
tot_, Int
0, text
acc text -> text -> text
forall a. Semigroup a => a -> a -> a
<> Char -> text
forall t. TextualMonoid t => Char -> t
T.singleton Char
c, text
p)
        )
        (Int
0, Int
0, text
forall a. Monoid a => a
mempty, text
pat)
        text
txt

  if text -> Bool
forall s. TextualMonoid s => s -> Bool
null text
restPat then (text, Int) -> Maybe (text, Int)
forall a. a -> Maybe a
Just (text
res, Int
tot) else Maybe (text, Int)
forall a. Maybe a
Nothing


{-|
Returns the rendered output and the
matching score for a pattern and a text.
Two examples are given below:

>>> match HandleCase ("", "") identity "fnt" "infinite"
Just (Fuzzy
  { original = "infinite"
  , rendered = "infinite"
  , score = 3
  })

>>> match IgnoreCase ("<", ">") fst "hsk" ("Haskell", 1995)
Just (Fuzzy
  { original = ("Haskell", 1995)
  , rendered = "<h>a<s><k>ell"
  , score = 5
  })
-}
{-# INLINEABLE match #-}
match
  :: (T.TextualMonoid text)
  => CaseSensitivity
  -- ^ Handle or ignore case of search text
  -> (text, text)
  -- ^ Text to add before and after each match
  -> (value -> text)
  -- ^ Function to extract the text from the container
  -> text
  -- ^ Pattern
  -> value
  -- ^ Value containing the text to search in
  -> Maybe (Fuzzy value text)
  -- ^ Original value, rendered string, and score
match :: forall text value.
TextualMonoid text =>
CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> value
-> Maybe (Fuzzy value text)
match CaseSensitivity
caseSen (text, text)
preAndPost value -> text
extract text
pat value
value = do
  let
    norm :: Char -> Char
norm = if CaseSensitivity
caseSen CaseSensitivity -> CaseSensitivity -> Bool
forall a. Eq a => a -> a -> Bool
== CaseSensitivity
HandleCase then Char -> Char
forall a. a -> a
identity else Char -> Char
toLower
    searchText :: text
searchText = value -> text
extract value
value

    -- iterate over every suffix while carrying the already-passed prefix
    go :: text
-> text -> Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
go text
pref text
txt Maybe (Fuzzy value text)
best =
      case (Char -> Char) -> (text, text) -> text -> text -> Maybe (text, Int)
forall text.
TextualMonoid text =>
(Char -> Char) -> (text, text) -> text -> text -> Maybe (text, Int)
matchOnce Char -> Char
norm (text, text)
preAndPost text
pat text
txt of
        Just (text
rendSub, Int
sc) ->
          let cand :: Fuzzy value text
cand = value -> text -> Int -> Fuzzy value text
forall val prettyText.
val -> prettyText -> Int -> Fuzzy val prettyText
Fuzzy value
value (text
pref text -> text -> text
forall a. Semigroup a => a -> a -> a
<> text
rendSub) Int
sc
              best' :: Maybe (Fuzzy value text)
best' = Fuzzy value text
-> Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
forall {val} {prettyText}.
Fuzzy val prettyText
-> Maybe (Fuzzy val prettyText) -> Maybe (Fuzzy val prettyText)
chooseBetter Fuzzy value text
cand Maybe (Fuzzy value text)
best
          in  Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
step Maybe (Fuzzy value text)
best'
        Maybe (text, Int)
Nothing -> Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
step Maybe (Fuzzy value text)
best
      where
        step :: Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
step Maybe (Fuzzy value text)
b = case text -> Maybe (Char, text)
forall t. TextualMonoid t => t -> Maybe (Char, t)
T.splitCharacterPrefix text
txt of
          Maybe (Char, text)
Nothing -> Maybe (Fuzzy value text)
b
          Just (Char
c, text
rest') -> text
-> text -> Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
go (text
pref text -> text -> text
forall a. Semigroup a => a -> a -> a
<> Char -> text
forall t. TextualMonoid t => Char -> t
T.singleton Char
c) text
rest' Maybe (Fuzzy value text)
b

    chooseBetter :: Fuzzy val prettyText
-> Maybe (Fuzzy val prettyText) -> Maybe (Fuzzy val prettyText)
chooseBetter Fuzzy val prettyText
n Maybe (Fuzzy val prettyText)
Nothing = Fuzzy val prettyText -> Maybe (Fuzzy val prettyText)
forall a. a -> Maybe a
Just Fuzzy val prettyText
n
    chooseBetter Fuzzy val prettyText
n (Just Fuzzy val prettyText
o) = if Fuzzy val prettyText -> Int
forall val prettyText. Fuzzy val prettyText -> Int
score Fuzzy val prettyText
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Fuzzy val prettyText -> Int
forall val prettyText. Fuzzy val prettyText -> Int
score Fuzzy val prettyText
o then Fuzzy val prettyText -> Maybe (Fuzzy val prettyText)
forall a. a -> Maybe a
Just Fuzzy val prettyText
n else Fuzzy val prettyText -> Maybe (Fuzzy val prettyText)
forall a. a -> Maybe a
Just Fuzzy val prettyText
o

  text
-> text -> Maybe (Fuzzy value text) -> Maybe (Fuzzy value text)
go text
forall a. Monoid a => a
mempty text
searchText Maybe (Fuzzy value text)
forall a. Maybe a
Nothing


{-|
The function to filter a list of values
by fuzzy search on the text extracted from them.

>>> langs = [("Standard ML", 1990), ("OCaml", 1996), ("Scala", 2003)]
>>> filter "ML" langs ("<", ">") fst IgnoreCase
[ Fuzzy
  { original = ("Standard ML", 1990)
  , rendered = "standard <m><l>"
  , score = 4}
, Fuzzy
  { original = ("OCaml", 1996)
  , rendered = "oca<m><l>"
  , score = 4
  }
]
-}
{-# INLINEABLE filter #-}
filter
  :: (T.TextualMonoid text)
  => CaseSensitivity
  -- ^ Handle or ignore case of search text
  -> (text, text)
  -- ^ Text to add before and after each match
  -> (value -> text)
  -- ^ Function to extract the text from the container
  -> text
  -- ^ Pattern
  -> [value]
  -- ^ List of values containing the text to search in
  -> [Fuzzy value text]
  -- ^ List of results, sorted, highest score first
filter :: forall text value.
TextualMonoid text =>
CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> [value]
-> [Fuzzy value text]
filter CaseSensitivity
caseSen (text
pre, text
post) value -> text
extractFunc text
textPattern [value]
texts =
  (Fuzzy value text -> Down Int)
-> [Fuzzy value text] -> [Fuzzy value text]
forall o a. Ord o => (a -> o) -> [a] -> [a]
sortOn
    (Int -> Down Int
forall a. a -> Down a
Down (Int -> Down Int)
-> (Fuzzy value text -> Int) -> Fuzzy value text -> Down Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Fuzzy value text -> Int
forall val prettyText. Fuzzy val prettyText -> Int
score)
    ( (value -> Maybe (Fuzzy value text))
-> [value] -> [Fuzzy value text]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe
        (CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> value
-> Maybe (Fuzzy value text)
forall text value.
TextualMonoid text =>
CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> value
-> Maybe (Fuzzy value text)
match CaseSensitivity
caseSen (text
pre, text
post) value -> text
extractFunc text
textPattern)
        [value]
texts
    )


{-|
Return all elements of the list that have a fuzzy
match against the pattern. Runs with default settings where
nothing is added around the matches, as case insensitive.

>>> simpleFilter "vm" ["vim", "emacs", "virtual machine"]
["vim","virtual machine"]
-}
{-# INLINEABLE simpleFilter #-}
simpleFilter
  :: (T.TextualMonoid text)
  => text
  -- ^ Pattern to look for.
  -> [text]
  -- ^ List of texts to check.
  -> [text]
  -- ^ The ones that match.
simpleFilter :: forall text. TextualMonoid text => text -> [text] -> [text]
simpleFilter text
textPattern [text]
xs =
  (Fuzzy text text -> text) -> [Fuzzy text text] -> [text]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
map
    Fuzzy text text -> text
forall val prettyText. Fuzzy val prettyText -> val
original
    (CaseSensitivity
-> (text, text)
-> (text -> text)
-> text
-> [text]
-> [Fuzzy text text]
forall text value.
TextualMonoid text =>
CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> [value]
-> [Fuzzy value text]
filter CaseSensitivity
IgnoreCase (text
forall a. Monoid a => a
mempty, text
forall a. Monoid a => a
mempty) text -> text
forall a. a -> a
identity text
textPattern [text]
xs)


{-|
Returns false if the pattern and the text do not match at all.
Returns true otherwise.

>>> test "brd" "bread"
True
-}
test :: (T.TextualMonoid text) => text -> text -> Bool
test :: forall text. TextualMonoid text => text -> text -> Bool
test text
textPattern text
text =
  Maybe (Fuzzy text text) -> Bool
forall a. Maybe a -> Bool
isJust (CaseSensitivity
-> (text, text)
-> (text -> text)
-> text
-> text
-> Maybe (Fuzzy text text)
forall text value.
TextualMonoid text =>
CaseSensitivity
-> (text, text)
-> (value -> text)
-> text
-> value
-> Maybe (Fuzzy value text)
match CaseSensitivity
IgnoreCase (text
forall a. Monoid a => a
mempty, text
forall a. Monoid a => a
mempty) text -> text
forall a. a -> a
identity text
textPattern text
text)