-----------------------------------------------------------------------
--
--  Haskell: The Craft of Functional Programming, 3e
--  Simon Thompson
--  (c) Addison-Wesley, 1996-2011.
-- 
--  Index
--
-----------------------------------------------------------------------



module Index where

import Prelude hiding (Word)
import Chapter11 ((>.>))
import qualified Chapter7



-- Example: creating an index
-- ^^^^^^^^^^^^^^^^^^^^^^^^^^

-- The basic type symonyms

type Doc  = String
type Line = String
type Word = String

-- The type of the top-level function

makeIndex :: Doc -> [ ([Int],Word) ]

-- The top-level definition

makeIndex :: Word -> [([Int], Word)]
makeIndex
  = Word -> [Word]
lines       (Word -> [Word])
-> ([Word] -> [(Int, Word)]) -> Word -> [(Int, Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   Doc            -> [Line]
    [Word] -> [(Int, Word)]
numLines    (Word -> [(Int, Word)])
-> ([(Int, Word)] -> [(Int, Word)]) -> Word -> [(Int, Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   [Line]         -> [(Int,Line)] 
    [(Int, Word)] -> [(Int, Word)]
allNumWords (Word -> [(Int, Word)])
-> ([(Int, Word)] -> [(Int, Word)]) -> Word -> [(Int, Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   [(Int,Line)]   -> [(Int,Word)]
    [(Int, Word)] -> [(Int, Word)]
sortLs      (Word -> [(Int, Word)])
-> ([(Int, Word)] -> [([Int], Word)]) -> Word -> [([Int], Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   [(Int,Word)]   -> [(Int,Word)]
    [(Int, Word)] -> [([Int], Word)]
makeLists   (Word -> [([Int], Word)])
-> ([([Int], Word)] -> [([Int], Word)]) -> Word -> [([Int], Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   [(Int,Word)]   -> [([Int],Word)]
    [([Int], Word)] -> [([Int], Word)]
amalgamate  (Word -> [([Int], Word)])
-> ([([Int], Word)] -> [([Int], Word)]) -> Word -> [([Int], Word)]
forall a b c. (a -> b) -> (b -> c) -> a -> c
>.>     --   [([Int],Word)] -> [([Int],Word)]
    [([Int], Word)] -> [([Int], Word)]
shorten             --   [([Int],Word)] -> [([Int],Word)]

-- Implementing the component functions
-- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-- Attach a number to each line.

numLines :: [Line] -> [ ( Int , Line ) ]
numLines :: [Word] -> [(Int, Word)]
numLines [Word]
linels
  = [Int] -> [Word] -> [(Int, Word)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 .. [Word] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Word]
linels] [Word]
linels

-- Associate each word with a line number

numWords :: ( Int , Line ) -> [ ( Int , Word ) ]

numWords :: (Int, Word) -> [(Int, Word)]
numWords (Int
number , Word
line)
  = [ (Int
number , Word
word) | Word
word <- Word -> [Word]
Chapter7.splitWords Word
line ]

-- The definition uses splitWords from Chapter 7, modified to use a different
-- version of whitespace. For this to take effect, need to make the modification
-- in the Chapter7.hs file.

whitespace :: String
whitespace :: Word
whitespace = Word
" \n\t;:.,\'\"!?()-"

-- Apply numWords to each integer,line pair.

allNumWords :: [ ( Int , Line ) ] -> [ ( Int , Word ) ]
allNumWords :: [(Int, Word)] -> [(Int, Word)]
allNumWords = [[(Int, Word)]] -> [(Int, Word)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[(Int, Word)]] -> [(Int, Word)])
-> ([(Int, Word)] -> [[(Int, Word)]])
-> [(Int, Word)]
-> [(Int, Word)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Int, Word) -> [(Int, Word)]) -> [(Int, Word)] -> [[(Int, Word)]]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Word) -> [(Int, Word)]
numWords

-- The list must next be
-- sorted by word order, and lists of lines on which a word appears be built.
-- The ordering relation on pairs of numbers and 
-- words is given by

orderPair :: ( Int , Word ) -> ( Int , Word ) -> Bool
orderPair :: (Int, Word) -> (Int, Word) -> Bool
orderPair ( Int
n1 , Word
w1 ) ( Int
n2 , Word
w2 )
  = Word
w1 Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
w2 Bool -> Bool -> Bool
|| ( Word
w1 Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
w2 Bool -> Bool -> Bool
&& Int
n1 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2 )

-- Sorting the list using the orderPair ordering on pairs.

sortLs :: [ ( Int , Word ) ] -> [ ( Int , Word ) ]

sortLs :: [(Int, Word)] -> [(Int, Word)]
sortLs []     = []
sortLs ((Int, Word)
p:[(Int, Word)]
ps)
  = [(Int, Word)] -> [(Int, Word)]
sortLs [(Int, Word)]
smaller [(Int, Word)] -> [(Int, Word)] -> [(Int, Word)]
forall a. [a] -> [a] -> [a]
++ [(Int, Word)
p] [(Int, Word)] -> [(Int, Word)] -> [(Int, Word)]
forall a. [a] -> [a] -> [a]
++ [(Int, Word)] -> [(Int, Word)]
sortLs [(Int, Word)]
larger
    where
    smaller :: [(Int, Word)]
smaller = [ (Int, Word)
q | (Int, Word)
q<-[(Int, Word)]
ps , (Int, Word) -> (Int, Word) -> Bool
orderPair (Int, Word)
q (Int, Word)
p ]
    larger :: [(Int, Word)]
larger  = [ (Int, Word)
q | (Int, Word)
q<-[(Int, Word)]
ps , (Int, Word) -> (Int, Word) -> Bool
orderPair (Int, Word)
p (Int, Word)
q ]

-- The entries for the same word need to be accumulated together.
-- First each entry is converted to having a list of line numbers associated with
-- it, thus

makeLists ::  [ (Int,Word) ] -> [ ([Int],Word) ]
makeLists :: [(Int, Word)] -> [([Int], Word)]
makeLists 
  = ((Int, Word) -> ([Int], Word)) -> [(Int, Word)] -> [([Int], Word)]
forall a b. (a -> b) -> [a] -> [b]
map (Int, Word) -> ([Int], Word)
forall {a} {b}. (a, b) -> ([a], b)
mklis 
    where
    mklis :: (a, b) -> ([a], b)
mklis ( a
n , b
st ) = ( [a
n] , b
st )

-- After this, the lists associated with the same words are amalgamated.

amalgamate :: [ ([Int],Word) ] -> [ ([Int],Word) ]

amalgamate :: [([Int], Word)] -> [([Int], Word)]
amalgamate [] = []
amalgamate [([Int], Word)
p] = [([Int], Word)
p]
amalgamate (([Int]
l1,Word
w1):([Int]
l2,Word
w2):[([Int], Word)]
rest)
  | Word
w1 Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
/= Word
w2    = ([Int]
l1,Word
w1) ([Int], Word) -> [([Int], Word)] -> [([Int], Word)]
forall a. a -> [a] -> [a]
: [([Int], Word)] -> [([Int], Word)]
amalgamate (([Int]
l2,Word
w2)([Int], Word) -> [([Int], Word)] -> [([Int], Word)]
forall a. a -> [a] -> [a]
:[([Int], Word)]
rest)
  | Bool
otherwise   = [([Int], Word)] -> [([Int], Word)]
amalgamate (([Int]
l1[Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++[Int]
l2,Word
w1)([Int], Word) -> [([Int], Word)] -> [([Int], Word)]
forall a. a -> [a] -> [a]
:[([Int], Word)]
rest)

-- Remove all the short words.

shorten :: [([Int],Word)] -> [([Int],Word)]

shorten :: [([Int], Word)] -> [([Int], Word)]
shorten 
  = (([Int], Word) -> Bool) -> [([Int], Word)] -> [([Int], Word)]
forall a. (a -> Bool) -> [a] -> [a]
filter ([Int], Word) -> Bool
forall {t :: * -> *} {a} {a}. Foldable t => (a, t a) -> Bool
sizer 
    where
    sizer :: (a, t a) -> Bool
sizer (a
nl,t a
wd) = t a -> Int
forall a. t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length t a
wd Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
3