{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# OPTIONS_GHC -threaded -rtsopts #-}
{-# OPTIONS_HADDOCK show-extensions #-}

{- |
Module      :  Aftovolio.General.Parsing
Copyright   :  (c) Oleksandr Zhabenko 2021-2024
License     :  MIT
Stability   :  Experimental
Maintainer  :  oleksandr.zhabenko@yahoo.com

The additional parsing library functions for the AFTOVolio both old and new variants.
Is taken from the Phonetic.Languages.Parsing module from the
@phonetic-languages-simplified-examples-array@ package to reduce dependencies in general case.
-}
module Aftovolio.General.Parsing (
    -- * Predicates
    isClosingCurlyBracket,
    isSlash,
    isOpeningCurlyBracket,
    variations,

    -- * Transformations
    breakGroupOfStrings,
    breakInSlashes,
    combineVariants,
    combineHeadsWithNexts,
    transformToVariations,

    -- * Files processment for specifications
    readLangSpecs,
    innerProcessmentSimple,
    argsProcessment,
) where

import Aftovolio.General.Base
import Aftovolio.General.PrepareText
import Aftovolio.General.SpecificationsRead
import Aftovolio.General.Syllables
import Data.List (lines, sort, unwords)
import Data.Maybe (fromMaybe)
import GHC.Arr
import GHC.Base
import GHC.List
import System.Environment (getArgs)
import System.IO (FilePath, readFile)
import Text.Read (read, readMaybe)

isClosingCurlyBracket :: String -> Bool
isClosingCurlyBracket :: String -> Bool
isClosingCurlyBracket = (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"}")
{-# INLINE isClosingCurlyBracket #-}

isSlash :: String -> Bool
isSlash :: String -> Bool
isSlash (Char
x : String
xs)
    | Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/' = Bool
False
    | String -> Bool
forall a. [a] -> Bool
null String
xs = Bool
True
    | Bool
otherwise = Bool
False
isSlash String
_ = Bool
False
{-# INLINE isSlash #-}

isOpeningCurlyBracket :: String -> Bool
isOpeningCurlyBracket :: String -> Bool
isOpeningCurlyBracket = (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"{")
{-# INLINE isOpeningCurlyBracket #-}

breakGroupOfStrings :: [String] -> (([String], [[String]]), [String])
breakGroupOfStrings :: [String] -> (([String], [[String]]), [String])
breakGroupOfStrings ![String]
xss = (([String]
tss, [String] -> [[String]] -> [[String]]
breakInSlashes [String]
uss []), Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
zss)
  where
    (![String]
yss, ![String]
zss) = (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isClosingCurlyBracket [String]
xss
    (![String]
tss, ![String]
uss) = (\([String]
t1, [String]
t2) -> ([String]
t1, Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
t2)) (([String], [String]) -> ([String], [String]))
-> ([String] -> ([String], [String]))
-> [String]
-> ([String], [String])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isOpeningCurlyBracket ([String] -> ([String], [String]))
-> [String] -> ([String], [String])
forall a b. (a -> b) -> a -> b
$ [String]
yss
{-# INLINE breakGroupOfStrings #-}

breakInSlashes :: [String] -> [[String]] -> [[String]]
breakInSlashes :: [String] -> [[String]] -> [[String]]
breakInSlashes ![String]
wss ![[String]]
usss
    | [String] -> Bool
forall a. [a] -> Bool
null [String]
lss = [String]
kss [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[String]]
usss
    | Bool
otherwise = [String] -> [[String]] -> [[String]]
breakInSlashes (Int -> [String] -> [String]
forall a. Int -> [a] -> [a]
drop Int
1 [String]
lss) ([String]
kss [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: [[String]]
usss)
  where
    (![String]
kss, ![String]
lss) = (String -> Bool) -> [String] -> ([String], [String])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break String -> Bool
isSlash [String]
wss

combineVariants :: ([String], [[String]]) -> [[String]]
combineVariants :: ([String], [[String]]) -> [[String]]
combineVariants (![String]
xss, (![String]
yss : [[String]]
ysss)) = ([String]
xss [String] -> [String] -> [String]
forall a. Monoid a => a -> a -> a
`mappend` [String]
yss) [String] -> [[String]] -> [[String]]
forall a. a -> [a] -> [a]
: ([String], [[String]]) -> [[String]]
combineVariants ([String]
xss, [[String]]
ysss)
combineVariants ([String], [[String]])
_ = []

combineHeadsWithNexts :: [[String]] -> [String] -> [[String]]
combineHeadsWithNexts :: [[String]] -> [String] -> [[String]]
combineHeadsWithNexts ![[String]]
xsss ![String]
yss
    | [String] -> Bool
forall a. [a] -> Bool
null [String]
yss = [[String]]
xsss
    | Bool
otherwise =
        [[String]] -> [String] -> [[String]]
combineHeadsWithNexts [[String]
xss [String] -> [String] -> [String]
forall a. Monoid a => a -> a -> a
`mappend` [String]
zss | [String]
xss <- [[String]]
xsss, [String]
zss <- [[String]]
zsss] [String]
uss
  where
    (!([String], [[String]])
t, ![String]
uss) = [String] -> (([String], [[String]]), [String])
breakGroupOfStrings [String]
yss
    !zsss :: [[String]]
zsss = ([String], [[String]]) -> [[String]]
combineVariants ([String], [[String]])
t

transformToVariations :: [String] -> [[String]]
transformToVariations :: [String] -> [[String]]
transformToVariations ![String]
yss
    | [String] -> Bool
forall a. [a] -> Bool
null [String]
yss = []
    | Bool
otherwise = [[String]] -> [String] -> [[String]]
combineHeadsWithNexts [[String]]
xsss [String]
tss
  where
    (!([String], [[String]])
y, ![String]
tss) = [String] -> (([String], [[String]]), [String])
breakGroupOfStrings [String]
yss
    !xsss :: [[String]]
xsss = ([String], [[String]]) -> [[String]]
combineVariants ([String], [[String]])
y
{-# INLINE transformToVariations #-}

variations :: [String] -> Bool
variations :: [String] -> Bool
variations [String]
xss =
    (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isSlash [String]
xss
        Bool -> Bool -> Bool
&& (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isOpeningCurlyBracket [String]
xss
        Bool -> Bool -> Bool
&& (String -> Bool) -> [String] -> Bool
forall a. (a -> Bool) -> [a] -> Bool
any String -> Bool
isClosingCurlyBracket [String]
xss
{-# INLINE variations #-}

innerProcessmentSimple ::
    -- | Must be a valid 'GWritingSystemPRPLX' specifications 'String' representation only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
    String ->
    -- | Must be a 'String' with the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
    String ->
    -- | Must be a 'String' with the 'SegmentRulesG' specifications only;
    String ->
    -- | Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
    String ->
    -- | Must be a 'String' with the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
    String ->
    ( GWritingSystemPRPLX
    , [(Char, Char)]
    , CharPhoneticClassification
    , SegmentRulesG
    , String
    , String
    , Concatenations
    , Concatenations
    , String
    )
innerProcessmentSimple :: String
-> String
-> String
-> String
-> String
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
innerProcessmentSimple String
gwrsCnts String
controlConts String
segmentData String
concatenationsFileP String
concatenationsFileA =
    let [[String]
allophonesGs, [String]
charClfs, [String]
jss, [String]
vss, [String]
wss] = Char -> [String] -> [[String]]
groupBetweenChars Char
'~' ([String] -> [[String]])
-> (String -> [String]) -> String -> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [String]
lines (String -> [[String]]) -> String -> [[String]]
forall a b. (a -> b) -> a -> b
$ String
controlConts
        wrs :: GWritingSystemPRPLX
wrs = Char -> String -> GWritingSystemPRPLX
getGWritingSystem Char
'~' String
gwrsCnts
        ks :: [(Char, Char)]
ks =
            [(Char, Char)] -> [(Char, Char)]
forall a. Ord a => [a] -> [a]
sort ([(Char, Char)] -> [(Char, Char)])
-> (Maybe [(Char, Char)] -> [(Char, Char)])
-> Maybe [(Char, Char)]
-> [(Char, Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> Maybe [(Char, Char)] -> [(Char, Char)]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [(Char, Char)] -> [(Char, Char)])
-> Maybe [(Char, Char)] -> [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [(Char, Char)]
forall a. Read a => String -> Maybe a
readMaybe ([String] -> String
unwords [String]
allophonesGs) :: Maybe [(Char, Char)])
        arr :: CharPhoneticClassification
arr = String -> CharPhoneticClassification
forall a. Read a => String -> a
read ([String] -> String
unwords [String]
charClfs) :: Array Int PRS -- The 'Array' must be previously sorted in the ascending order.
        gs :: SegmentRulesG
gs = String -> SegmentRulesG
forall a. Read a => String -> a
read String
segmentData :: SegmentRulesG
        ysss :: [[String]]
ysss =
            [[String]] -> [[String]]
sort2Concat ([[String]] -> [[String]])
-> (Maybe [[String]] -> [[String]])
-> Maybe [[String]]
-> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> Maybe [[String]] -> [[String]]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [[String]] -> [[String]]) -> Maybe [[String]] -> [[String]]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [[String]]
forall a. Read a => String -> Maybe a
readMaybe String
concatenationsFileP :: Maybe [[String]])
        zzzsss :: [[String]]
zzzsss =
            [[String]] -> [[String]]
sort2Concat ([[String]] -> [[String]])
-> (Maybe [[String]] -> [[String]])
-> Maybe [[String]]
-> [[String]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[String]] -> Maybe [[String]] -> [[String]]
forall a. a -> Maybe a -> a
fromMaybe [] (Maybe [[String]] -> [[String]]) -> Maybe [[String]] -> [[String]]
forall a b. (a -> b) -> a -> b
$ (String -> Maybe [[String]]
forall a. Read a => String -> Maybe a
readMaybe String
concatenationsFileA :: Maybe [[String]])
        js :: String
js = [String] -> String
forall a. [[a]] -> [a]
concat [String]
jss
        vs :: String
vs = [String] -> String
forall a. [[a]] -> [a]
concat [String]
vss
        ws :: String
ws = String -> String
forall a. Ord a => [a] -> [a]
sort (String -> String) -> ([String] -> String) -> [String] -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [String] -> String
forall a. [[a]] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ [String]
wss
     in (GWritingSystemPRPLX
wrs, [(Char, Char)]
ks, CharPhoneticClassification
arr, SegmentRulesG
gs, String
js, String
vs, [[String]]
ysss, [[String]]
zzzsss, String
ws)
{-# INLINE innerProcessmentSimple #-}

argsProcessment ::
    -- | With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
    FilePath ->
    -- | With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
    FilePath ->
    -- | With the 'SegmentRulesG' specifications only;
    FilePath ->
    -- | With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
    FilePath ->
    -- | With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
    FilePath ->
    IO [String]
argsProcessment :: String -> String -> String -> String -> String -> IO [String]
argsProcessment String
fileGWrSys String
controlFile String
segmentRulesFile String
concatenationsFileP String
concatenationsFileA =
    (String -> IO String) -> [String] -> IO [String]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> [a] -> m [b]
mapM
        String -> IO String
readFile
        [ String
controlFile
        , String
fileGWrSys
        , String
segmentRulesFile
        , String
concatenationsFileP
        , String
concatenationsFileA
        ]
{-# INLINE argsProcessment #-}

{- | The function that is mostly intended to be used by the end user. Reads the specifications from
the5 given files and returns the data that can be used further for generalized AFTOVolio.
-}
readLangSpecs ::
    -- | With the 'GWritingSystemPRPLX' specifications only (see the gwrsysExample.txt file in the @phonetic-languages-phonetics-basics@ package as a schema);
    FilePath ->
    -- | With the 5 meaningful lines that are delimited with the \'~\' line one from another with the specifications for the possible allophones (if any), 'CharPhoneticClassification', white spaces information (two 'String's) and the 'String' of all the possible 'PLL' 'Char's;
    FilePath ->
    -- | With the 'SegmentRulesG' specifications only;
    FilePath ->
    -- | With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be prepended to the next word.
    FilePath ->
    -- | With the 'Concatenations' specifications only (see the data in the EnglishConcatenated.txt file in the @phonetic-languages-phonetics-basics@ package as a list of English equivalents of the needed 'String's). These are to be appended to the previous word.
    FilePath ->
    IO
        ( GWritingSystemPRPLX
        , [(Char, Char)]
        , CharPhoneticClassification
        , SegmentRulesG
        , String
        , String
        , Concatenations
        , Concatenations
        , String
        )
readLangSpecs :: String
-> String
-> String
-> String
-> String
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
readLangSpecs String
fileGWrSys String
controlFile String
segmentRulesFile String
concatenationsFileP String
concatenationsFileA =
    String -> String -> String -> String -> String -> IO [String]
argsProcessment
        String
fileGWrSys
        String
controlFile
        String
segmentRulesFile
        String
concatenationsFileP
        String
concatenationsFileA IO [String]
-> ([String]
    -> IO
         (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
          SegmentRulesG, String, String, [[String]], [[String]], String))
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \[String]
xss ->
        let [ String
controlConts
                , String
gwrsCnts
                , String
segmentData
                , String
concatenationsFileP1
                , String
concatenationsFileA1
                ] = [String]
xss
         in (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
 SegmentRulesG, String, String, [[String]], [[String]], String)
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
  SegmentRulesG, String, String, [[String]], [[String]], String)
 -> IO
      (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
       SegmentRulesG, String, String, [[String]], [[String]], String))
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
-> IO
     (GWritingSystemPRPLX, [(Char, Char)], CharPhoneticClassification,
      SegmentRulesG, String, String, [[String]], [[String]], String)
forall a b. (a -> b) -> a -> b
$
                String
-> String
-> String
-> String
-> String
-> (GWritingSystemPRPLX, [(Char, Char)],
    CharPhoneticClassification, SegmentRulesG, String, String,
    [[String]], [[String]], String)
innerProcessmentSimple
                    String
gwrsCnts
                    String
controlConts
                    String
segmentData
                    String
concatenationsFileP1
                    String
concatenationsFileA1
{-# INLINE readLangSpecs #-}