{-# OPTIONS_GHC -Wno-missing-export-lists #-}
-- | Implementation of a fully connected neural network for classification
-- of MNIST digits, with sized lists of rank 1 tensors (vectors)
-- as the trainable parameters. Sports two hidden layers. No mini-batches.
-- This is an exotic and fundamentally inefficient way of implementing
-- neural networks, but it's valuable for comparative benchmarking.
module MnistFcnnRanked1 where

import Prelude

import Data.Vector.Generic qualified as V
import GHC.TypeLits (KnownNat, Nat)

import Data.Array.Nested qualified as Nested
import Data.Array.Nested.Ranked.Shape

import HordeAd
import HordeAd.Core.Ops (tfromListR)

import MnistData

-- | The differentiable type of all trainable parameters of this nn.
-- Note that the weights of the second hidden layer are held in 'Float'
-- regardless of @r@, which is why 'afcnnMnist1' casts around the second
-- 'listMatmul1'.
type ADFcnnMnist1Parameters
       (target :: Target) (widthHidden :: Nat) (widthHidden2 :: Nat) r =
  ( ( ListR widthHidden (target (TKS '[SizeMnistGlyph] r))
    , target (TKS '[widthHidden] r) )
  , ( ListR widthHidden2 (target (TKS '[widthHidden] Float))
    , target (TKS '[widthHidden2] r) )
  , ( ListR SizeMnistLabel (target (TKS '[widthHidden2] r))
    , target (TKS '[SizeMnistLabel] r) ) )

-- | An ad-hoc matrix multiplication analogue for matrices represented
-- as lists of vectors.
listMatmul1
  :: forall target r w1 w2.
     (ADReady target, GoodScalar r, KnownNat w1)
  => target (TKS '[w1] r) -> ListR w2 (target (TKS '[w1] r))
  -> target (TKS '[w2] r)
{-# INLINE listMatmul1 #-}  -- this doesn't want to specialize
listMatmul1 x0 weights = tlet x0 $ \x ->
  let f :: target (TKS '[w1] r) -> target (TKS '[] r)
      f v = v `sdot0` x
  in tfromListR knownSTK $ f <$> weights

-- | Fully connected neural network for the MNIST digit classification task.
-- There are two hidden layers and both use the same activation function.
-- The output layer uses a different activation function.
-- The widths of the two hidden layers are @widthHidden@ and @widthHidden2@,
-- respectively.
afcnnMnist1
  :: forall target r widthHidden widthHidden2.
     (ADReady target, GoodScalar r, Differentiable r)
  => (forall n. KnownNat n => target (TKS '[n] r) -> target (TKS '[n] r))
  -> (target (TKS '[SizeMnistLabel] r) -> target (TKS '[SizeMnistLabel] r))
  -> SNat widthHidden -> SNat widthHidden2
  -> target (TKS '[SizeMnistGlyph] r)
  -> ADFcnnMnist1Parameters target widthHidden widthHidden2 r
  -> target (TKR 1 r)
afcnnMnist1 factivationHidden factivationOutput SNat SNat
            datum ((hidden, bias), (hidden2, bias2), (readout, biasr)) =
  let hiddenLayer1 = listMatmul1 datum hidden + bias
      nonlinearLayer1 = factivationHidden hiddenLayer1
      hiddenLayer2 = scast (listMatmul1 (scast nonlinearLayer1) hidden2)
                     + bias2
      nonlinearLayer2 = factivationHidden hiddenLayer2
      outputLayer = listMatmul1 nonlinearLayer2 readout + biasr
      result :: target (TKS '[SizeMnistLabel] r)
      result = factivationOutput outputLayer
  in rfromS result
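-- The wrapper below is an illustrative sketch, not part of the original
-- module: it pins the hidden widths to 300 and 100 (assumed values, chosen
-- only for the example) and plugs in the same 'logisticS' and 'softMax1S'
-- activations that 'afcnnMnistLoss1' below uses.
_afcnnMnist1Example
  :: (ADReady target, GoodScalar r, Differentiable r)
  => target (TKS '[SizeMnistGlyph] r)
  -> ADFcnnMnist1Parameters target 300 100 r
  -> target (TKR 1 r)
_afcnnMnist1Example = afcnnMnist1 logisticS softMax1S SNat SNat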
-- | The neural network applied to concrete activation functions
-- and composed with the appropriate loss function.
afcnnMnistLoss1
  :: (ADReady target, GoodScalar r, Differentiable r)
  => SNat widthHidden -> SNat widthHidden2
  -> (target (TKR 1 r), target (TKR 1 r))
  -> ADFcnnMnist1Parameters target widthHidden widthHidden2 r
  -> target (TKScalar r)
afcnnMnistLoss1 widthHidden widthHidden2 (datum, target) adparams =
  let result = afcnnMnist1 logisticS softMax1S
                           widthHidden widthHidden2 (sfromR datum) adparams
  in lossCrossEntropyV target result

-- {-# SPECIALIZE afcnnMnistLoss1
--   :: (GoodScalar r, Differentiable r)
--   => SNat widthHidden -> SNat widthHidden2
--   -> ( AstTensor AstMethodLet FullSpan (TKR 1 r)
--      , AstTensor AstMethodLet FullSpan (TKR 1 r) )
--   -> ADFcnnMnist1Parameters (AstTensor AstMethodLet FullSpan)
--                             widthHidden widthHidden2 r
--   -> AstTensor AstMethodLet FullSpan (TKScalar r) #-}
{-# SPECIALIZE afcnnMnistLoss1
  :: SNat widthHidden -> SNat widthHidden2
  -> ( AstTensor AstMethodLet FullSpan (TKR 1 Double)
     , AstTensor AstMethodLet FullSpan (TKR 1 Double) )
  -> ADFcnnMnist1Parameters (AstTensor AstMethodLet FullSpan)
                            widthHidden widthHidden2 Double
  -> AstTensor AstMethodLet FullSpan (TKScalar Double) #-}
{-# SPECIALIZE afcnnMnistLoss1
  :: SNat widthHidden -> SNat widthHidden2
  -> ( AstTensor AstMethodLet FullSpan (TKR 1 Float)
     , AstTensor AstMethodLet FullSpan (TKR 1 Float) )
  -> ADFcnnMnist1Parameters (AstTensor AstMethodLet FullSpan)
                            widthHidden widthHidden2 Float
  -> AstTensor AstMethodLet FullSpan (TKScalar Float) #-}
{-# SPECIALIZE afcnnMnistLoss1
  :: SNat widthHidden -> SNat widthHidden2
  -> ( ADVal Concrete (TKR 1 Double)
     , ADVal Concrete (TKR 1 Double) )
  -> ADFcnnMnist1Parameters (ADVal Concrete)
                            widthHidden widthHidden2 Double
  -> ADVal Concrete (TKScalar Double) #-}
{-# SPECIALIZE afcnnMnistLoss1
  :: SNat widthHidden -> SNat widthHidden2
  -> ( ADVal Concrete (TKR 1 Float)
     , ADVal Concrete (TKR 1 Float) )
  -> ADFcnnMnist1Parameters (ADVal Concrete)
                            widthHidden widthHidden2 Float
  -> ADVal Concrete (TKScalar Float) #-}

-- | A function testing the neural network given a testing set of inputs
-- and the trained parameters.
afcnnMnistTest1
  :: forall target widthHidden widthHidden2 r.
     (target ~ Concrete, GoodScalar r, Differentiable r)
  => SNat widthHidden -> SNat widthHidden2
  -> [MnistDataLinearR r]
  -> ADFcnnMnist1Parameters target widthHidden widthHidden2 r
  -> r
afcnnMnistTest1 _ _ [] _ = 0
afcnnMnistTest1 widthHidden widthHidden2 dataList testParams =
  let matchesLabels :: MnistDataLinearR r -> Bool
      matchesLabels (glyph, label) =
        let glyph1 = rconcrete glyph
            nn :: ADFcnnMnist1Parameters target widthHidden widthHidden2 r
               -> target (TKR 1 r)
            nn = afcnnMnist1 logisticS softMax1S
                             widthHidden widthHidden2 (sfromR glyph1)
            v = rtoVector $ nn testParams
        in V.maxIndex v == V.maxIndex (Nested.rtoVector label)
  in fromIntegral (length (filter matchesLabels dataList))
     / fromIntegral (length dataList)
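
-- The driver below is an illustrative sketch, not part of the original
-- module: it computes test-set accuracy (the fraction of samples whose
-- predicted digit matches the label, as defined by 'afcnnMnistTest1')
-- for the same assumed widths 300 and 100 as above. The data list and
-- trained parameters would be supplied by a benchmark harness.
_afcnnMnistTest1Example
  :: (GoodScalar r, Differentiable r)
  => [MnistDataLinearR r]
  -> ADFcnnMnist1Parameters Concrete 300 100 r
  -> r
_afcnnMnistTest1Example = afcnnMnistTest1 SNat SNat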