{-# LANGUAGE TypeFamilies #-}
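
-- | Code generation for ImpCode with GPU kernels.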
module Futhark.CodeGen.ImpGen.GPU
( compileProgOpenCL,
compileProgCUDA,
compileProgHIP,
Warnings,
)
where
import Control.Monad
import Data.List qualified as L
import Data.Map qualified as M
import Data.Maybe
import Futhark.CodeGen.ImpCode.GPU qualified as Imp
import Futhark.CodeGen.ImpGen hiding (compileProg)
import Futhark.CodeGen.ImpGen qualified
import Futhark.CodeGen.ImpGen.GPU.Base
import Futhark.CodeGen.ImpGen.GPU.SegHist
import Futhark.CodeGen.ImpGen.GPU.SegMap
import Futhark.CodeGen.ImpGen.GPU.SegRed
import Futhark.CodeGen.ImpGen.GPU.SegScan
import Futhark.Error
import Futhark.IR.GPUMem
import Futhark.MonadFreshNames
import Futhark.Util.IntegralExp (divUp, nextMul)
import Prelude hiding (quot, rem)

callKernelOperations :: Operations GPUMem HostEnv Imp.HostOp
callKernelOperations =
  Operations
    { opsExpCompiler = expCompiler,
      opsCopyCompiler = lmadCopy,
      opsOpCompiler = opCompiler,
      opsStmsCompiler = defCompileStms,
      opsAllocCompilers = mempty
    }

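-- Atomic binary operations supported natively by each backend.  The
-- CUDA (and HIP) table currently coincides with the OpenCL one.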
openclAtomics, cudaAtomics :: AtomicBinOp
(openclAtomics, cudaAtomics) = (flip lookup opencl, flip lookup cuda)
  where
    opencl64 =
      [ (Add Int64 OverflowUndef, Imp.AtomicAdd Int64),
        (FAdd Float64, Imp.AtomicFAdd Float64),
        (SMax Int64, Imp.AtomicSMax Int64),
        (SMin Int64, Imp.AtomicSMin Int64),
        (UMax Int64, Imp.AtomicUMax Int64),
        (UMin Int64, Imp.AtomicUMin Int64),
        (And Int64, Imp.AtomicAnd Int64),
        (Or Int64, Imp.AtomicOr Int64),
        (Xor Int64, Imp.AtomicXor Int64)
      ]
    opencl32 =
      [ (Add Int32 OverflowUndef, Imp.AtomicAdd Int32),
        (FAdd Float32, Imp.AtomicFAdd Float32),
        (SMax Int32, Imp.AtomicSMax Int32),
        (SMin Int32, Imp.AtomicSMin Int32),
        (UMax Int32, Imp.AtomicUMax Int32),
        (UMin Int32, Imp.AtomicUMin Int32),
        (And Int32, Imp.AtomicAnd Int32),
        (Or Int32, Imp.AtomicOr Int32),
        (Xor Int32, Imp.AtomicXor Int32)
      ]
    opencl = opencl32 ++ opencl64
    cuda = opencl

compileProg ::
  (MonadFreshNames m) =>
  HostEnv ->
  Prog GPUMem ->
  m (Warnings, Imp.Program)
compileProg env =
  Futhark.CodeGen.ImpGen.compileProg env callKernelOperations device_space
  where
    device_space = Imp.Space "device"

-- | Compile the program to ImpCode with kernels for the OpenCL, CUDA,
-- or HIP backend, respectively.
compileProgOpenCL,
  compileProgCUDA,
  compileProgHIP ::
    (MonadFreshNames m) => Prog GPUMem -> m (Warnings, Imp.Program)
compileProgOpenCL = compileProg $ HostEnv openclAtomics OpenCL mempty
compileProgCUDA = compileProg $ HostEnv cudaAtomics CUDA mempty
compileProgHIP = compileProg $ HostEnv cudaAtomics HIP mempty

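-- Compile a host-level operation: allocations, size queries, SegOps
-- (dispatched to 'segOpCompiler'), and single-threaded 'GPUBody' kernels.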
opCompiler ::
  Pat LetDecMem ->
  Op GPUMem ->
  CallKernelGen ()
opCompiler dest (Alloc e space) =
  compileAlloc dest e space
opCompiler (Pat [pe]) (Inner (SizeOp (GetSize key size_class))) = do
  fname <- askFunction
  sOp $
    Imp.GetSize (patElemName pe) (keyWithEntryPoint fname key) $
      sizeClassWithEntryPoint fname size_class
opCompiler (Pat [pe]) (Inner (SizeOp (CmpSizeLe key size_class x))) = do
  fname <- askFunction
  let size_class' = sizeClassWithEntryPoint fname size_class
  sOp . Imp.CmpSizeLe (patElemName pe) (keyWithEntryPoint fname key) size_class'
    =<< toExp x
opCompiler (Pat [pe]) (Inner (SizeOp (GetSizeMax size_class))) =
  sOp $ Imp.GetSizeMax (patElemName pe) size_class
opCompiler (Pat [pe]) (Inner (SizeOp (CalcNumBlocks w64 max_num_tblocks_key tblock_size))) = do
  fname <- askFunction
  max_num_tblocks :: TV Int64 <- dPrim "max_num_tblocks"
  sOp $
    Imp.GetSize (tvVar max_num_tblocks) (keyWithEntryPoint fname max_num_tblocks_key) $
      sizeClassWithEntryPoint fname SizeGrid
  let num_tblocks_maybe_zero =
        sMin64 (pe64 w64 `divUp` pe64 tblock_size) $
          sExt64 (tvExp max_num_tblocks)
  let num_tblocks = sMax64 1 num_tblocks_maybe_zero
  mkTV (patElemName pe) <-- sExt32 num_tblocks
opCompiler dest (Inner (SegOp op)) =
  segOpCompiler dest op
opCompiler (Pat pes) (Inner (GPUBody _ (Body _ stms res))) = do
  tid <- newVName "tid"
  let one = Count (intConst Int64 1)
  sKernelThread "gpuseq" tid (defKernelAttrs one one) $
    compileStms (freeIn res) stms $
      forM_ (zip pes res) $ \(pe, SubExpRes _ se) ->
        copyDWIMFix (patElemName pe) [0] se []
opCompiler pat e =
  compilerBugS $
    "opCompiler: Invalid pattern\n  "
      ++ prettyString pat
      ++ "\nfor expression\n  "
      ++ prettyString e

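-- Qualify the names in a 'SizeThreshold' path with the calling entry point.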
sizeClassWithEntryPoint :: Maybe Name -> Imp.SizeClass -> Imp.SizeClass
sizeClassWithEntryPoint fname (Imp.SizeThreshold path def) =
  Imp.SizeThreshold (map f path) def
  where
    f (name, x) = (keyWithEntryPoint fname name, x)
sizeClassWithEntryPoint _ size_class = size_class

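-- Dispatch a 'SegOp' to the corresponding kernel generator.  SegRed,
-- SegScan, and SegHist are expected only at thread level; anything else
-- is a compiler bug.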
segOpCompiler ::
  Pat LetDecMem ->
  SegOp SegLevel GPUMem ->
  CallKernelGen ()
segOpCompiler pat (SegMap lvl space _ kbody) =
  compileSegMap pat lvl space kbody
segOpCompiler pat (SegRed lvl@(SegThread _ _) space _ kbody reds) =
  compileSegRed pat lvl space reds kbody
segOpCompiler pat (SegScan lvl@(SegThread _ _) space _ kbody scans) =
  compileSegScan pat lvl space scans kbody
segOpCompiler pat (SegHist lvl@(SegThread _ _) space _ kbody ops) =
  compileSegHist pat lvl space ops kbody
segOpCompiler pat segop =
  compilerBugS $
    "segOpCompiler: unexpected "
      ++ prettyString (segLevel segop)
      ++ " for rhs of pattern "
      ++ prettyString pat

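-- Build a boolean expression checking that the shared-memory allocations
-- of every kernel in the given code (padded to 8-byte alignment) fit in
-- the device's shared-memory capacity.  Returns 'Nothing' if some
-- allocation size is not in scope, in which case no check can be made.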
checkSharedMemoryReqs :: (VName -> Bool) -> Imp.HostCode -> CallKernelGen (Maybe (Imp.TExp Bool))
checkSharedMemoryReqs in_scope code = do
  let alloc_sizes = map (sum . map alignedSize . localAllocSizes . Imp.kernelBody) $ getGPU code

  if not $ all in_scope $ namesToList $ freeIn alloc_sizes
    then pure Nothing
    else do
      shared_memory_capacity :: TV Int64 <- dPrim "shared_memory_capacity"
      sOp $ Imp.GetSizeMax (tvVar shared_memory_capacity) SizeSharedMemory

      let shared_memory_capacity_64 =
            sExt64 $ tvExp shared_memory_capacity
          fits size =
            unCount size .<=. shared_memory_capacity_64
      pure $ Just $ L.foldl' (.&&.) true (map fits alloc_sizes)
  where
    getGPU = foldMap getKernel
    getKernel (Imp.CallKernel k) | Imp.kernelCheckSharedMemory k = [k]
    getKernel _ = []

    localAllocSizes = foldMap localAllocSize
    localAllocSize (Imp.SharedAlloc _ size) = [size]
    localAllocSize _ = []

    alignedSize x = nextMul x 8

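-- Compile a 'WithAcc' expression.  Accumulator operators that require
-- locking (per 'atomicUpdateLocking') are given a freshly initialised
-- locks array, recorded in the host environment before the expression
-- is compiled with 'defCompileExp'.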
withAcc ::
  Pat LetDecMem ->
  [(Shape, [VName], Maybe (Lambda GPUMem, [SubExp]))] ->
  Lambda GPUMem ->
  CallKernelGen ()
withAcc pat inputs lam = do
  atomics <- hostAtomics <$> askEnv
  locksForInputs atomics $ zip accs inputs
  where
    accs = map paramName $ lambdaParams lam

    locksForInputs _ [] =
      defCompileExp pat $ WithAcc inputs lam
    locksForInputs atomics ((c, (_, _, op)) : inputs')
      | Just (op_lam, _) <- op,
        AtomicLocking _ <- atomicUpdateLocking atomics op_lam = do
          let num_locks = 100151
          locks_arr <- genZeroes "withacc_locks" num_locks
          let locks = Locks locks_arr num_locks
              extend env = env {hostLocks = M.insert c locks $ hostLocks env}
          localEnv extend $ locksForInputs atomics inputs'
      | otherwise =
          locksForInputs atomics inputs'

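-- GPU-specific expression compiler.  Iota and replicate become dedicated
-- kernels, allocations in the "shared" space compile to nothing at the
-- host level, 'WithAcc' gets lock handling, and equivalence matches
-- additionally guard the preferred case with a shared-memory capacity
-- check.  Everything else falls through to 'defCompileExp'.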
expCompiler :: ExpCompiler GPUMem HostEnv Imp.HostOp
expCompiler (Pat [pe]) (BasicOp (Iota n x s et)) = do
  x' <- toExp x
  s' <- toExp s
  sIota (patElemName pe) (pe64 n) x' s' et
expCompiler (Pat [pe]) (BasicOp (Replicate shape se))
  | Acc {} <- patElemType pe = pure ()
  | otherwise =
      if shapeRank shape == 0
        then copyDWIM (patElemName pe) [] se []
        else sReplicate (patElemName pe) se
expCompiler _ (Op (Alloc _ (Space "shared"))) =
  pure ()
expCompiler pat (WithAcc inputs lam) =
  withAcc pat inputs lam
expCompiler dest (Match cond (first_case : cases) defbranch sort@(MatchDec _ MatchEquiv)) = do
  scope <- askScope
  tcode <- collect $ compileBody dest $ caseBody first_case
  fcode <- collect $ expCompiler dest $ Match cond cases defbranch sort
  check <- checkSharedMemoryReqs (`M.member` scope) tcode
  let matches = caseMatch cond (casePat first_case)
  emit $ case check of
    Nothing -> fcode
    Just ok -> Imp.If (matches .&&. ok) tcode fcode
expCompiler dest e =
  defCompileExp dest e