walberla::gpu Namespace Reference

Namespaces

 communication
 
 internal
 

Classes

class  EventRAII
 
class  FieldAccessor
 
class  FieldAccessor3D
 
class  FieldAccessorXYZ
 
class  FieldIndexing
 
class  FieldIndexing3D
 
class  FieldIndexing3DBase
 
class  FieldIndexingXYZ
 
class  GeneratedGPUPackInfo
 
class  GeneratedNonUniformGPUPackInfo
 
class  GPUField
 GhostLayerField stored on a CUDA/HIP GPU. More...
 
class  GPUSweepBase
 
class  HostFieldAllocator
 Allocator that allocates a CPU (host) field using gpuHostAlloc without padding. More...
 
class  Kernel
 Wrapper class around a GPU kernel, to call kernels also from code not compiled with the device compiler. More...
 
class  NvtxRange
 
class  ParallelSection
 
class  ParallelStreams
 Helper class to run CUDA/HIP operations on parallel streams. More...
 
class  StreamRAII
 

Functions

template<typename GPUField_T >
BlockDataID addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, const std::string &identifier, uint_t fSize, const Layout layout=fzyx, uint_t nrOfGhostLayers=1, bool usePitchedMem=true)
 Adds a gpu::GPUField to a StructuredBlockStorage. More...
 
template<typename Field_T >
BlockDataID addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, ConstBlockDataID cpuFieldID, const std::string &identifier, bool usePitchedMem=true)
 Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field. More...
 
void * allocate_aligned_with_offset (uint_t size, uint_t alignment, uint_t offset)
 
void free_aligned_with_offset (void *ptr)
 
void * allocate_pitched_with_offset (size_t &pitchOut, size_t width, size_t height, size_t alignment, size_t alignmentOffset)
 
void selectDeviceBasedOnMpiRank ()
 Selects active GPU device based on MPI rank. More...
 
void checkForError (gpuError_t code, const std::string &callerPath, const int line)
 
void checkForLastError (const std::string &callerPath, const int line)
 
template<typename DstType , typename SrcType >
void fieldCpy (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename DstType , typename SrcType >
std::function< void()> fieldCpyFunctor (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename DstType , typename SrcType >
void fieldCpySweepFunction (BlockDataID dstID, ConstBlockDataID srcID, IBlock *block)
 
template<typename DstType , typename SrcType >
std::function< void(IBlock *)> fieldCpyFunctor (BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename T , uint_t fs>
void fieldCpy (gpu::GPUField< T > &dst, const field::Field< T, fs > &src)
 
template<typename T , uint_t fs>
void fieldCpy (field::Field< T, fs > &dst, const gpu::GPUField< T > &src)
 
template<typename T >
void shiftCoordinatesWhileFastestCoordHasSizeOne (typename FieldAccessor< T >::IndexingScheme &indexing, dim3 &gridDim, dim3 &blockDim)
 
unsigned int iDivUp (unsigned int a, unsigned int b)
 
void copyDevToDevFZYX (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout. More...
 
void copyDevToDevZYXF (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout. More...
 
void copyHostToDevFZYX (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout. More...
 
void copyHostToDevZYXF (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout. More...
 
void copyDevToHostFZYX (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout. More...
 
void copyDevToHostZYXF (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout. More...
 
template<typename FuncPtr >
Kernel< FuncPtr > make_kernel (FuncPtr funcPtr)
 
void nvtxMarker (const std::string &name, const uint32_t color=0xaaaaaa)
 
void nameStream (const cudaStream_t &stream, const std::string &name)
 
template<typename... GpuFields>
void exportModuleToPython (py::module_ &m)
 
template<typename... CpuFields>
void exportCopyFunctionsToPython (py::module_ &m)
 

Variables

static std::map< void *, void * > freePointers_
 

Function Documentation

◆ addGPUFieldToStorage() [1/2]

template<typename GPUField_T >
BlockDataID walberla::gpu::addGPUFieldToStorage ( const shared_ptr< StructuredBlockStorage > &  bs,
const std::string &  identifier,
uint_t  fSize,
const Layout  layout = fzyx,
uint_t  nrOfGhostLayers = 1,
bool  usePitchedMem = true 
)

Adds a gpu::GPUField to a StructuredBlockStorage.

◆ addGPUFieldToStorage() [2/2]

template<typename Field_T >
BlockDataID walberla::gpu::addGPUFieldToStorage ( const shared_ptr< StructuredBlockStorage > &  bs,
ConstBlockDataID  cpuFieldID,
const std::string &  identifier,
bool  usePitchedMem = true 
)

Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field.

  • adds a GPU field to a StructuredBlockStorage using a CPU field
  • sizes, number of ghost layers and layout are the same as those of the CPU field
  • GPU field is initialized with the data currently stored in the CPU field
    Template Parameters
    Field_T – type of the CPU field; the created GPUField will be of type gpu::GPUField<Field_T::value_type>

◆ allocate_aligned_with_offset()

void * walberla::gpu::allocate_aligned_with_offset ( uint_t  size,
uint_t  alignment,
uint_t  offset 
)

◆ allocate_pitched_with_offset()

void * walberla::gpu::allocate_pitched_with_offset ( size_t &  pitchOut,
size_t  width,
size_t  height,
size_t  alignment,
size_t  alignmentOffset 
)

◆ checkForError()

void walberla::gpu::checkForError ( gpuError_t  code,
const std::string &  callerPath,
const int  line 
)
inline

◆ checkForLastError()

void walberla::gpu::checkForLastError ( const std::string &  callerPath,
const int  line 
)
inline

◆ copyDevToDevFZYX()

void walberla::gpu::copyDevToDevFZYX ( const gpuPitchedPtr &  dst,
const gpuPitchedPtr &  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeZ,
uint_t  srcAllocSizeZ,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout.

Parameters
dst – destination buffer
src – source buffer
dstOffset – (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset – (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeZ – allocation size in z direction of the destination buffer
srcAllocSizeZ – allocation size in z direction of the source buffer
typeSize – size of an f element
intervalSize – interval size
copyStream – CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToDevZYXF()

void walberla::gpu::copyDevToDevZYXF ( const gpuPitchedPtr &  dst,
const gpuPitchedPtr &  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeY,
uint_t  srcAllocSizeY,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout.

Parameters
dst – destination buffer
src – source buffer
dstOffset – (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset – (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeY – allocation size in y direction of the destination buffer
srcAllocSizeY – allocation size in y direction of the source buffer
typeSize – size of an f element
intervalSize – interval size
copyStream – CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToHostFZYX()

void walberla::gpu::copyDevToHostFZYX ( unsigned char *  dst,
const gpuPitchedPtr &  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeZ,
uint_t  srcAllocSizeZ,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyDevToHostZYXF()

void walberla::gpu::copyDevToHostZYXF ( unsigned char *  dst,
const gpuPitchedPtr &  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeY,
uint_t  srcAllocSizeY,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ copyHostToDevFZYX()

void walberla::gpu::copyHostToDevFZYX ( const gpuPitchedPtr &  dst,
unsigned char *  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeZ,
uint_t  srcAllocSizeZ,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyHostToDevZYXF()

void walberla::gpu::copyHostToDevZYXF ( const gpuPitchedPtr &  dst,
unsigned char *  src,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  srcOffset,
uint_t  dstAllocSizeY,
uint_t  srcAllocSizeY,
uint_t  typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > &  intervalSize,
gpuStream_t  copyStream 
)

Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ exportCopyFunctionsToPython()

template<typename... CpuFields>
void walberla::gpu::exportCopyFunctionsToPython ( py::module_ &  m)

◆ exportModuleToPython()

template<typename... GpuFields>
void walberla::gpu::exportModuleToPython ( py::module_ &  m)

◆ fieldCpy() [1/3]

template<typename DstType , typename SrcType >
void walberla::gpu::fieldCpy ( const shared_ptr< StructuredBlockStorage > &  blocks,
BlockDataID  dstID,
ConstBlockDataID  srcID 
)

◆ fieldCpy() [2/3]

template<typename T , uint_t fs>
void walberla::gpu::fieldCpy ( field::Field< T, fs > &  dst,
const gpu::GPUField< T > &  src 
)

◆ fieldCpy() [3/3]

template<typename T , uint_t fs>
void walberla::gpu::fieldCpy ( gpu::GPUField< T > &  dst,
const field::Field< T, fs > &  src 
)

◆ fieldCpyFunctor() [1/2]

template<typename DstType , typename SrcType >
std::function<void(IBlock*)> walberla::gpu::fieldCpyFunctor ( BlockDataID  dstID,
ConstBlockDataID  srcID 
)

◆ fieldCpyFunctor() [2/2]

template<typename DstType , typename SrcType >
std::function<void()> walberla::gpu::fieldCpyFunctor ( const shared_ptr< StructuredBlockStorage > &  blocks,
BlockDataID  dstID,
ConstBlockDataID  srcID 
)

◆ fieldCpySweepFunction()

template<typename DstType , typename SrcType >
void walberla::gpu::fieldCpySweepFunction ( BlockDataID  dstID,
ConstBlockDataID  srcID,
IBlock *  block 
)

◆ free_aligned_with_offset()

void walberla::gpu::free_aligned_with_offset ( void *  ptr)

◆ iDivUp()

unsigned int walberla::gpu::iDivUp ( unsigned int  a,
unsigned int  b 
)
inline

◆ make_kernel()

template<typename FuncPtr >
Kernel<FuncPtr> walberla::gpu::make_kernel ( FuncPtr  funcPtr)

◆ nameStream()

void walberla::gpu::nameStream ( const cudaStream_t &  stream,
const std::string &  name 
)
inline

◆ nvtxMarker()

void walberla::gpu::nvtxMarker ( const std::string &  name,
const uint32_t  color = 0xaaaaaa 
)
inline

◆ selectDeviceBasedOnMpiRank()

void walberla::gpu::selectDeviceBasedOnMpiRank ( )

Selects active GPU device based on MPI rank.

Assumes that on each node as many MPI processes are started as there are GPUs.

  • If there are more GPUs than processes on a node, a warning is printed and not all GPUs are utilized.
  • If there are more processes than GPUs, a warning is also printed and multiple processes may access the same GPU. Processes are assigned to GPUs in a round-robin fashion.

◆ shiftCoordinatesWhileFastestCoordHasSizeOne()

template<typename T >
void walberla::gpu::shiftCoordinatesWhileFastestCoordHasSizeOne ( typename FieldAccessor< T >::IndexingScheme &  indexing,
dim3 &  gridDim,
dim3 &  blockDim 
)

Variable Documentation

◆ freePointers_

std::map<void *, void*> walberla::gpu::freePointers_
static