waLBerla 7.2
Loading...
Searching...
No Matches
walberla::gpu Namespace Reference

Namespaces

namespace  communication
 
namespace  internal
 

Classes

class  EventRAII
 
class  FieldAccessor
 Handle to the underlying device data of a GPUField. More...
 
class  FieldAccessor3D
 
class  FieldAccessorXYZ
 
class  FieldIndexing
 Utility class to generate handles to the underlying device data of a GPUField. More...
 
class  FieldIndexing3D
 
class  FieldIndexing3DBase
 
class  FieldIndexingXYZ
 
class  GeneratedGPUPackInfo
 Data packing/unpacking for ghost layer based communication of a GPUField. More...
 
class  GeneratedNonUniformGPUPackInfo
 
class  GPUField
 GhostLayerField stored on a CUDA/HIP GPU. More...
 
class  GPUSweepBase
 
class  HostFieldAllocator
 Allocator that allocates a CPU field using gpuHostAlloc without padding. More...
 
class  Kernel
 Wrapper class around a GPU kernel, to call kernels also from code not compiled with the device compiler. More...
 
class  NvtxRange
 
class  ParallelSection
 
class  ParallelStreams
 Helper class to run CUDA/HIP operations on parallel streams. More...
 
class  ShiftedPeriodicityGPU
 A periodicity boundary condition that adds a user-defined spatial shift to the field when applied. More...
 
class  StreamRAII
 RAII wrapper for GPU streams. More...
 

Functions

template<typename GPUField_T >
BlockDataID addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, const std::string &identifier, uint_t fSize, const Layout layout=fzyx, uint_t nrOfGhostLayers=1, bool usePitchedMem=true)
 Adds a gpu::GPUField to a StructuredBlockStorage.
 
template<typename Field_T >
BlockDataID addGPUFieldToStorage (const shared_ptr< StructuredBlockStorage > &bs, ConstBlockDataID cpuFieldID, const std::string &identifier, bool usePitchedMem=true)
 Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field.
 
void * allocate_aligned_with_offset (uint_t size, uint_t alignment, uint_t offset)
 
void free_aligned_with_offset (void *ptr)
 
void * allocate_pitched_with_offset (size_t &pitchOut, size_t width, size_t height, size_t alignment, size_t alignmentOffset)
 
void selectDeviceBasedOnMpiRank ()
 Selects active GPU device based on MPI rank.
 
void checkForError (gpuError_t code, const std::string &callerPath, const int line)
 
void checkForLastError (const std::string &callerPath, const int line)
 
template<typename DstType , typename SrcType >
void fieldCpy (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename DstType , typename SrcType >
std::function< void()> fieldCpyFunctor (const shared_ptr< StructuredBlockStorage > &blocks, BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename DstType , typename SrcType >
void fieldCpySweepFunction (BlockDataID dstID, ConstBlockDataID srcID, IBlock *block)
 
template<typename DstType , typename SrcType >
std::function< void(IBlock *)> fieldCpyFunctor (BlockDataID dstID, ConstBlockDataID srcID)
 
template<typename T , uint_t fs>
void fieldCpy (gpu::GPUField< T > &dst, const field::Field< T, fs > &src)
 
template<typename T , uint_t fs>
void fieldCpy (field::Field< T, fs > &dst, const gpu::GPUField< T > &src)
 
template<typename T >
void shiftCoordinatesWhileFastestCoordHasSizeOne (typename FieldAccessor< T >::IndexingScheme &indexing, dim3 &gridDim, dim3 &blockDim)
 
unsigned int iDivUp (unsigned int a, unsigned int b)
 
void copyDevToDevFZYX (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout.
 
void copyDevToDevZYXF (const gpuPitchedPtr &dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout.
 
void copyHostToDevFZYX (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout.
 
void copyHostToDevZYXF (const gpuPitchedPtr &dst, unsigned char *src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout.
 
void copyDevToHostFZYX (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeZ, uint_t srcAllocSizeZ, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout.
 
void copyDevToHostZYXF (unsigned char *dst, const gpuPitchedPtr &src, std::tuple< uint_t, uint_t, uint_t, uint_t > &dstOffset, std::tuple< uint_t, uint_t, uint_t, uint_t > &srcOffset, uint_t dstAllocSizeY, uint_t srcAllocSizeY, uint_t typeSize, std::tuple< uint_t, uint_t, uint_t, uint_t > &intervalSize, gpuStream_t copyStream)
 Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout.
 
template<typename FuncPtr >
Kernel< FuncPtr > make_kernel (FuncPtr funcPtr)
 
void nvtxMarker (const std::string &name, const uint32_t color=0xaaaaaa)
 
void nameStream (const cudaStream_t &stream, const std::string &name)
 
template<typename... GpuFields>
void exportModuleToPython (py::module_ &m)
 
template<typename... CpuFields>
void exportCopyFunctionsToPython (py::module_ &m)
 

Variables

static std::map< void *, void * > freePointers_
 

Function Documentation

◆ addGPUFieldToStorage() [1/2]

template<typename GPUField_T >
BlockDataID walberla::gpu::addGPUFieldToStorage ( const shared_ptr< StructuredBlockStorage > & bs,
const std::string & identifier,
uint_t fSize,
const Layout layout = fzyx,
uint_t nrOfGhostLayers = 1,
bool usePitchedMem = true )

Adds a gpu::GPUField to a StructuredBlockStorage.

◆ addGPUFieldToStorage() [2/2]

template<typename Field_T >
BlockDataID walberla::gpu::addGPUFieldToStorage ( const shared_ptr< StructuredBlockStorage > & bs,
ConstBlockDataID cpuFieldID,
const std::string & identifier,
bool usePitchedMem = true )

Adds a gpu::GPUField to a StructuredBlockStorage using data from a CPU field.

  • adds a GPU field to a StructuredBlockStorage using a CPU field
  • sizes, number of ghostlayers and layout are the same as the CPU field
  • GPU field is initialized with the data currently stored in the CPU field
    Template Parameters
    Field_T — type of the CPU field; the created GPUField will be of type gpu::GPUField<Field_T::value_type>

◆ allocate_aligned_with_offset()

void * walberla::gpu::allocate_aligned_with_offset ( uint_t size,
uint_t alignment,
uint_t offset )

◆ allocate_pitched_with_offset()

void * walberla::gpu::allocate_pitched_with_offset ( size_t & pitchOut,
size_t width,
size_t height,
size_t alignment,
size_t alignmentOffset )

◆ checkForError()

void walberla::gpu::checkForError ( gpuError_t code,
const std::string & callerPath,
const int line )
inline

◆ checkForLastError()

void walberla::gpu::checkForLastError ( const std::string & callerPath,
const int line )
inline

◆ copyDevToDevFZYX()

void walberla::gpu::copyDevToDevFZYX ( const gpuPitchedPtr & dst,
const gpuPitchedPtr & src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeZ,
uint_t srcAllocSizeZ,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a device buffer to another device buffer with fzyx memory layout.

Parameters
dst — destination buffer
src — source buffer
dstOffset — (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset — (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeZ — allocation size in z direction of the destination buffer
srcAllocSizeZ — allocation size in z direction of the source buffer
typeSize — size of an f element
intervalSize — interval size
copyStream — CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToDevZYXF()

void walberla::gpu::copyDevToDevZYXF ( const gpuPitchedPtr & dst,
const gpuPitchedPtr & src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeY,
uint_t srcAllocSizeY,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a device buffer to another device buffer with zyxf memory layout.

Parameters
dst — destination buffer
src — source buffer
dstOffset — (x, y, z, f)-tuple containing the coordinate of the interval start point in the destination buffer
srcOffset — (x, y, z, f)-tuple containing the coordinate of the interval start point in the source buffer
dstAllocSizeY — allocation size in y direction of the destination buffer
srcAllocSizeY — allocation size in y direction of the source buffer
typeSize — size of an f element
intervalSize — interval size
copyStream — CUDA/HIP stream; if not NULL, copy operations will be performed asynchronously

◆ copyDevToHostFZYX()

void walberla::gpu::copyDevToHostFZYX ( unsigned char * dst,
const gpuPitchedPtr & src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeZ,
uint_t srcAllocSizeZ,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a device buffer to a host buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyDevToHostZYXF()

void walberla::gpu::copyDevToHostZYXF ( unsigned char * dst,
const gpuPitchedPtr & src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeY,
uint_t srcAllocSizeY,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a device buffer to a host buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ copyHostToDevFZYX()

void walberla::gpu::copyHostToDevFZYX ( const gpuPitchedPtr & dst,
unsigned char * src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeZ,
uint_t srcAllocSizeZ,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a host buffer to a device buffer with fzyx memory layout.

See copyDevToDevFZYX() for parameter information.

◆ copyHostToDevZYXF()

void walberla::gpu::copyHostToDevZYXF ( const gpuPitchedPtr & dst,
unsigned char * src,
std::tuple< uint_t, uint_t, uint_t, uint_t > & dstOffset,
std::tuple< uint_t, uint_t, uint_t, uint_t > & srcOffset,
uint_t dstAllocSizeY,
uint_t srcAllocSizeY,
uint_t typeSize,
std::tuple< uint_t, uint_t, uint_t, uint_t > & intervalSize,
gpuStream_t copyStream )

Copy a 4D interval of a host buffer to a device buffer with zyxf memory layout.

See copyDevToDevZYXF() for parameter information.

◆ exportCopyFunctionsToPython()

template<typename... CpuFields>
void walberla::gpu::exportCopyFunctionsToPython ( py::module_ & m)

◆ exportModuleToPython()

template<typename... GpuFields>
void walberla::gpu::exportModuleToPython ( py::module_ & m)

◆ fieldCpy() [1/3]

template<typename DstType , typename SrcType >
void walberla::gpu::fieldCpy ( const shared_ptr< StructuredBlockStorage > & blocks,
BlockDataID dstID,
ConstBlockDataID srcID )

◆ fieldCpy() [2/3]

template<typename T , uint_t fs>
void walberla::gpu::fieldCpy ( field::Field< T, fs > & dst,
const gpu::GPUField< T > & src )

◆ fieldCpy() [3/3]

template<typename T , uint_t fs>
void walberla::gpu::fieldCpy ( gpu::GPUField< T > & dst,
const field::Field< T, fs > & src )

◆ fieldCpyFunctor() [1/2]

template<typename DstType , typename SrcType >
std::function< void(IBlock *)> walberla::gpu::fieldCpyFunctor ( BlockDataID dstID,
ConstBlockDataID srcID )

◆ fieldCpyFunctor() [2/2]

template<typename DstType , typename SrcType >
std::function< void()> walberla::gpu::fieldCpyFunctor ( const shared_ptr< StructuredBlockStorage > & blocks,
BlockDataID dstID,
ConstBlockDataID srcID )

◆ fieldCpySweepFunction()

template<typename DstType , typename SrcType >
void walberla::gpu::fieldCpySweepFunction ( BlockDataID dstID,
ConstBlockDataID srcID,
IBlock * block )

◆ free_aligned_with_offset()

void walberla::gpu::free_aligned_with_offset ( void * ptr)

◆ iDivUp()

unsigned int walberla::gpu::iDivUp ( unsigned int a,
unsigned int b )
inline

◆ make_kernel()

template<typename FuncPtr >
Kernel< FuncPtr > walberla::gpu::make_kernel ( FuncPtr funcPtr)

◆ nameStream()

void walberla::gpu::nameStream ( const cudaStream_t & stream,
const std::string & name )
inline

◆ nvtxMarker()

void walberla::gpu::nvtxMarker ( const std::string & name,
const uint32_t color = 0xaaaaaa )
inline

◆ selectDeviceBasedOnMpiRank()

void walberla::gpu::selectDeviceBasedOnMpiRank ( )

Selects active GPU device based on MPI rank.

assumes that on each node there are as many MPI processes started as there are GPUs

  • if there are more GPUs than processes on a node, a warning is printed and not all GPUs are utilized
  • if there are more processes than GPUs, also a warning is printed and multiple processes may access the same GPU. Processes are assigned to GPUs in a round-robin fashion

◆ shiftCoordinatesWhileFastestCoordHasSizeOne()

template<typename T >
void walberla::gpu::shiftCoordinatesWhileFastestCoordHasSizeOne ( typename FieldAccessor< T >::IndexingScheme & indexing,
dim3 & gridDim,
dim3 & blockDim )

Variable Documentation

◆ freePointers_

std::map<void *, void*> walberla::gpu::freePointers_
static