37#include <cuda_runtime.h>
38#include <helper_cuda.h>
64 neighbor.populations[0],
65 para->
getParD(level)->distributionsAD.f[0],
66 neighbor.populationsAD[0],
68 neighbor.numberOfNodes,
69 para->
getParD(level)->neighborX,
70 para->
getParD(level)->neighborY,
71 para->
getParD(level)->neighborZ,
72 para->
getParD(level)->numberOfNodes,
73 para->
getParD(level)->isEvenTimestep,
75 para->
getParD(level)->numberofthreads,
86 neighbor.populations[0],
87 para->
getParD(level)->distributionsAD.f[0],
88 neighbor.populationsAD[0],
90 neighbor.numberOfNodes,
91 para->
getParD(level)->neighborX,
92 para->
getParD(level)->neighborY,
93 para->
getParD(level)->neighborZ,
94 para->
getParD(level)->numberOfNodes,
95 para->
getParD(level)->isEvenTimestep,
97 para->
getParD(level)->numberofthreads,
106 comm.
sendNonBlocking(neighbor.populations[0], neighbor.numberOfFs, neighbor.rankNeighbor);
108 comm.
sendNonBlocking(neighbor.populationsAD[0], neighbor.numberOfFs, neighbor.rankNeighbor);
116 comm.
receiveNonBlocking(neighbor.populations[0], neighbor.numberOfFs, neighbor.rankNeighbor);
118 comm.
receiveNonBlocking(neighbor.populationsAD[0], neighbor.numberOfFs, neighbor.rankNeighbor);
126#pragma omp parallel for
160 const std::optional<std::vector<LBMSimulationParameter::EdgeNodePositions>>&
edgeNodesX,
162 const std::optional<std::vector<LBMSimulationParameter::EdgeNodePositions>>&
edgeNodesY)
169 for (
size_t i = 0;
i < numberOfProcessNeighbors;
i++)
190 if (0 < numberOfProcessNeighbors)
194 for (
size_t i = 0;
i < numberOfProcessNeighbors;
i++)
221 parD.sendProcessNeighborsX, parD.recvProcessNeighborsX,
222 parH.sendProcessNeighborsX, parH.recvProcessNeighborsX);
232 parD.sendProcessNeighborsAfterFtoCX, parD.recvProcessNeighborsAfterFtoCX,
233 parH.sendProcessNeighborsAfterFtoCX, parH.recvProcessNeighborsAfterFtoCX);
270 parD.sendProcessNeighborsY, parD.recvProcessNeighborsY,
271 parH.sendProcessNeighborsY, parH.recvProcessNeighborsY,
272 parH.recvProcessNeighborsX, parH.edgeNodesXtoY);
283 parD.sendProcessNeighborsAfterFtoCY, parD.recvProcessNeighborsAfterFtoCY,
284 parH.sendProcessNeighborsAfterFtoCY, parH.recvProcessNeighborsAfterFtoCY,
285 parH.recvProcessNeighborsAfterFtoCX, parH.edgeNodesXtoY);
322 parD.sendProcessNeighborsZ, parD.recvProcessNeighborsZ,
323 parH.sendProcessNeighborsZ, parH.recvProcessNeighborsZ,
324 parH.recvProcessNeighborsX, parH.edgeNodesXtoZ,
325 parH.recvProcessNeighborsY, parH.edgeNodesYtoZ);
334 parD.sendProcessNeighborsAfterFtoCZ, parD.recvProcessNeighborsAfterFtoCZ,
335 parH.sendProcessNeighborsAfterFtoCZ, parH.recvProcessNeighborsAfterFtoCZ,
336 parH.recvProcessNeighborsAfterFtoCX, parH.edgeNodesXtoZ,
337 parH.recvProcessNeighborsAfterFtoCY, parH.edgeNodesYtoZ);
void cudaCopyProcessNeighborFsHtoD(const ProcessNeighbor27 &neighborHost, const ProcessNeighbor27 &neighborDevice) const
void cudaCopyProcessNeighborFsDtoH(const ProcessNeighbor27 &neighborHost, const ProcessNeighbor27 &neighborDevice) const
Class for LBM-parameter management.
std::unique_ptr< CudaStreamManager > & getStreamManager()
std::shared_ptr< LBMSimulationParameter > getParD(int level)
Pointer to instance of LBMSimulationParameter - stored on Device (GPU)
LBMSimulationParameter & getParHostAsReference(int level) const
LBMSimulationParameter & getParDeviceAsReference(int level) const
An abstract class for communication between processes in parallel computation.
virtual void resetRequests()=0
virtual void sendNonBlocking(real *sbuf, int count_s, int destinationRank)=0
virtual void receiveNonBlocking(real *rbuf, int count_r, int sourceRank)=0
std::shared_ptr< T > SPtr
void scatterNodesFromRecvBufferZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
void startNonBlockingMpiReceive(vf::parallel::Communicator &comm, const std::vector< ProcessNeighbor27 > &recvProcessNeighborsHost, const bool diffOn)
void exchangeCollDataYGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
void scatterNodesFromRecvBufferZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
void scatterNodesFromRecvBufferXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
Distribute the receive nodes (x direction) from the buffer on the GPU.
void exchangeCollDataZGPU27AllNodes(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
void copyEdgeNodes(const std::vector< LBMSimulationParameter::EdgeNodePositions > &edgeNodes, const std::vector< ProcessNeighbor27 > &recvProcessNeighborsHost, const std::vector< ProcessNeighbor27 > &sendProcessNeighborsHost, const bool diffOn)
Copy nodes which are part of the communication in multiple directions.
void exchangeCollDataGPU27(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, const CudaStreamIndex streamIndex, const std::vector< ProcessNeighbor27 > &sendProcessNeighborsDevice, const std::vector< ProcessNeighbor27 > &recvProcessNeighborsDevice, const std::vector< ProcessNeighbor27 > &sendProcessNeighborsHost, const std::vector< ProcessNeighbor27 > &recvProcessNeighborsHost, const std::optional< std::vector< ProcessNeighbor27 > > &recvProcessNeighborsHostX, const std::optional< std::vector< LBMSimulationParameter::EdgeNodePositions > > &edgeNodesX, const std::optional< std::vector< ProcessNeighbor27 > > &recvProcessNeighborsHostY, const std::optional< std::vector< LBMSimulationParameter::EdgeNodePositions > > &edgeNodesY)
Exchange routine for simulations on multiple GPUs.
void scatterNodesFromRecvBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, const std::vector< ProcessNeighbor27 > &recvProcessNeighborsDevice)
Distribute the receive nodes from the buffer on the GPU.
void prepareExchangeCollDataYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
void prepareExchangeCollDataYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
void startNonBlockingMpiSend(vf::parallel::Communicator &comm, const std::vector< ProcessNeighbor27 > &sendProcessNeighborsHost, const bool diffOn)
void collectNodesInSendBufferGPU(Parameter *para, int level, CudaStreamIndex streamIndex, const std::vector< ProcessNeighbor27 > &sendProcessNeighborsDevice)
Routines for data exchange when running simulations on multiple GPUs.
void scatterNodesFromRecvBufferYGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
constexpr DistributionReferences27 getDistributionReferences27(real *distributions, const unsigned long long numberOfLBnodes, const bool isEvenTimestep)
void exchangeCollDataYGPU27AllNodes(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
void exchangeCollDataXGPU27AllNodes(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
Calls exchangeCollDataGPU27() for exchanging all nodes.
void exchangeCollDataZGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
void scatterNodesFromRecvBufferYGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
void exchangeCollDataXGPU27AfterFtoC(Parameter *para, vf::parallel::Communicator &comm, const CudaMemoryManager *cudaMemoryManager, int level, CudaStreamIndex streamIndex)
Calls exchangeCollDataGPU27() for exchanging the nodes, which are part of the communication between t...
void prepareExchangeCollDataXGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
Collect the send nodes for communication in the x direction in a buffer on the GPU.
void prepareExchangeCollDataZGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
void prepareExchangeCollDataZGPU27AfterFtoC(Parameter *para, int level, CudaStreamIndex streamIndex)
void scatterNodesFromRecvBufferXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
Distribute the receive nodes (x direction) from the buffer on the GPU.
void prepareExchangeCollDataXGPU27AllNodes(Parameter *para, int level, CudaStreamIndex streamIndex)
Collect the send nodes for communication in the x direction in a buffer on the GPU.
constexpr void forEachDirection(F func)