Alsvinn  0.5.3
The fast FVM simulator with UQ support
structure_common_cuda.hpp
Go to the documentation of this file.
1 #pragma once
2 #include <thrust/device_vector.h>
4 
5 namespace alsfvm {
6 
7 namespace functional {
8 
9 
10 
16 template<alsfvm::boundary::Type BoundaryType, class PowerClass>
17 __global__ void computeStructureCubeKernel(real* output,
19  int h,
20  int nx, int ny, int nz, int ngx, int ngy, int ngz,
21  real p, int dimensions) {
22  const int index = threadIdx.x + blockIdx.x * blockDim.x;
23 
24  if (index >= nx * ny * nz) {
25  return;
26  }
27 
28  const int i = index % nx;
29  const int j = (index / nx) % ny;
30  const int k = (index / nx / ny);
31 
32 
33  output[index] = 0;
34  forEachPointInComputeStructureCube<BoundaryType>([&] (double u, double u_h) {
35  output[index] += PowerClass::power(fabs(u - u_h), p);
36  }, input, i, j, k, h, nx, ny, nz, ngx, ngy, ngz, dimensions);
37 
38 }
39 
40 
41 template<alsfvm::boundary::Type BoundaryType, class PowerClass, class BufferClass>
43  const alsfvm::volume::Volume& input,
44  BufferClass& buffer,
45 
46  size_t numberOfH, double p) {
47  for (size_t var = 0; var < input.getNumberOfVariables(); ++var) {
48  auto inputView = input[var]->getView();
49 
50  auto outputMemory = output[var]->getHostMemory();
51  auto outputView = outputMemory->getView();
52 
53  const int ngx = int(input.getNumberOfXGhostCells());
54  const int ngy = int(input.getNumberOfYGhostCells());
55  const int ngz = int(input.getNumberOfZGhostCells());
56 
57  const int nx = int(input.getNumberOfXCells());
58  const int ny = int(input.getNumberOfYCells());
59  const int nz = int(input.getNumberOfZCells());
60 
61  buffer.resize(nx * ny * nz);
62  const int dimensions = input.getDimensions();
63 
64  for (int h = 1; h < numberOfH; ++h) {
65  // CUDA can't handle too large kernels with pow
66  const int threads = (p < 6 && p == int(p)) ? 1024 : 512;
67  const int size = nx * ny * nz;
68  const int blockNumber = (size + threads - 1) / threads;
69 
70 
71  computeStructureCubeKernel<BoundaryType, PowerClass>
72  <<< blockNumber, threads>>>
73  (thrust::raw_pointer_cast(
74  buffer.data()),
75  inputView,
76  h, nx, ny, nz, ngx, ngy, ngz, p, dimensions);
77 
78 
79  real structureResult = thrust::reduce(buffer.begin(),
80  buffer.end(),
81  0.0, thrust::plus<real>());
82 
83  outputView.at(h) += structureResult / (nx * ny * nz);
84  }
85 
86  if (!output[var]->isOnHost()) {
87  output[var]->copyFrom(*outputMemory);
88  }
89 
90  }
91 }
92 
93 
94 template<alsfvm::boundary::Type BoundaryType>
96  const alsfvm::volume::Volume& input, thrust::device_vector<real>& buffer,
97  int numberOfH, double p) {
98  if (p == 1.0) {
99  computeStructureCubeCUDA < BoundaryType, alsutils::math::FastPower<1>>
100  (output, input, buffer, numberOfH, p);
101  } else if (p == 2.0) {
102  computeStructureCubeCUDA <BoundaryType, alsutils::math::FastPower<2>>
103  (output, input, buffer, numberOfH, p);
104  }
105 
106  else if (p == 3.0) {
107  computeStructureCubeCUDA < BoundaryType, alsutils::math::FastPower<3>>
108  (output, input, buffer, numberOfH, p);
109  } else if (p == 4.0) {
110  computeStructureCubeCUDA < BoundaryType, alsutils::math::FastPower<4>>
111  (output, input, buffer, numberOfH, p);
112  } else if (p == 5.0) {
113  computeStructureCubeCUDA < BoundaryType, alsutils::math::FastPower<5>>
114  (output, input, buffer, numberOfH, p);
115  } else {
116  computeStructureCubeCUDA < BoundaryType, alsutils::math::PowPower>
117  (output, input, buffer, numberOfH, p);
118  }
119 }
120 }
121 
122 }
virtual size_t getNumberOfYCells() const
Definition: Volume.cpp:208
The Volume class represents a volume (a collection of cells with values for each cell (eg...
Definition: Volume.hpp:30
virtual size_t getNumberOfVariables() const
getNumberOfVariables gets the number of variables used
Definition: Volume.cpp:83
size_t nx
Definition: VolumeFactory.cpp:87
double real
Definition: types.hpp:65
void dispatchComputeStructureCubeCUDA(alsfvm::volume::Volume &output, const alsfvm::volume::Volume &input, thrust::device_vector< real > &buffer, int numberOfH, double p)
Definition: structure_common_cuda.hpp:95
virtual size_t getNumberOfZCells() const
Definition: Volume.cpp:215
virtual size_t getNumberOfXGhostCells() const
Definition: Volume.cpp:238
__global__ void computeStructureCubeKernel(real *output, alsfvm::memory::View< const real > input, int h, int nx, int ny, int nz, int ngx, int ngy, int ngz, real p, int dimensions)
Definition: structure_common_cuda.hpp:17
virtual size_t getNumberOfZGhostCells() const
Definition: Volume.cpp:254
size_t ny
Definition: VolumeFactory.cpp:88
virtual size_t getNumberOfYGhostCells() const
Definition: Volume.cpp:246
virtual size_t getNumberOfXCells() const
Definition: Volume.cpp:201
virtual size_t getDimensions() const
Gets the number of space dimensions.
Definition: Volume.cpp:279
Definition: View.hpp:32
size_t nz
Definition: VolumeFactory.cpp:89
void computeStructureCubeCUDA(alsfvm::volume::Volume &output, const alsfvm::volume::Volume &input, BufferClass &buffer, size_t numberOfH, double p)
Definition: structure_common_cuda.hpp:42
Various utility functions to implement the tecno flux.
Definition: types.hpp:30
float p
Definition: sodshocktube.py:5
functional
Definition: FunctionalStatistics.cpp:76