BEM++  2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Groups Pages
separable_numerical_test_kernel_trial_integrator_imp.hpp
1 // Copyright (C) 2011-2012 by the BEM++ Authors
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #include "../common/common.hpp"
22 
23 #include "bempp/common/config_opencl.hpp"
24 
25 #include "separable_numerical_test_kernel_trial_integrator.hpp" // To keep IDEs happy
26 
27 #include "_2d_array.hpp"
28 #include "_3d_array.hpp"
29 #include "_4d_array.hpp"
30 
31 #include "shapeset.hpp"
32 #include "basis_data.hpp"
33 #include "conjugate.hpp"
34 #include "collection_of_shapeset_transformations.hpp"
35 #include "geometrical_data.hpp"
36 #include "collection_of_kernels.hpp"
37 #include "opencl_handler.hpp"
38 #include "raw_grid_geometry.hpp"
39 #include "test_kernel_trial_integral.hpp"
40 #include "types.hpp"
41 #include "CL/separable_numerical_double_integrator.cl.str"
42 
43 #include "../common/auto_timer.hpp"
44 
45 #include <cassert>
46 #include <memory>
47 
48 namespace Fiber
49 {
50 
51 template <typename BasisFunctionType, typename KernelType,
52  typename ResultType, typename GeometryFactory>
53 SeparableNumericalTestKernelTrialIntegrator<
54 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
55 SeparableNumericalTestKernelTrialIntegrator(
56  const arma::Mat<CoordinateType>& localTestQuadPoints,
57  const arma::Mat<CoordinateType>& localTrialQuadPoints,
58  const std::vector<CoordinateType>& testQuadWeights,
59  const std::vector<CoordinateType>& trialQuadWeights,
60  const GeometryFactory& testGeometryFactory,
61  const GeometryFactory& trialGeometryFactory,
62  const RawGridGeometry<CoordinateType>& testRawGeometry,
63  const RawGridGeometry<CoordinateType>& trialRawGeometry,
64  const CollectionOfShapesetTransformations<CoordinateType>& testTransformations,
65  const CollectionOfKernels<KernelType>& kernels,
66  const CollectionOfShapesetTransformations<CoordinateType>& trialTransformations,
67  const TestKernelTrialIntegral<BasisFunctionType, KernelType, ResultType>& integral,
68  const OpenClHandler& openClHandler,
69  bool cacheGeometricalData) :
70  m_localTestQuadPoints(localTestQuadPoints),
71  m_localTrialQuadPoints(localTrialQuadPoints),
72  m_testQuadWeights(testQuadWeights),
73  m_trialQuadWeights(trialQuadWeights),
74  m_testGeometryFactory(testGeometryFactory),
75  m_trialGeometryFactory(trialGeometryFactory),
76  m_testRawGeometry(testRawGeometry),
77  m_trialRawGeometry(trialRawGeometry),
78  m_testTransformations(testTransformations),
79  m_kernels(kernels),
80  m_trialTransformations(trialTransformations),
81  m_integral(integral),
82  m_openClHandler(openClHandler),
83  m_cacheGeometricalData(cacheGeometricalData)
84 {
85  if (localTestQuadPoints.n_cols != testQuadWeights.size())
86  throw std::invalid_argument("SeparableNumericalTestKernelTrialIntegrator::"
87  "SeparableNumericalTestKernelTrialIntegrator(): "
88  "numbers of test points and weights do not match");
89  if (localTrialQuadPoints.n_cols != trialQuadWeights.size())
90  throw std::invalid_argument("SeparableNumericalTestKernelTrialIntegrator::"
91  "SeparableNumericalTestKernelTrialIntegrator(): "
92  "numbers of trial points and weights do not match");
93 
94 #ifdef WITH_OPENCL
95  if (openClHandler.UseOpenCl()) {
96  // push integration points to CL device
97  clTestQuadPoints = openClHandler.pushMatrix<CoordinateType> (localTestQuadPoints);
98  clTrialQuadPoints = openClHandler.pushMatrix<CoordinateType> (localTrialQuadPoints);
99  clTestQuadWeights = openClHandler.pushVector<CoordinateType> (testQuadWeights);
100  clTrialQuadWeights = openClHandler.pushVector<CoordinateType> (trialQuadWeights);
101  }
102 #endif
103 
104  if (cacheGeometricalData)
105  precalculateGeometricalData();
106 }
107 
108 template <typename BasisFunctionType, typename KernelType,
109  typename ResultType, typename GeometryFactory>
110 SeparableNumericalTestKernelTrialIntegrator<
111 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
112 ~SeparableNumericalTestKernelTrialIntegrator()
113 {
114 #ifdef WITH_OPENCL
115  if (m_openClHandler.UseOpenCl()) {
116  delete clTestQuadPoints;
117  delete clTrialQuadPoints;
118  delete clTestQuadWeights;
119  delete clTrialQuadWeights;
120  }
121 #endif
122 }
123 
124 template <typename BasisFunctionType, typename KernelType,
125  typename ResultType, typename GeometryFactory>
126 void SeparableNumericalTestKernelTrialIntegrator<
127 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
128 precalculateGeometricalData()
129 {
130  size_t testBasisDeps = 0, trialBasisDeps = 0; // ignored in this function
131  size_t testGeomDeps = 0, trialGeomDeps = 0;
132 
133  m_testTransformations.addDependencies(testBasisDeps, testGeomDeps);
134  m_trialTransformations.addDependencies(trialBasisDeps, trialGeomDeps);
135  m_kernels.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
136  m_integral.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
137 
138  precalculateGeometricalDataOnSingleGrid(
139  m_localTestQuadPoints, m_testGeometryFactory,
140  m_testRawGeometry, testGeomDeps, m_cachedTestGeomData);
141  precalculateGeometricalDataOnSingleGrid(
142  m_localTrialQuadPoints, m_trialGeometryFactory,
143  m_trialRawGeometry, trialGeomDeps, m_cachedTrialGeomData);
144 }
145 
146 template <typename BasisFunctionType, typename KernelType,
147  typename ResultType, typename GeometryFactory>
148 void SeparableNumericalTestKernelTrialIntegrator<
149 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
150 precalculateGeometricalDataOnSingleGrid(
151  const arma::Mat<CoordinateType>& localQuadPoints,
152  const GeometryFactory& geometryFactory,
153  const RawGridGeometry<CoordinateType>& rawGeometry,
154  size_t geomDeps,
155  std::vector<GeometricalData<CoordinateType> >& geomData)
156 {
157  geomData.resize(rawGeometry.elementCount());
158 
159  typedef typename GeometryFactory::Geometry Geometry;
160  std::auto_ptr<Geometry> geometry = geometryFactory.make();
161  for (size_t e = 0; e < rawGeometry.elementCount(); ++e) {
162  rawGeometry.setupGeometry(e, *geometry);
163  geometry->getData(geomDeps, localQuadPoints, geomData[e]);
164  }
165 }
166 
167 template <typename BasisFunctionType, typename KernelType,
168  typename ResultType, typename GeometryFactory>
169 void SeparableNumericalTestKernelTrialIntegrator<
170 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
171 integrate(
172  CallVariant callVariant,
173  const std::vector<int>& elementIndicesA,
174  int elementIndexB,
175  const Shapeset<BasisFunctionType>& basisA,
176  const Shapeset<BasisFunctionType>& basisB,
177  LocalDofIndex localDofIndexB,
178  const std::vector<arma::Mat<ResultType>*>& result) const
179 {
180  if (m_openClHandler.UseOpenCl())
181  {
182  integrateCl (callVariant, elementIndicesA, elementIndexB, basisA, basisB,
183  localDofIndexB, result);
184  }
185  else
186  {
187  integrateCpu (callVariant, elementIndicesA, elementIndexB, basisA, basisB,
188  localDofIndexB, result);
189  }
190 }
191 
192 template <typename BasisFunctionType, typename KernelType,
193  typename ResultType, typename GeometryFactory>
194 void SeparableNumericalTestKernelTrialIntegrator<
195 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
196 integrateCpu(
197  CallVariant callVariant,
198  const std::vector<int>& elementIndicesA,
199  int elementIndexB,
200  const Shapeset<BasisFunctionType>& basisA,
201  const Shapeset<BasisFunctionType>& basisB,
202  LocalDofIndex localDofIndexB,
203  const std::vector<arma::Mat<ResultType>*>& result) const
204 {
205  const int testPointCount = m_localTestQuadPoints.n_cols;
206  const int trialPointCount = m_localTrialQuadPoints.n_cols;
207  const int elementACount = elementIndicesA.size();
208 
209  if (result.size() != elementIndicesA.size())
210  throw std::invalid_argument(
211  "SeparableNumericalTestKernelTrialIntegrator::integrate(): "
212  "arrays 'result' and 'elementIndicesA' must have the same number "
213  "of elements");
214  if (testPointCount == 0 || trialPointCount == 0 || elementACount == 0)
215  return;
216  // TODO: in the (pathological) case that pointCount == 0 but
217  // geometryCount != 0, set elements of result to 0.
218 
219  // Evaluate constants
220 
221  const int dofCountA = basisA.size();
222  const int dofCountB = localDofIndexB == ALL_DOFS ? basisB.size() : 1;
223  const int testDofCount = callVariant == TEST_TRIAL ? dofCountA : dofCountB;
224  const int trialDofCount = callVariant == TEST_TRIAL ? dofCountB : dofCountA;
225 
226  BasisData<BasisFunctionType> testBasisData, trialBasisData;
227  GeometricalData<CoordinateType>* testGeomData = &m_testGeomData.local();
228  GeometricalData<CoordinateType>* trialGeomData = &m_trialGeomData.local();
229  const GeometricalData<CoordinateType>* constTestGeomData = testGeomData;
230  const GeometricalData<CoordinateType>* constTrialGeomData = trialGeomData;
231 
232  size_t testBasisDeps = 0, trialBasisDeps = 0;
233  size_t testGeomDeps = 0, trialGeomDeps = 0;
234 
235  m_testTransformations.addDependencies(testBasisDeps, testGeomDeps);
236  m_trialTransformations.addDependencies(trialBasisDeps, trialGeomDeps);
237  m_kernels.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
238  m_integral.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
239 
240  typedef typename GeometryFactory::Geometry Geometry;
241  std::auto_ptr<Geometry> geometryA, geometryB;
242  const RawGridGeometry<CoordinateType> *rawGeometryA = 0, *rawGeometryB = 0;
243  if (!m_cacheGeometricalData) {
244  if (callVariant == TEST_TRIAL)
245  {
246  geometryA = m_testGeometryFactory.make();
247  geometryB = m_trialGeometryFactory.make();
248  rawGeometryA = &m_testRawGeometry;
249  rawGeometryB = &m_trialRawGeometry;
250  }
251  else
252  {
253  geometryA = m_trialGeometryFactory.make();
254  geometryB = m_testGeometryFactory.make();
255  rawGeometryA = &m_trialRawGeometry;
256  rawGeometryB = &m_testRawGeometry;
257  }
258  }
259 
260  CollectionOf3dArrays<BasisFunctionType> testValues, trialValues;
261  CollectionOf4dArrays<KernelType> kernelValues;
262 
263  for (size_t i = 0; i < result.size(); ++i) {
264  assert(result[i]);
265  result[i]->set_size(testDofCount, trialDofCount);
266  }
267 
268  if (!m_cacheGeometricalData)
269  rawGeometryB->setupGeometry(elementIndexB, *geometryB);
270  if (callVariant == TEST_TRIAL)
271  {
272  basisA.evaluate(testBasisDeps, m_localTestQuadPoints, ALL_DOFS, testBasisData);
273  basisB.evaluate(trialBasisDeps, m_localTrialQuadPoints, localDofIndexB, trialBasisData);
274  if (m_cacheGeometricalData)
275  constTrialGeomData = &m_cachedTrialGeomData[elementIndexB];
276  else
277  geometryB->getData(trialGeomDeps, m_localTrialQuadPoints, *trialGeomData);
278  m_trialTransformations.evaluate(trialBasisData, *constTrialGeomData, trialValues);
279  }
280  else
281  {
282  basisA.evaluate(trialBasisDeps, m_localTrialQuadPoints, ALL_DOFS, trialBasisData);
283  basisB.evaluate(testBasisDeps, m_localTestQuadPoints, localDofIndexB, testBasisData);
284  if (m_cacheGeometricalData)
285  constTestGeomData = &m_cachedTestGeomData[elementIndexB];
286  else
287  geometryB->getData(testGeomDeps, m_localTestQuadPoints, *testGeomData);
288  m_testTransformations.evaluate(testBasisData, *constTestGeomData, testValues);
289  }
290 
291  // Iterate over the elements
292  for (int indexA = 0; indexA < elementACount; ++indexA)
293  {
294  if (!m_cacheGeometricalData)
295  rawGeometryA->setupGeometry(elementIndicesA[indexA], *geometryA);
296  if (callVariant == TEST_TRIAL)
297  {
298  if (m_cacheGeometricalData)
299  constTestGeomData = &m_cachedTestGeomData[elementIndicesA[indexA]];
300  else
301  geometryA->getData(testGeomDeps, m_localTestQuadPoints, *testGeomData);
302  m_testTransformations.evaluate(testBasisData, *constTestGeomData, testValues);
303  }
304  else
305  {
306  if (m_cacheGeometricalData)
307  constTrialGeomData = &m_cachedTrialGeomData[elementIndicesA[indexA]];
308  else
309  geometryA->getData(trialGeomDeps, m_localTrialQuadPoints, *trialGeomData);
310  m_trialTransformations.evaluate(trialBasisData, *constTrialGeomData, trialValues);
311  }
312 
313  m_kernels.evaluateOnGrid(*constTestGeomData, *constTrialGeomData, kernelValues);
314  m_integral.evaluateWithTensorQuadratureRule(
315  *constTestGeomData, *constTrialGeomData, testValues, trialValues,
316  kernelValues, m_testQuadWeights, m_trialQuadWeights,
317  *result[indexA]);
318  }
319 }
320 
321 template <typename BasisFunctionType, typename KernelType,
322  typename ResultType, typename GeometryFactory>
323 void
324 SeparableNumericalTestKernelTrialIntegrator<
325 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
326 integrateCl(
327  CallVariant callVariant,
328  const std::vector<int>& elementIndicesA,
329  int elementIndexB,
330  const Shapeset<BasisFunctionType>& basisA,
331  const Shapeset<BasisFunctionType>& basisB,
332  LocalDofIndex localDofIndexB,
333  const std::vector<arma::Mat<ResultType>*>& result) const
334 {
335 //#ifdef WITH_OPENCL
336 // // DEBUG: test latency
337 // {
338 // int bufsize = 10;
339 // std::vector<ResultType> buf(bufsize);
340 // tbb::tick_count t0 = tbb::tick_count::now();
341 // cl::Buffer *clbuf = m_openClHandler.pushVector<ResultType> (buf);
342 // tbb::tick_count::interval_t dt1 = tbb::tick_count::now()-t0;
343 
344 // t0 = tbb::tick_count::now();
345 // m_openClHandler.pullVector<ResultType> (*clbuf, buf, bufsize);
346 // tbb::tick_count::interval_t dt2 = tbb::tick_count::now()-t0;
347 
348 // static int callcount = 0;
349 // std::cout << callcount++ << "\tpush: " << dt1.seconds() << "\tpull: " << dt2.seconds() << std::endl;
350 // }
351 
352 // // tbb::tick_count t_start, t_end, t0, t1;
353 // // tbb::tick_count::interval_t dt_buf, dt_kern, dt_prog, dt_pull;
354 
355 // // t_start = tbb::tick_count::now();
356 
357 // // Temporary code. TODO: add support for multiple-term expressions.
358 // if (!m_testTransformation.isTrivial() || !m_trialTransformation.isTrivial())
359 // throw std::runtime_error("SeparableNumericalTestKernelTrialIntegrator::"
360 // "integrateCl(): multiple-term expression lists are "
361 // "not supported in the OpenCL mode yet");
362 // const Expression<CoordinateType>& m_testExpression = m_testTransformation.term(0);
363 // const Expression<CoordinateType>& m_trialExpression = m_trialTransformation.term(0);
364 
365 // const int testPointCount = m_localTestQuadPoints.n_cols;
366 // const int trialPointCount = m_localTrialQuadPoints.n_cols;
367 // const int elementACount = elementIndicesA.size();
368 // const int pointDim = m_localTestQuadPoints.n_rows;
369 // const int meshDim = m_openClHandler.meshGeom().size.dim;
370 
371 // if (testPointCount == 0 || trialPointCount == 0 || elementACount == 0)
372 // return;
373 // // TODO: in the (pathological) case that pointCount == 0 but
374 // // geometryCount != 0, set elements of result to 0.
375 
376 // // Evaluate constants
377 // const int testComponentCount = m_testExpression.codomainDimension();
378 // const int trialComponentCount = m_trialExpression.codomainDimension();
379 // const int dofCountA = basisA.size();
380 // const int dofCountB = localDofIndexB == ALL_DOFS ? basisB.size() : 1;
381 // const int testDofCount = callVariant == TEST_TRIAL ? dofCountA : dofCountB;
382 // const int trialDofCount = callVariant == TEST_TRIAL ? dofCountB : dofCountA;
383 
384 // const int kernelRowCount = m_kernels.codomainDimension();
385 // const int kernelColCount = m_kernels.domainDimension();
386 
387 // // Assert that the kernel tensor dimensions are compatible
388 // // with the number of components of the functions
389 
390 // // TODO: This will need to be modified once we allow scalar-valued kernels
391 // // (treated as if they were multiplied by the unit tensor) with
392 // // vector-valued functions
393 // assert(testComponentCount == kernelRowCount);
394 // assert(kernelColCount == trialComponentCount);
395 
396 // int argIdx;
397 // int testBasisDeps = 0, trialBasisDeps = 0;
398 // int testGeomDeps = INTEGRATION_ELEMENTS;
399 // int trialGeomDeps = INTEGRATION_ELEMENTS;
400 
401 // cl::Buffer *clElementIndicesA;
402 // cl::Buffer *clGlobalTrialPoints;
403 // cl::Buffer *clGlobalTestPoints;
404 // cl::Buffer *clGlobalTrialNormals;
405 // cl::Buffer *clTrialValues;
406 // cl::Buffer *clTestValues;
407 // cl::Buffer *clTrialIntegrationElements;
408 // cl::Buffer *clTestIntegrationElements;
409 // cl::Buffer *clResult;
410 
411 // m_testExpression.addDependencies(testBasisDeps, testGeomDeps);
412 // m_trialExpression.addDependencies(trialBasisDeps, trialGeomDeps);
413 // m_kernels.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
414 
415 // result.set_size(testDofCount, trialDofCount, elementACount);
416 
417 // // t0 = tbb::tick_count::now();
418 // clElementIndicesA = m_openClHandler.pushVector<int> (elementIndicesA);
419 // clResult = m_openClHandler.createBuffer<ResultType> (testDofCount*trialDofCount*elementACount,
420 // CL_MEM_WRITE_ONLY);
421 // // dt_buf += tbb::tick_count::now()-t0;
422 
423 // // Build the OpenCL program
424 // // t0 = tbb::tick_count::now();
425 // cl::Program::Sources sources;
426 // sources.push_back (m_openClHandler.initStr());
427 // sources.push_back (basisA.clCodeString(true));
428 // sources.push_back (basisB.clCodeString(false));
429 // sources.push_back (m_kernels.evaluateClCode());
430 // sources.push_back (clStrIntegrateRowOrCol());
431 // m_openClHandler.loadProgramFromStringArray (sources);
432 // // dt_prog += tbb::tick_count::now()-t0;
433 
434 // // Call the CL kernels to map the trial and test quadrature points
435 // if (callVariant == TEST_TRIAL)
436 // {
437 // // t0 = tbb::tick_count::now();
438 // clGlobalTestPoints = m_openClHandler.createBuffer<CoordinateType>(
439 // elementACount*testPointCount*meshDim, CL_MEM_READ_WRITE);
440 // clTestIntegrationElements = m_openClHandler.createBuffer<CoordinateType>(
441 // elementACount*testPointCount, CL_MEM_READ_WRITE);
442 // // dt_buf += tbb::tick_count::now()-t0;
443 // cl::Kernel &clMapTest = m_openClHandler.setKernel ("clMapPointsToElements");
444 // argIdx = m_openClHandler.SetGeometryArgs (clMapTest, 0);
445 // clMapTest.setArg (argIdx++, *clTestQuadPoints);
446 // clMapTest.setArg (argIdx++, testPointCount);
447 // clMapTest.setArg (argIdx++, pointDim);
448 // clMapTest.setArg (argIdx++, *clElementIndicesA);
449 // clMapTest.setArg (argIdx++, elementACount);
450 // clMapTest.setArg (argIdx++, *clGlobalTestPoints);
451 // clMapTest.setArg (argIdx++, *clTestIntegrationElements);
452 
453 // // t0 = tbb::tick_count::now();
454 // m_openClHandler.enqueueKernel (cl::NDRange(elementACount, testPointCount));
455 // // dt_kern += tbb::tick_count::now()-t0;
456 
457 // // t0 = tbb::tick_count::now();
458 // clGlobalTrialPoints = m_openClHandler.createBuffer<CoordinateType> (
459 // trialPointCount*meshDim, CL_MEM_READ_WRITE);
460 // clGlobalTrialNormals = m_openClHandler.createBuffer<CoordinateType> (
461 // trialPointCount*meshDim, CL_MEM_READ_WRITE);
462 // clTrialIntegrationElements = m_openClHandler.createBuffer<CoordinateType>(
463 // trialPointCount, CL_MEM_READ_WRITE);
464 // // dt_buf += tbb::tick_count::now()-t0;
465 // cl::Kernel &clMapTrial = m_openClHandler.setKernel ("clMapPointsAndNormalsToElement");
466 // argIdx = m_openClHandler.SetGeometryArgs (clMapTrial, 0);
467 // clMapTrial.setArg (argIdx++, *clTestQuadPoints);
468 // clMapTrial.setArg (argIdx++, trialPointCount);
469 // clMapTrial.setArg (argIdx++, pointDim);
470 // clMapTrial.setArg (argIdx++, elementIndexB);
471 // clMapTrial.setArg (argIdx++, *clGlobalTrialPoints);
472 // clMapTrial.setArg (argIdx++, *clGlobalTrialNormals);
473 // clMapTrial.setArg (argIdx++, *clTrialIntegrationElements);
474 // // t0 = tbb::tick_count::now();
475 // m_openClHandler.enqueueKernel (cl::NDRange(trialPointCount));
476 // // dt_kern += tbb::tick_count::now()-t0;
477 
478 // // t0 = tbb::tick_count::now();
479 // clTestValues = m_openClHandler.createBuffer<BasisFunctionType> (
480 // elementACount*testPointCount*testDofCount, CL_MEM_READ_WRITE);
481 // // dt_buf += tbb::tick_count::now()-t0;
482 // cl::Kernel &clBasisTest = m_openClHandler.setKernel ("clBasisfAElements");
483 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTest, 0);
484 // clBasisTest.setArg (argIdx++, *clElementIndicesA);
485 // clBasisTest.setArg (argIdx++, elementACount);
486 // clBasisTest.setArg (argIdx++, *clTestQuadPoints);
487 // clBasisTest.setArg (argIdx++, testPointCount);
488 // clBasisTest.setArg (argIdx++, pointDim);
489 // clBasisTest.setArg (argIdx++, testDofCount);
490 // clBasisTest.setArg (argIdx++, *clTestValues);
491 // // t0 = tbb::tick_count::now();
492 // m_openClHandler.enqueueKernel (cl::NDRange(elementACount, testPointCount));
493 // // dt_kern += tbb::tick_count::now()-t0;
494 
495 // // t0 = tbb::tick_count::now();
496 // clTrialValues = m_openClHandler.createBuffer<BasisFunctionType> (
497 // trialPointCount*trialDofCount, CL_MEM_READ_WRITE);
498 // // dt_buf += tbb::tick_count::now()-t0;
499 // cl::Kernel &clBasisTrial = m_openClHandler.setKernel ("clBasisfBElement");
500 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTrial, 0);
501 // clBasisTrial.setArg (argIdx++, elementIndexB);
502 // clBasisTrial.setArg (argIdx++, *clTrialQuadPoints);
503 // clBasisTrial.setArg (argIdx++, trialPointCount);
504 // clBasisTrial.setArg (argIdx++, pointDim);
505 // clBasisTrial.setArg (argIdx++, trialDofCount);
506 // clBasisTrial.setArg (argIdx++, localDofIndexB);
507 // clBasisTrial.setArg (argIdx++, *clTrialValues);
508 // // t0 = tbb::tick_count::now();
509 // m_openClHandler.enqueueKernel (cl::NDRange(trialPointCount));
510 // // dt_kern += tbb::tick_count::now()-t0;
511 // }
512 // else
513 // {
514 // // t0 = tbb::tick_count::now();
515 // clGlobalTrialPoints = m_openClHandler.createBuffer<CoordinateType> (
516 // elementACount*trialPointCount*meshDim, CL_MEM_READ_WRITE);
517 // clGlobalTrialNormals = m_openClHandler.createBuffer<CoordinateType> (
518 // elementACount*trialPointCount*meshDim, CL_MEM_READ_WRITE);
519 // clTrialIntegrationElements = m_openClHandler.createBuffer<CoordinateType> (
520 // elementACount*trialPointCount, CL_MEM_READ_WRITE);
521 // // dt_buf += tbb::tick_count::now()-t0;
522 // cl::Kernel &clMapTrial = m_openClHandler.setKernel ("clMapPointsAndNormalsToElements");
523 // argIdx = m_openClHandler.SetGeometryArgs (clMapTrial, 0);
524 // clMapTrial.setArg (argIdx++, *clTrialQuadPoints);
525 // clMapTrial.setArg (argIdx++, trialPointCount);
526 // clMapTrial.setArg (argIdx++, pointDim);
527 // clMapTrial.setArg (argIdx++, *clElementIndicesA);
528 // clMapTrial.setArg (argIdx++, elementACount);
529 // clMapTrial.setArg (argIdx++, *clGlobalTrialPoints);
530 // clMapTrial.setArg (argIdx++, *clGlobalTrialNormals);
531 // clMapTrial.setArg (argIdx++, *clTrialIntegrationElements);
532 // // t0 = tbb::tick_count::now();
533 // m_openClHandler.enqueueKernel (cl::NDRange(elementACount, trialPointCount));
534 // // dt_kern += tbb::tick_count::now()-t0;
535 
536 // // t0 = tbb::tick_count::now();
537 // clGlobalTestPoints = m_openClHandler.createBuffer<CoordinateType> (
538 // testPointCount*meshDim, CL_MEM_READ_WRITE);
539 // clTestIntegrationElements = m_openClHandler.createBuffer<CoordinateType> (
540 // testPointCount, CL_MEM_READ_WRITE);
541 // // dt_buf += tbb::tick_count::now()-t0;
542 // cl::Kernel &clMapTest = m_openClHandler.setKernel ("clMapPointsToElement");
543 // argIdx = m_openClHandler.SetGeometryArgs (clMapTest, 0);
544 // clMapTest.setArg (argIdx++, *clTestQuadPoints);
545 // clMapTest.setArg (argIdx++, testPointCount);
546 // clMapTest.setArg (argIdx++, pointDim);
547 // clMapTest.setArg (argIdx++, elementIndexB);
548 // clMapTest.setArg (argIdx++, *clGlobalTestPoints);
549 // clMapTest.setArg (argIdx++, *clTestIntegrationElements);
550 // // t0 = tbb::tick_count::now();
551 // m_openClHandler.enqueueKernel (cl::NDRange(testPointCount));
552 // // dt_kern += tbb::tick_count::now()-t0;
553 
554 // // t0 = tbb::tick_count::now();
555 // clTrialValues = m_openClHandler.createBuffer<BasisFunctionType> (
556 // elementACount*trialPointCount*trialDofCount, CL_MEM_READ_WRITE);
557 // // dt_buf += tbb::tick_count::now()-t0;
558 // cl::Kernel &clBasisTrial = m_openClHandler.setKernel ("clBasisfAElements");
559 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTrial, 0);
560 // clBasisTrial.setArg (argIdx++, *clElementIndicesA);
561 // clBasisTrial.setArg (argIdx++, elementACount);
562 // clBasisTrial.setArg (argIdx++, *clTrialQuadPoints);
563 // clBasisTrial.setArg (argIdx++, trialPointCount);
564 // clBasisTrial.setArg (argIdx++, pointDim);
565 // clBasisTrial.setArg (argIdx++, trialDofCount);
566 // clBasisTrial.setArg (argIdx++, *clTrialValues);
567 // // t0 = tbb::tick_count::now();
568 // m_openClHandler.enqueueKernel (cl::NDRange(elementACount, trialPointCount));
569 // // dt_kern += tbb::tick_count::now()-t0;
570 
571 // // t0 = tbb::tick_count::now();
572 // clTestValues = m_openClHandler.createBuffer<BasisFunctionType> (
573 // testPointCount*testDofCount, CL_MEM_READ_WRITE);
574 // // dt_buf += tbb::tick_count::now()-t0;
575 // cl::Kernel &clBasisTest = m_openClHandler.setKernel ("clBasisfBElement");
576 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTest, 0);
577 // clBasisTest.setArg (argIdx++, elementIndexB);
578 // clBasisTest.setArg (argIdx++, *clTestQuadPoints);
579 // clBasisTest.setArg (argIdx++, testPointCount);
580 // clBasisTest.setArg (argIdx++, pointDim);
581 // clBasisTest.setArg (argIdx++, testDofCount);
582 // clBasisTest.setArg (argIdx++, localDofIndexB);
583 // clBasisTest.setArg (argIdx++, *clTestValues);
584 // // t0 = tbb::tick_count::now();
585 // m_openClHandler.enqueueKernel (cl::NDRange(testPointCount));
586 // // dt_kern += tbb::tick_count::now()-t0;
587 // }
588 
589 // // Build the OpenCL kernel
590 // cl::Kernel &clKernel = m_openClHandler.setKernel ("clIntegrate");
591 
592 // // Set kernel arguments
593 // argIdx = m_openClHandler.SetGeometryArgs (clKernel, 0);
594 // clKernel.setArg (argIdx++, *clGlobalTrialPoints);
595 // clKernel.setArg (argIdx++, *clGlobalTestPoints);
596 // clKernel.setArg (argIdx++, *clGlobalTrialNormals);
597 // clKernel.setArg (argIdx++, *clTrialIntegrationElements);
598 // clKernel.setArg (argIdx++, *clTestIntegrationElements);
599 // clKernel.setArg (argIdx++, *clTrialValues);
600 // clKernel.setArg (argIdx++, *clTestValues);
601 // clKernel.setArg (argIdx++, *clTrialQuadWeights);
602 // clKernel.setArg (argIdx++, *clTestQuadWeights);
603 // clKernel.setArg (argIdx++, trialPointCount);
604 // clKernel.setArg (argIdx++, testPointCount);
605 // clKernel.setArg (argIdx++, trialComponentCount);
606 // clKernel.setArg (argIdx++, testComponentCount);
607 // clKernel.setArg (argIdx++, trialDofCount);
608 // clKernel.setArg (argIdx++, testDofCount);
609 // clKernel.setArg (argIdx++, elementACount);
610 // clKernel.setArg (argIdx++, callVariant == TEST_TRIAL ? 1:0);
611 // clKernel.setArg (argIdx++, *clElementIndicesA);
612 // clKernel.setArg (argIdx++, elementIndexB);
613 // clKernel.setArg (argIdx++, *clResult);
614 
615 
616 // // Run the CL kernel
617 // // t0 = tbb::tick_count::now();
618 // m_openClHandler.enqueueKernel (cl::NDRange(elementACount));
619 // // dt_kern += tbb::tick_count::now()-t0;
620 
621 // // Copy results back
622 // // t0 = tbb::tick_count::now();
623 // m_openClHandler.pullCube<ResultType> (*clResult, result);
624 // // dt_pull += tbb::tick_count::now()-t0;
625 
626 // // Clean up local device buffers
627 // delete clElementIndicesA;
628 // delete clGlobalTrialPoints;
629 // delete clGlobalTestPoints;
630 // delete clGlobalTrialNormals;
631 // delete clTestValues;
632 // delete clTrialValues;
633 // delete clTrialIntegrationElements;
634 // delete clTestIntegrationElements;
635 // delete clResult;
636 
637 // // t_end = tbb::tick_count::now();
638 // // static int callcount = 0;
639 // // std::cout << callcount++ << '\t' << (t_end-t_start).seconds()
640 // // << "\tprog=" << dt_prog.seconds()
641 // // << "\tbuf=" << dt_buf.seconds()
642 // // << "\tkern=" << dt_kern.seconds()
643 // // << "\tpull=" << dt_pull.seconds()
644 // // << std::endl;
645 
646 //#else
647 // throw std::runtime_error ("Trying to call OpenCL method without OpenCL support");
648 //#endif // WITH_OPENCL
649 }
650 
651 
652 template <typename BasisFunctionType, typename KernelType,
653  typename ResultType, typename GeometryFactory>
654 void
655 SeparableNumericalTestKernelTrialIntegrator<
656 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
657 integrate(const std::vector<ElementIndexPair>& elementIndexPairs,
658  const Shapeset<BasisFunctionType>& testShapeset,
659  const Shapeset<BasisFunctionType>& trialShapeset,
660  const std::vector<arma::Mat<ResultType>*>& result) const
661 {
662  if (m_openClHandler.UseOpenCl())
663  {
664  integrateCl (elementIndexPairs, testShapeset, trialShapeset, result);
665  }
666  else
667  {
668  integrateCpu (elementIndexPairs, testShapeset, trialShapeset, result);
669  }
670 }
671 
672 template <typename BasisFunctionType, typename KernelType,
673  typename ResultType, typename GeometryFactory>
674 void
675 SeparableNumericalTestKernelTrialIntegrator<
676 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
677 integrateCpu(
678  const std::vector<ElementIndexPair>& elementIndexPairs,
679  const Shapeset<BasisFunctionType>& testShapeset,
680  const Shapeset<BasisFunctionType>& trialShapeset,
681  const std::vector<arma::Mat<ResultType>*>& result) const
682 {
683  const int testPointCount = m_localTestQuadPoints.n_cols;
684  const int trialPointCount = m_localTrialQuadPoints.n_cols;
685  const int geometryPairCount = elementIndexPairs.size();
686 
687  if (result.size() != elementIndexPairs.size())
688  throw std::invalid_argument(
689  "NonseparableNumericalTestKernelTrialIntegrator::integrate(): "
690  "arrays 'result' and 'elementIndicesA' must have the same number "
691  "of elements");
692  if (testPointCount == 0 || trialPointCount == 0 || geometryPairCount == 0)
693  return;
694  // TODO: in the (pathological) case that pointCount == 0 but
695  // geometryPairCount != 0, set elements of result to 0.
696 
697  // Evaluate constants
698 
699  const int testDofCount = testShapeset.size();
700  const int trialDofCount = trialShapeset.size();
701 
702  BasisData<BasisFunctionType> testBasisData, trialBasisData;
703  GeometricalData<CoordinateType>* testGeomData = &m_testGeomData.local();
704  GeometricalData<CoordinateType>* trialGeomData = &m_trialGeomData.local();
705  const GeometricalData<CoordinateType>* constTestGeomData = testGeomData;
706  const GeometricalData<CoordinateType>* constTrialGeomData = trialGeomData;
707 
708  size_t testBasisDeps = 0, trialBasisDeps = 0;
709  size_t testGeomDeps = 0, trialGeomDeps = 0;
710 
711  m_testTransformations.addDependencies(testBasisDeps, testGeomDeps);
712  m_trialTransformations.addDependencies(trialBasisDeps, trialGeomDeps);
713  m_kernels.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
714  m_integral.addGeometricalDependencies(testGeomDeps, trialGeomDeps);
715 
716  typedef typename GeometryFactory::Geometry Geometry;
717  std::auto_ptr<Geometry> testGeometry;
718  std::auto_ptr<Geometry> trialGeometry;
719  if (!m_cacheGeometricalData) {
720  testGeometry = m_testGeometryFactory.make();
721  trialGeometry = m_trialGeometryFactory.make();
722  }
723 
724  CollectionOf3dArrays<BasisFunctionType> testValues, trialValues;
725  CollectionOf4dArrays<KernelType> kernelValues;
726 
727  for (size_t i = 0; i < result.size(); ++i) {
728  assert(result[i]);
729  result[i]->set_size(testDofCount, trialDofCount);
730  }
731 
732  testShapeset.evaluate(testBasisDeps, m_localTestQuadPoints, ALL_DOFS, testBasisData);
733  trialShapeset.evaluate(trialBasisDeps, m_localTrialQuadPoints, ALL_DOFS, trialBasisData);
734 
735  // Iterate over the elements
736  for (int pairIndex = 0; pairIndex < geometryPairCount; ++pairIndex)
737  {
738  if (m_cacheGeometricalData) {
739  constTestGeomData = &m_cachedTestGeomData[elementIndexPairs[pairIndex].first];
740  constTrialGeomData = &m_cachedTrialGeomData[elementIndexPairs[pairIndex].second];
741  } else {
742  m_testRawGeometry.setupGeometry(elementIndexPairs[pairIndex].first, *testGeometry);
743  m_trialRawGeometry.setupGeometry(elementIndexPairs[pairIndex].second, *trialGeometry);
744  testGeometry->getData(testGeomDeps, m_localTestQuadPoints, *testGeomData);
745  trialGeometry->getData(trialGeomDeps, m_localTrialQuadPoints, *trialGeomData);
746  }
747  m_testTransformations.evaluate(testBasisData, *constTestGeomData, testValues);
748  m_trialTransformations.evaluate(trialBasisData, *constTrialGeomData, trialValues);
749 
750  m_kernels.evaluateOnGrid(*constTestGeomData, *constTrialGeomData, kernelValues);
751  m_integral.evaluateWithTensorQuadratureRule(
752  *constTestGeomData, *constTrialGeomData, testValues, trialValues,
753  kernelValues, m_testQuadWeights, m_trialQuadWeights,
754  *result[pairIndex]);
755  }
756 }
757 
758 
759 template <typename BasisFunctionType, typename KernelType,
760  typename ResultType, typename GeometryFactory>
761 void SeparableNumericalTestKernelTrialIntegrator<
762 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
763 integrateCl(
764  const std::vector<ElementIndexPair>& elementIndexPairs,
765  const Shapeset<BasisFunctionType>& testShapeset,
766  const Shapeset<BasisFunctionType>& trialShapeset,
767  const std::vector<arma::Mat<ResultType>*>& result) const
768 {
769 //#ifdef WITH_OPENCL
770 // const int testPointCount = m_localTestQuadPoints.n_cols;
771 // const int trialPointCount = m_localTrialQuadPoints.n_cols;
772 // const int geometryPairCount = elementIndexPairs.size();
773 // const int pointDim = m_localTestQuadPoints.n_rows;
774 // const int meshDim = m_openClHandler.meshGeom().size.dim;
775 
776 // if (testPointCount == 0 || trialPointCount == 0 || geometryPairCount == 0)
777 // return;
778 // // TODO: in the (pathological) case that pointCount == 0 but
779 // // geometryPairCount != 0, set elements of result to 0.
780 
781 // // TODO: add support for multiple-term expressions.
782 // if (!m_testTransformation.isTrivial() || !m_trialTransformation.isTrivial())
783 // throw std::runtime_error("SeparableNumericalTestKernelTrialIntegrator::"
784 // "integrateCl(): multiple-term expression lists are "
785 // "not supported in the OpenCL mode yet");
786 // const Expression<CoordinateType>& m_testExpression = m_testTransformation.term(0);
787 // const Expression<CoordinateType>& m_trialExpression = m_trialTransformation.term(0);
788 
789 // // Evaluate constants
790 // const int testComponentCount = m_testExpression.codomainDimension();
791 // const int trialComponentCount = m_trialExpression.codomainDimension();
792 // const int testDofCount = testShapeset.size();
793 // const int trialDofCount = trialShapeset.size();
794 
795 // const int kernelRowCount = m_kernels.codomainDimension();
796 // const int kernelColCount = m_kernels.domainDimension();
797 
798 // // Assert that the kernel tensor dimensions are compatible
799 // // with the number of components of the functions
800 
801 // const bool scalarKernel = (kernelRowCount == 1 && kernelColCount == 1);
802 // if (scalarKernel)
803 // assert(testComponentCount == trialComponentCount);
804 // else
805 // {
806 // assert(testComponentCount == kernelRowCount);
807 // assert(kernelColCount == trialComponentCount);
808 // }
809 
810 // result.set_size(testDofCount, trialDofCount, geometryPairCount);
811 
812 // int argIdx;
813 
814 // // define device buffers
815 // cl::Buffer *clElementIndexA;
816 // cl::Buffer *clElementIndexB;
817 // cl::Buffer *clGlobalTrialPoints;
818 // cl::Buffer *clGlobalTestPoints;
819 // cl::Buffer *clGlobalTrialNormals;
820 // cl::Buffer *clTrialIntegrationElements;
821 // cl::Buffer *clTestIntegrationElements;
822 // cl::Buffer *clTrialValues;
823 // cl::Buffer *clTestValues;
824 // cl::Buffer *clResult;
825 
826 // // Build the OpenCL program
827 // //std::vector<std::string> sources;
828 // cl::Program::Sources sources;
829 // sources.push_back (m_openClHandler.initStr());
830 // sources.push_back (testShapeset.clCodeString(true));
831 // sources.push_back (trialShapeset.clCodeString(false));
832 // sources.push_back (m_kernels.evaluateClCode());
833 // sources.push_back (clStrIntegrateRowOrCol());
834 // m_openClHandler.loadProgramFromStringArray (sources);
835 
836 // // we need to separate the two index lists
837 // std::vector<int> elementIndexA(geometryPairCount);
838 // std::vector<int> elementIndexB(geometryPairCount);
839 // for (int i = 0; i < geometryPairCount; i++) {
840 // elementIndexA[i] = elementIndexPairs[i].first;
841 // elementIndexB[i] = elementIndexPairs[i].second;
842 // }
843 
844 // // push the element index pairs
845 // clElementIndexA = m_openClHandler.pushVector<int> (elementIndexA);
846 // clElementIndexB = m_openClHandler.pushVector<int> (elementIndexB);
847 // clGlobalTestPoints = m_openClHandler.createBuffer<CoordinateType> (
848 // geometryPairCount*testPointCount*meshDim, CL_MEM_READ_WRITE);
849 // clGlobalTrialPoints = m_openClHandler.createBuffer<CoordinateType> (
850 // geometryPairCount*trialPointCount*meshDim, CL_MEM_READ_WRITE);
851 // clGlobalTrialNormals = m_openClHandler.createBuffer<CoordinateType> (
852 // geometryPairCount*trialPointCount*meshDim, CL_MEM_READ_WRITE);
853 // clTestIntegrationElements = m_openClHandler.createBuffer<CoordinateType> (
854 // geometryPairCount*testPointCount, CL_MEM_READ_WRITE);
855 // clTrialIntegrationElements = m_openClHandler.createBuffer<CoordinateType> (
856 // geometryPairCount*trialPointCount, CL_MEM_READ_WRITE);
857 // clTestValues = m_openClHandler.createBuffer<BasisFunctionType> (
858 // geometryPairCount*testPointCount*testDofCount, CL_MEM_READ_WRITE);
859 // clTrialValues = m_openClHandler.createBuffer<BasisFunctionType> (
860 // geometryPairCount*trialPointCount*trialDofCount, CL_MEM_READ_WRITE);
861 // clResult = m_openClHandler.createBuffer<ResultType> (testDofCount*trialDofCount*geometryPairCount,
862 // CL_MEM_WRITE_ONLY);
863 
864 // // Call the CL kernels to map the trial and test quadrature points
865 // cl::Kernel &clMapTest = m_openClHandler.setKernel ("clMapPointsToElements");
866 // argIdx = m_openClHandler.SetGeometryArgs (clMapTest, 0);
867 // clMapTest.setArg (argIdx++, *clTestQuadPoints);
868 // clMapTest.setArg (argIdx++, testPointCount);
869 // clMapTest.setArg (argIdx++, pointDim);
870 // clMapTest.setArg (argIdx++, *clElementIndexA);
871 // clMapTest.setArg (argIdx++, geometryPairCount);
872 // clMapTest.setArg (argIdx++, *clGlobalTestPoints);
873 // clMapTest.setArg (argIdx++, *clTestIntegrationElements);
874 // m_openClHandler.enqueueKernel (cl::NDRange(geometryPairCount, testPointCount));
875 
876 // cl::Kernel &clMapTrial = m_openClHandler.setKernel ("clMapPointsAndNormalsToElements");
877 // argIdx = m_openClHandler.SetGeometryArgs (clMapTrial, 0);
878 // clMapTrial.setArg (argIdx++, *clTrialQuadPoints);
879 // clMapTrial.setArg (argIdx++, trialPointCount);
880 // clMapTrial.setArg (argIdx++, pointDim);
881 // clMapTrial.setArg (argIdx++, *clElementIndexB);
882 // clMapTrial.setArg (argIdx++, *clGlobalTrialPoints);
883 // clMapTrial.setArg (argIdx++, *clGlobalTrialNormals);
884 // clMapTrial.setArg (argIdx++, *clTrialIntegrationElements);
885 // m_openClHandler.enqueueKernel (cl::NDRange(geometryPairCount, trialPointCount));
886 
887 // cl::Kernel &clBasisTest = m_openClHandler.setKernel ("clBasisAElements");
888 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTest, 0);
889 // clBasisTest.setArg (argIdx++, *clElementIndexA);
890 // clBasisTest.setArg (argIdx++, geometryPairCount);
891 // clBasisTest.setArg (argIdx++, *clTestQuadPoints);
892 // clBasisTest.setArg (argIdx++, testPointCount);
893 // clBasisTest.setArg (argIdx++, pointDim);
894 // clBasisTest.setArg (argIdx++, testDofCount);
895 // clBasisTest.setArg (argIdx++, *clTestValues);
896 // m_openClHandler.enqueueKernel (cl::NDRange(geometryPairCount, testPointCount));
897 
898 // cl::Kernel &clBasisTrial = m_openClHandler.setKernel ("clBasisBElements");
899 // argIdx = m_openClHandler.SetGeometryArgs (clBasisTrial, 0);
900 // clBasisTest.setArg (argIdx++, *clElementIndexB);
901 // clBasisTest.setArg (argIdx++, geometryPairCount);
902 // clBasisTest.setArg (argIdx++, *clTrialQuadPoints);
903 // clBasisTest.setArg (argIdx++, trialPointCount);
904 // clBasisTest.setArg (argIdx++, pointDim);
905 // clBasisTest.setArg (argIdx++, trialDofCount);
906 // clBasisTest.setArg (argIdx++, *clTrialValues);
907 // m_openClHandler.enqueueKernel (cl::NDRange(geometryPairCount, trialPointCount));
908 
909 // // Build the OpenCL kernel
910 // cl::Kernel &clKernel = m_openClHandler.setKernel (scalarKernel ?
911 // "clIntegratePairsScalar" : "clIntegratePairs");
912 
913 // // Set kernel arguments
914 // argIdx = m_openClHandler.SetGeometryArgs (clKernel, 0);
915 // clKernel.setArg (argIdx++, *clGlobalTrialPoints);
916 // clKernel.setArg (argIdx++, *clGlobalTestPoints);
917 // clKernel.setArg (argIdx++, *clGlobalTrialNormals);
918 // clKernel.setArg (argIdx++, *clTrialIntegrationElements);
919 // clKernel.setArg (argIdx++, *clTestIntegrationElements);
920 // clKernel.setArg (argIdx++, *clTrialValues);
921 // clKernel.setArg (argIdx++, *clTestValues);
922 // clKernel.setArg (argIdx++, *clTrialQuadWeights);
923 // clKernel.setArg (argIdx++, *clTestQuadWeights);
924 // clKernel.setArg (argIdx++, trialPointCount);
925 // clKernel.setArg (argIdx++, testPointCount);
926 // clKernel.setArg (argIdx++, trialComponentCount);
927 // clKernel.setArg (argIdx++, testComponentCount);
928 // clKernel.setArg (argIdx++, trialDofCount);
929 // clKernel.setArg (argIdx++, testDofCount);
930 // clKernel.setArg (argIdx++, geometryPairCount);
931 // clKernel.setArg (argIdx++, *clElementIndexA);
932 // clKernel.setArg (argIdx++, *clElementIndexB);
933 // clKernel.setArg (argIdx++, *clResult);
934 
935 // // Run the CL kernel
936 // m_openClHandler.enqueueKernel (cl::NDRange(geometryPairCount));
937 
938 // // Copy results back
939 // m_openClHandler.pullCube<ResultType> (*clResult, result);
940 
941 // // clean up local device buffers
942 // delete clElementIndexA;
943 // delete clElementIndexB;
944 // delete clGlobalTestPoints;
945 // delete clGlobalTrialPoints;
946 // delete clGlobalTrialNormals;
947 // delete clTrialIntegrationElements;
948 // delete clTestIntegrationElements;
949 // delete clTestValues;
950 // delete clTrialValues;
951 // delete clResult;
952 
953 //#else
954 // throw std::runtime_error ("Trying to call OpenCL method without OpenCL support");
955 //#endif // WITH_OPENCL
956 }
957 
958 template <typename BasisFunctionType, typename KernelType,
959  typename ResultType, typename GeometryFactory>
960 const std::pair<const char*,int>
961 SeparableNumericalTestKernelTrialIntegrator<
962 BasisFunctionType, KernelType, ResultType, GeometryFactory>::
963 clStrIntegrateRowOrCol () const
964 {
965  return std::make_pair (separable_numerical_double_integrator_cl,
966  separable_numerical_double_integrator_cl_len);
967 }
968 
969 } // namespace Fiber
virtual void addGeometricalDependencies(size_t &testGeomDeps, size_t &trialGeomDeps) const =0
Retrieve types of geometrical data on which the kernels depend.
virtual void evaluateOnGrid(const GeometricalData< CoordinateType > &testGeomData, const GeometricalData< CoordinateType > &trialGeomData, CollectionOf4dArrays< ValueType > &result) const =0
Evaluate the kernels on a tensor grid of test and trial points.