openPMD-api
MPIBenchmark.hpp
/* Copyright 2018-2021 Franz Poeschel
 *
 * This file is part of openPMD-api.
 *
 * openPMD-api is free software: you can redistribute it and/or modify
 * it under the terms of either the GNU General Public License or
 * the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * openPMD-api is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License and the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * and the GNU Lesser General Public License along with openPMD-api.
 * If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "openPMD/config.hpp"
#if openPMD_HAVE_MPI

#include "RandomDatasetFiller.hpp"

#include "openPMD/DatatypeHelpers.hpp"
#include "openPMD/benchmark/mpi/BlockSlicer.hpp"
#include "openPMD/benchmark/mpi/DatasetFiller.hpp"
#include "openPMD/benchmark/mpi/MPIBenchmarkReport.hpp"
#include "openPMD/openPMD.hpp"

#include <mpi.h>

#include <chrono>
#include <exception>
#include <iostream>
#include <set>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>

namespace openPMD
{
/** Class representing a benchmark.
 */
template <typename DatasetFillerProvider>
class MPIBenchmark
{

public:
    using extentT = Extent::value_type;
    MPI_Comm communicator = MPI_COMM_WORLD;

    /** Total extent of the hypercuboid used in the benchmark.
     */
    Extent totalExtent;

    std::shared_ptr<BlockSlicer> m_blockSlicer;

    DatasetFillerProvider m_dfp;

    /** Construct an MPI benchmark manually.
     */
    MPIBenchmark(
        std::string basePath,
        Extent tExtent,
        std::shared_ptr<BlockSlicer> blockSlicer,
        DatasetFillerProvider dfp,
        MPI_Comm comm = MPI_COMM_WORLD);

    void addConfiguration(
        std::string jsonConfig,
        std::string backend,
        Datatype dt,
        Series::IterationIndex_t iterations,
        int threadSize);

    void addConfiguration(
        std::string jsonConfig,
        std::string backend,
        Datatype dt,
        Series::IterationIndex_t iterations);

    void resetConfigurations();

    /** Main function for running a benchmark.
     */
    template <typename Clock>
    MPIBenchmarkReport<typename Clock::duration>
    runBenchmark(int rootThread = 0);

private:
    std::string m_basePath;
    std::vector<std::tuple<
        std::string,
        std::string,
        int,
        Datatype,
        Series::IterationIndex_t>>
        m_configurations;

    enum Config
    {
        JSON_CONFIG = 0,
        BACKEND,
        NRANKS,
        DTYPE,
        ITERATIONS
    };

    std::pair<Offset, Extent> slice(int size);

    template <typename Clock>
    struct BenchmarkExecution
    {
        MPIBenchmark<DatasetFillerProvider> *m_benchmark;

        explicit BenchmarkExecution(
            MPIBenchmark<DatasetFillerProvider> *benchmark)
            : m_benchmark{benchmark}
        {}

        template <typename T>
        typename Clock::duration writeBenchmark(
            std::string const &jsonConfig,
            Offset &offset,
            Extent &extent,
            std::string const &extension,
            std::shared_ptr<DatasetFiller<T>> datasetFiller,
            Series::IterationIndex_t iterations);

        template <typename T>
        typename Clock::duration readBenchmark(
            Offset &offset,
            Extent &extent,
            std::string extension,
            Series::IterationIndex_t iterations);

        template <typename T>
        static void call(
            BenchmarkExecution<Clock> &,
            MPIBenchmarkReport<typename Clock::duration> &report,
            int rootThread = 0);

        static constexpr char const *errorMsg = "BenchmarkExecution";
    };
};

// Implementation

template <typename DatasetFillerProvider>
template <typename Clock>
MPIBenchmarkReport<typename Clock::duration>
MPIBenchmark<DatasetFillerProvider>::runBenchmark(int rootThread)
{
    MPIBenchmarkReport<typename Clock::duration> res{this->communicator};
    BenchmarkExecution<Clock> exec{this};

    std::set<Datatype> datatypes;
    for (auto const &conf : m_configurations)
    {
        datatypes.insert(std::get<DTYPE>(conf));
    }
    for (Datatype dt : datatypes)
    {
        switchType<BenchmarkExecution<Clock>>(dt, exec, res, rootThread);
    }

    return res;
}

template <typename DatasetFillerProvider>
MPIBenchmark<DatasetFillerProvider>::MPIBenchmark(
    std::string basePath,
    Extent tExtent,
    std::shared_ptr<BlockSlicer> blockSlicer,
    DatasetFillerProvider dfp,
    MPI_Comm comm)
    : communicator{comm}
    , totalExtent{std::move(tExtent)}
    , m_blockSlicer{std::move(blockSlicer)}
    , m_dfp{dfp}
    , m_basePath{std::move(basePath)}
{
    if (m_blockSlicer == nullptr)
        throw std::runtime_error("Argument blockSlicer cannot be a nullptr!");
}

template <typename DatasetFillerProvider>
std::pair<Offset, Extent> MPIBenchmark<DatasetFillerProvider>::slice(int size)
{
    int actualSize;
    MPI_Comm_size(this->communicator, &actualSize);
    int rank;
    MPI_Comm_rank(this->communicator, &rank);
    size = std::min(size, actualSize);
    return m_blockSlicer->sliceBlock(totalExtent, size, rank);
}

template <typename DatasetFillerProvider>
void MPIBenchmark<DatasetFillerProvider>::addConfiguration(
    std::string jsonConfig,
    std::string backend,
    Datatype dt,
    Series::IterationIndex_t iterations,
    int threadSize)
{
    this->m_configurations.emplace_back(
        std::move(jsonConfig), backend, threadSize, dt, iterations);
}

template <typename DatasetFillerProvider>
void MPIBenchmark<DatasetFillerProvider>::addConfiguration(
    std::string jsonConfig,
    std::string backend,
    Datatype dt,
    Series::IterationIndex_t iterations)
{
    int size;
    MPI_Comm_size(communicator, &size);
    addConfiguration(std::move(jsonConfig), backend, dt, iterations, size);
}

template <typename DatasetFillerProvider>
void MPIBenchmark<DatasetFillerProvider>::resetConfigurations()
{
    this->m_configurations.clear();
}

template <typename DatasetFillerProvider>
template <typename Clock>
template <typename T>
typename Clock::duration
MPIBenchmark<DatasetFillerProvider>::BenchmarkExecution<Clock>::writeBenchmark(
    std::string const &jsonConfig,
    Offset &offset,
    Extent &extent,
    std::string const &extension,
    std::shared_ptr<DatasetFiller<T>> datasetFiller,
    Series::IterationIndex_t iterations)
{
    MPI_Barrier(m_benchmark->communicator);
    auto start = Clock::now();

    // open file for writing
    Series series = Series(
        m_benchmark->m_basePath + "." + extension,
        Access::CREATE,
        m_benchmark->communicator,
        jsonConfig);

    for (Series::IterationIndex_t i = 0; i < iterations; i++)
    {
        auto writeData = datasetFiller->produceData();

        MeshRecordComponent id =
            series.iterations[i].meshes["id"][MeshRecordComponent::SCALAR];

        Datatype datatype = determineDatatype(writeData);
        Dataset dataset = Dataset(datatype, m_benchmark->totalExtent);

        id.resetDataset(dataset);

        series.flush();

        id.storeChunk<T>(writeData, offset, extent);
        series.flush();
    }

    MPI_Barrier(m_benchmark->communicator);
    auto end = Clock::now();

    // deduct the time needed for data generation
    for (Series::IterationIndex_t i = 0; i < iterations; i++)
    {
        datasetFiller->produceData();
    }
    auto deduct = Clock::now();

    return end - start - (deduct - end);
}

template <typename DatasetFillerProvider>
template <typename Clock>
template <typename T>
typename Clock::duration
MPIBenchmark<DatasetFillerProvider>::BenchmarkExecution<Clock>::readBenchmark(
    Offset &offset,
    Extent &extent,
    std::string extension,
    Series::IterationIndex_t iterations)
{
    MPI_Barrier(m_benchmark->communicator);
    // let every thread measure time
    auto start = Clock::now();

    Series series = Series(
        m_benchmark->m_basePath + "." + extension,
        Access::READ_ONLY,
        m_benchmark->communicator);

    for (Series::IterationIndex_t i = 0; i < iterations; i++)
    {
        MeshRecordComponent id =
            series.iterations[i].meshes["id"][MeshRecordComponent::SCALAR];

        auto chunk_data = id.loadChunk<T>(offset, extent);
        series.flush();
    }

    MPI_Barrier(m_benchmark->communicator);
    auto end = Clock::now();
    return end - start;
}

template <typename DatasetFillerProvider>
template <typename Clock>
template <typename T>
void MPIBenchmark<DatasetFillerProvider>::BenchmarkExecution<Clock>::call(
    BenchmarkExecution<Clock> &exec,
    MPIBenchmarkReport<typename Clock::duration> &report,
    int rootThread)
{
    Datatype dt = determineDatatype<T>();
    auto dsf = std::dynamic_pointer_cast<DatasetFiller<T>>(
        exec.m_benchmark->m_dfp.template operator()<T>());
    for (auto const &config : exec.m_benchmark->m_configurations)
    {
        std::string jsonConfig;
        std::string backend;
        int size;
        Datatype dt2;
        Series::IterationIndex_t iterations;
        std::tie(jsonConfig, backend, size, dt2, iterations) = config;

        if (dt != dt2)
        {
            continue;
        }

        auto localCuboid = exec.m_benchmark->slice(size);

        extentT blockSize = 1;
        for (auto ext : localCuboid.second)
        {
            blockSize *= ext;
        }
        dsf->setNumberOfItems(blockSize);

        auto writeTime = exec.writeBenchmark<T>(
            jsonConfig,
            localCuboid.first,
            localCuboid.second,
            backend,
            dsf,
            iterations);
        auto readTime = exec.readBenchmark<T>(
            localCuboid.first, localCuboid.second, backend, iterations);
        report.addReport(
            rootThread,
            jsonConfig,
            backend,
            size,
            dt2,
            iterations,
            std::make_pair(writeTime, readTime));
    }
}
} // namespace openPMD

#endif
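
For orientation, here is a minimal usage sketch of the API declared above, modeled on openPMD-api's parallel benchmark example. It is illustrative and not part of this header: the helper classes OneDimensionalBlockSlicer, RandomDatasetFiller and SimpleDatasetFillerProvider are assumed to be available from the library's benchmark utilities, and the "h5" run assumes a build with MPI and the HDF5 backend enabled.

// Illustrative usage sketch (assumptions: helper headers/classes as noted above).
#include <openPMD/benchmark/mpi/MPIBenchmark.hpp>
#include <openPMD/benchmark/mpi/OneDimensionalBlockSlicer.hpp>
#include <openPMD/benchmark/mpi/RandomDatasetFiller.hpp>
#include <openPMD/openPMD.hpp>

#include <mpi.h>

#include <chrono>
#include <cstdint>
#include <memory>
#include <random>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);
    {
        // Benchmark a single datatype; the DatasetFillerProvider decides
        // which datatypes the benchmark can serve.
        using type = uint64_t;
        openPMD::Datatype dt = openPMD::determineDatatype<type>();

        // Global extent of the benchmarked dataset, shared by all ranks.
        openPMD::Extent total{100, 100, 100};

        // Give each rank one hyperslab along dimension 0 to write and read.
        auto blockSlicer =
            std::make_shared<openPMD::OneDimensionalBlockSlicer>(0);

        // Produce random data for each rank's block.
        std::uniform_int_distribution<type> distr(0, 200000000);
        openPMD::RandomDatasetFiller<decltype(distr)> df{distr};
        openPMD::SimpleDatasetFillerProvider<decltype(df)> dfp{df};

        // The base path is extended with each backend's file extension.
        openPMD::MPIBenchmark<decltype(dfp)> benchmark{
            "benchmark",
            total,
            std::shared_ptr<openPMD::BlockSlicer>{blockSlicer},
            dfp};

        // One run: default JSON configuration, HDF5 backend, 10 iterations,
        // using all ranks of the communicator.
        benchmark.addConfiguration("{}", "h5", dt, 10);

        // Executes every configured run and gathers per-configuration
        // write/read durations on the root rank.
        auto report =
            benchmark.runBenchmark<std::chrono::high_resolution_clock>();
        static_cast<void>(report);
    }
    MPI_Finalize();
    return 0;
}

Note that the JSON string passed to addConfiguration is forwarded to the Series constructor in writeBenchmark, so backend-specific options can be compared simply by adding further configurations to the same benchmark object.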