openPMD-api
Series.hpp
1 /* Copyright 2017-2025 Fabian Koller, Axel Huebl, Franz Poeschel, Luca Fedeli
2  *
3  * This file is part of openPMD-api.
4  *
5  * openPMD-api is free software: you can redistribute it and/or modify
6  * it under the terms of of either the GNU General Public License or
7  * the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * openPMD-api is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License and the GNU Lesser General Public License
15  * for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * and the GNU Lesser General Public License along with openPMD-api.
19  * If not, see <http://www.gnu.org/licenses/>.
20  */
21 #pragma once
22 
23 #include "openPMD/Error.hpp"
24 #include "openPMD/IO/AbstractIOHandler.hpp"
25 #include "openPMD/IO/Access.hpp"
26 #include "openPMD/IO/Format.hpp"
27 #include "openPMD/Iteration.hpp"
28 #include "openPMD/IterationEncoding.hpp"
29 #include "openPMD/Streaming.hpp"
30 #include "openPMD/auxiliary/TypeTraits.hpp"
31 #include "openPMD/auxiliary/Variant.hpp"
32 #include "openPMD/backend/Attributable.hpp"
33 #include "openPMD/backend/Container.hpp"
34 #include "openPMD/backend/HierarchyVisitor.hpp"
35 #include "openPMD/backend/ParsePreference.hpp"
36 #include "openPMD/config.hpp"
37 #include "openPMD/snapshots/Snapshots.hpp"
38 #include "openPMD/version.hpp"
39 
40 #if openPMD_HAVE_MPI
41 #include <mpi.h>
42 #endif
43 
44 #include <cstdint> // uint64_t
45 #include <deque>
46 #include <functional>
47 #include <map>
48 #include <memory>
49 #include <optional>
50 #include <set>
51 #include <stdexcept>
52 #include <string>
53 #include <tuple>
54 #include <unordered_map>
55 #include <variant>
56 #include <vector>
57 
58 // expose private and protected members for invasive testing
59 #ifndef OPENPMD_private
60 #define OPENPMD_private private:
61 #endif
62 
63 namespace openPMD
64 {
65 class ReadIterations;
66 class StatefulIterator;
67 class Series;
68 
69 namespace internal
70 {
/**
 * Self-documenting stand-in for a plain boolean; it is the type of
 * m_iterationEncodingSetExplicitly.
 *
 * The underlying type is bool: default_ corresponds to false and
 * explicit_ to true.
 */
enum class default_or_explicit : bool
{
    default_ = false,
    explicit_ = true
};
89  class SeriesData final : public AttributableData
90  {
91  public:
92  explicit SeriesData() = default;
93 
94  virtual ~SeriesData();
95 
96  SeriesData(SeriesData const &) = delete;
97  SeriesData(SeriesData &&) = delete;
98 
99  SeriesData &operator=(SeriesData const &) = delete;
100  SeriesData &operator=(SeriesData &&) = delete;
101 
102  using IterationIndex_t = Iteration::IterationIndex_t;
104  Iterations iterations{};
105 
121  std::unique_ptr<StatefulIterator> m_sharedStatefulIterator;
127  std::set<IterationIndex_t> m_currentlyActiveIterations;
131  std::unordered_map<IterationIndex_t, size_t> m_snapshotToStep;
151  std::unordered_map<IterationIndex_t, std::string> m_iterationFilenames;
161  std::optional<std::string> m_overrideFilebasedFilename;
168  std::string m_name;
173  std::string m_filenamePrefix;
177  std::string m_filenamePostfix;
182  std::string m_filenameExtension;
193  /*
194  * ADIOS2 should use variable-based encoding as default rather than
195  * group-based encoding as much as possible.
196  * Since this cannot be decided at construction time, groupBased
197  * encoding is selected first, and re-decided later.
198  * However, when group-based encoding is selected by the user explicitly,
199  * that selection should not be changed again.
200  * Hence, remember that here.
201  */
202  default_or_explicit m_iterationEncodingSetExplicitly =
203  default_or_explicit::default_;
215  StepStatus m_stepStatus = StepStatus::NoStep;
219  bool m_parseLazily = false;
220  uint64_t m_hintLazyParsingAfterTimeout = 20; // seconds
221 
230 
237  std::optional<ParsePreference> m_parsePreference;
238 
239  std::optional<std::function<AbstractIOHandler *(Series &)>>
240  m_deferred_initialization = std::nullopt;
241 
242  void close();
243 
244 #if openPMD_HAVE_MPI
245  /*
246  * @todo Once we have separate MPI headers, move this there.
247  */
248  std::optional<MPI_Comm> m_communicator;
249 #endif
250 
252  {};
254  {
255  std::string value;
256  };
258  {
259  std::string value;
260  };
261 
263  {
264  Attributable m_attributable;
265  std::variant<
269  m_rankTableSource;
270  std::optional<chunk_assignment::RankMeta> m_bufferedRead;
271  };
272  RankTableData m_rankTable;
273  }; // SeriesData
274 
275  class SeriesInternal;
276 } // namespace internal
277 
287 class Series : public Attributable
288 {
289  friend class Attributable;
290  friend class Iteration;
291  friend class Writable;
292  friend class ReadIterations;
293  friend class StatefulIterator;
294  friend class internal::SeriesData;
295  friend class internal::AttributableData;
296  friend class StatefulSnapshotsContainer;
297 
298 public:
299  explicit Series();
300 
301 #if openPMD_HAVE_MPI
314  Series(
315  std::string const &filepath,
316  Access at,
317  MPI_Comm comm,
318  std::string const &options = "{}");
319 #endif
320 
367  Series(
368  std::string const &filepath,
369  Access at,
370  std::string const &options = "{}");
371 
372  Series(Series const &) = default;
373  Series(Series &&) = default;
374 
375  Series &operator=(Series const &) = default;
376  Series &operator=(Series &&) = default;
377 
378  ~Series() override = default;
379 
383  using IterationIndex_t = Iteration::IterationIndex_t;
388  Iterations iterations;
389 
396  operator bool() const;
397 
403  std::string openPMD() const;
412  Series &setOpenPMD(std::string const &openPMD);
413 
419  uint32_t openPMDextension() const;
429 
434  std::string basePath() const;
442  Series &setBasePath(std::string const &basePath);
443 
449  std::string meshesPath() const;
459  Series &setMeshesPath(std::string const &meshesPath);
460 
471  bool hasRankTableRead();
472 
484 #if openPMD_HAVE_MPI
485  chunk_assignment::RankMeta rankTable(bool collective);
486 #else
487  chunk_assignment::RankMeta rankTable(bool collective = false);
488 #endif
489 
497  Series &setRankTable(std::string const &myRankInfo);
498 
504  std::string particlesPath() const;
514  Series &setParticlesPath(std::string const &particlesPath);
515 
521  std::string author() const;
528  Series &setAuthor(std::string const &author);
529 
535  std::string software() const;
545  std::string const &newName,
546  std::string const &newVersion = std::string("unspecified"));
547 
553  std::string softwareVersion() const;
563  [[deprecated(
564  "Set the version with the second argument of setSoftware()")]] Series &
565  setSoftwareVersion(std::string const &softwareVersion);
566 
571  std::string date() const;
577  Series &setDate(std::string const &date);
578 
584  std::string softwareDependencies() const;
592  Series &setSoftwareDependencies(std::string const &newSoftwareDependencies);
593 
599  std::string machine() const;
606  Series &setMachine(std::string const &newMachine);
607 
630 
636  std::string iterationFormat() const;
648  Series &setIterationFormat(std::string const &iterationFormat);
649 
653  std::string name() const;
654 
661  Series &setName(std::string const &name);
662 
669  std::string backend() const;
670  std::string backend();
671 
679  void flush(std::string backendConfig = "{}");
680 
697 
741 
756  void parseBase();
757 
768 
779  void close();
780 
781  void visitHierarchy(HierarchyVisitor &v, bool recursive) override;
782 
786  template <typename X = void, typename... Args>
787  auto iterationFlush(Args &&...)
788  {
789  static_assert(
790  auxiliary::dependent_false_v<X>,
791  "Cannot call this on an instance of Series.");
792  }
793 
794  // clang-format off
795 OPENPMD_private
796  // clang-format on
797 
798  static constexpr char const *const BASEPATH = "/data/%T/";
799 
800  struct ParsedInput;
801  using iterations_t = decltype(internal::SeriesData::iterations);
802  using iterations_iterator = iterations_t::iterator;
803 
804  using Data_t = internal::SeriesData;
805  std::shared_ptr<Data_t> m_series = nullptr;
806 
807  inline Data_t &get()
808  {
809  if (m_series)
810  {
811  return *m_series;
812  }
813  else
814  {
815  throw std::runtime_error(
816  "[Series] Cannot use default-constructed Series.");
817  }
818  }
819 
820  inline Data_t const &get() const
821  {
822  if (m_series)
823  {
824  return *m_series;
825  }
826  else
827  {
828  throw std::runtime_error(
829  "[Series] Cannot use default-constructed Series.");
830  }
831  }
832 
833  inline void setData(std::shared_ptr<internal::SeriesData> series)
834  {
835  m_series = std::move(series);
836  iterations = m_series->iterations;
837  Attributable::setData(m_series);
838  }
839 
840  std::unique_ptr<ParsedInput> parseInput(std::string);
850  template <typename TracingJSON>
851  void parseJsonOptions(TracingJSON &options, ParsedInput &);
852  bool hasExpansionPattern(std::string filenameWithExtension);
853  bool reparseExpansionPattern(std::string filenameWithExtension);
854  template <typename... MPI_Communicator>
855  void init(
856  std::string const &filepath,
857  Access at,
858  std::string const &options,
859  MPI_Communicator &&...);
860  template <typename TracingJSON, typename... MPI_Communicator>
861  std::tuple<std::unique_ptr<ParsedInput>, TracingJSON> initIOHandler(
862  std::string const &filepath,
863  std::string const &options,
864  Access at,
865  bool resolve_generic_extension,
866  MPI_Communicator &&...);
867  void initSeries(
868  std::unique_ptr<AbstractIOHandler>, std::unique_ptr<ParsedInput>);
869  void initDefaults(IterationEncoding, bool initAll = false);
881  std::future<void> flush_impl(
882  iterations_iterator begin,
883  iterations_iterator end,
884  internal::FlushParams const &flushParams,
885  bool flushIOHandler = true);
886  void flushFileBased(
887  iterations_iterator begin,
888  iterations_iterator end,
889  internal::FlushParams const &flushParams,
890  bool flushIOHandler = true);
891  /*
892  * Group-based and variable-based iteration layouts share a lot of logic
893  * (realistically, the variable-based iteration layout only throws out
894  * one layer in the hierarchy).
895  * As a convention, methods that deal with both layouts are called
896  * .*GorVBased, short for .*GroupOrVariableBased
897  */
898  void flushGorVBased(
899  iterations_iterator begin,
900  iterations_iterator end,
901  internal::FlushParams const &flushParams,
902  bool flushIOHandler = true);
903  void flushMeshesPath();
904  void flushParticlesPath();
905  void flushRankTable();
906  /* Parameter `read_only_this_single_iteration` used for reopening an
907  * Iteration after closing it.
908  */
909  void readFileBased(
910  std::optional<IterationIndex_t> read_only_this_single_iteration);
911  void readOneIterationFileBased(std::string const &filePath);
927  std::vector<IterationIndex_t> readGorVBased(
928  bool do_always_throw_errors,
929  bool init,
930  std::optional<IterationIndex_t> read_only_this_single_iteration);
931  void readBase();
932  std::string iterationFilename(IterationIndex_t i);
933 
/**
 * Two-state result type, returned by openIterationIfDirty().
 *
 * The underlying type is bool: HasBeenOpened corresponds to false and
 * RemainsClosed to true.
 */
enum class IterationOpened : bool
{
    HasBeenOpened = false,
    RemainsClosed = true
};
939  /*
940  * For use by flushFileBased, flushGorVBased
941  * Open an iteration, but only if necessary.
942  * Only open if the iteration is dirty and if it is not in deferred
943  * parse state.
944  */
945  IterationOpened
946  openIterationIfDirty(IterationIndex_t index, Iteration &iteration);
947  /*
948  * Open an iteration. Ensures that the iteration's m_closed status
949  * is set properly and that any files pertaining to the iteration
950  * are opened.
951  * Does not create files when called in CREATE mode.
952  */
953  void openIteration(IterationIndex_t index, Iteration &iteration);
954 
959  iterations_iterator indexOf(Iteration const &);
960 
976  AdvanceMode mode,
977  internal::AttributableData &file,
978  iterations_iterator it);
979 
981 
989  void flushStep(bool doFlush);
990 
991  /*
992  * setIterationEncoding() should only be called by users of our public API,
993  * but never internally. We need to distinguish if the iteration encoding
994  * was selected explicitly or implicitly, see
995  * m_iterationEncodingSetExplicitly for further details.
996  */
997  Series &setIterationEncoding_internal(
998  IterationEncoding iterationEncoding, internal::default_or_explicit);
999 
1000  /*
1001  * Returns the current content of the /data/snapshot attribute.
1002  * (We could also add this to the public API some time)
1003  */
1004  std::optional<std::vector<IterationIndex_t>> currentSnapshot();
1005 
1006  AbstractIOHandler *runDeferredInitialization();
1007 
1008  AbstractIOHandler *IOHandler();
1009  AbstractIOHandler const *IOHandler() const;
1010 
1011  /* adios2::Mode::ReadRandomAccess does not support reading modifiable
1012  * attributes. However, we need the values of /data/snapshot as a modifiable
1013  * attribute, so this function quickly opens the file in adios2::Mode::Read
1014  * and retrieves the changing values over time.
1015  * Return std::nullopt if /data/snapshot is not present.
1016  */
1017  std::optional<std::vector<std::vector<IterationIndex_t>>>
1018  preparseSnapshots();
1019 
1020  Snapshots makeRandomAccessSnapshots();
1021  Snapshots makeSynchronousSnapshots();
1022  /* Should adios2::Variable<T>::SetStepSelection() be used for accessing
1023  * steps?
1024  */
1025  [[nodiscard]] bool randomAccessSteps() const;
1026 
1027  std::vector<std::string> availableDatasets();
1028 }; // Series
1029 
1030 namespace debug
1031 {
1032  void printDirty(Series const &);
1033 }
1034 } // namespace openPMD
1035 
1036 // Make sure that this legacy header is always included if Series.hpp is
1037 // included, otherwise Series::readIterations() cannot be used
1038 #include "openPMD/ReadIterations.hpp"
Interface for communicating between logical and physically persistent data.
Definition: AbstractIOHandler.hpp:206
Layer to manage storage of attributes associated with file objects.
Definition: Attributable.hpp:225
Definition: HierarchyVisitor.hpp:20
Logical compilation of data from one snapshot (e.g.
Definition: Iteration.hpp:172
Definition: Iteration.hpp:543
Legacy class as return type for Series::readIterations().
Definition: ReadIterations.hpp:75
Implementation for the root level of the openPMD hierarchy.
Definition: Series.hpp:288
Series & setName(std::string const &name)
Set the pattern for file names.
Definition: Series.cpp:736
std::string softwareVersion() const
Definition: Series.cpp:644
Series & setMeshesPath(std::string const &meshesPath)
Set the path to mesh records, relative(!) to basePath.
Definition: Series.cpp:275
std::string iterationFormat() const
Definition: Series.cpp:700
void flush(std::string backendConfig="{}")
Execute all required remaining IO operations to write or read data.
Definition: Series.cpp:785
auto iterationFlush(Args &&...)
This overrides Attributable::iterationFlush() which will fail on Series.
Definition: Series.hpp:787
void parseBase()
Parse the Series.
Definition: Series.cpp:3534
std::string basePath() const
Definition: Series.cpp:249
Series & setSoftwareDependencies(std::string const &newSoftwareDependencies)
Indicate dependencies of software that were used to create the file.
Definition: Series.cpp:672
iterations_iterator indexOf(Iteration const &)
Find the given iteration in Series::iterations and return an iterator into Series::iterations at that...
Definition: Series.cpp:2603
Iteration::IterationIndex_t IterationIndex_t
An unsigned integer type, used to identify Iterations in a Series.
Definition: Series.hpp:383
Series & setParticlesPath(std::string const &particlesPath)
Set the path to groups for each particle species, relative(!) to basePath.
Definition: Series.cpp:599
std::future< void > flush_impl(iterations_iterator begin, iterations_iterator end, internal::FlushParams const &flushParams, bool flushIOHandler=true)
Internal call for flushing a Series.
Definition: Series.cpp:1369
std::string machine() const
Definition: Series.cpp:678
std::string date() const
Definition: Series.cpp:655
WriteIterations writeIterations()
Entry point to the writing end of the streaming API.
Definition: Series.cpp:3539
std::string backend() const
The currently used backend.
Definition: Series.cpp:774
void close()
Close the Series and release the data storage/transport backends.
Definition: Series.cpp:3555
Series & setOpenPMD(std::string const &openPMD)
Set the version of the enforced openPMD standard.
Definition: Series.cpp:226
void flushStep(bool doFlush)
Called at the end of an IO step to store the iterations defined in the IO step to the snapshot attrib...
Definition: Series.cpp:2826
std::string openPMD() const
Definition: Series.cpp:221
Series & setAuthor(std::string const &author)
Indicate the author and contact for the information in the file.
Definition: Series.cpp:625
Snapshots snapshots()
Preferred way to access Iterations/Snapshots.
Definition: Series.cpp:3424
Series & setSoftwareVersion(std::string const &softwareVersion)
Indicate the version of the software/code/simulation that created the file.
Definition: Series.cpp:649
AdvanceStatus advance(AdvanceMode mode, internal::AttributableData &file, iterations_iterator it)
In step-based IO mode, begin or end an IO step for the given iteration.
Definition: Series.cpp:2627
void visitHierarchy(HierarchyVisitor &v, bool recursive) override
Visitor pattern for the openPMD object hierarchy in postfix traversal.
Definition: Series.cpp:3561
std::string author() const
Definition: Series.cpp:620
std::string meshesPath() const
Definition: Series.cpp:270
Series & setMachine(std::string const &newMachine)
Indicate the machine or relevant hardware that created the file.
Definition: Series.cpp:683
std::string software() const
Definition: Series.cpp:631
std::vector< IterationIndex_t > readGorVBased(bool do_always_throw_errors, bool init, std::optional< IterationIndex_t > read_only_this_single_iteration)
Note on re-parsing of a Series: If init == false, the parsing process will seek for new Iterations/Re...
Definition: Series.cpp:2072
uint32_t openPMDextension() const
Definition: Series.cpp:238
chunk_assignment::RankMeta rankTable(bool collective)
Definition: Series.cpp:329
Series & setBasePath(std::string const &basePath)
Set the common prefix for all data sets and sub-groups of a specific iteration.
Definition: Series.cpp:254
bool hasRankTableRead()
Definition: Series.cpp:312
Series & setRankTable(std::string const &myRankInfo)
Set the Mpi Ranks Meta Info attribute, i.e.
Definition: Series.cpp:460
void parseJsonOptions(TracingJSON &options, ParsedInput &)
Parse non-backend-specific configuration in JSON config.
Definition: Series.cpp:3163
std::string particlesPath() const
Definition: Series.cpp:594
Series & setDate(std::string const &date)
Indicate the date of creation.
Definition: Series.cpp:660
std::string name() const
Definition: Series.cpp:731
Series & setSoftware(std::string const &newName, std::string const &newVersion=std::string("unspecified"))
Indicate the software/code/simulation that created the file.
Definition: Series.cpp:637
std::string softwareDependencies() const
Definition: Series.cpp:666
ReadIterations readIterations()
Entry point to the reading end of the streaming API.
Definition: Series.cpp:3376
Series & setOpenPMDextension(uint32_t openPMDextension)
Set a 32-bit mask of applied extensions to the openPMD standard.
Definition: Series.cpp:243
Series & setIterationEncoding(IterationEncoding iterationEncoding)
Set the encoding style for multiple iterations in this series.
Definition: Series.cpp:694
IterationEncoding iterationEncoding() const
Definition: Series.cpp:689
Series & setIterationFormat(std::string const &iterationFormat)
Set a pattern describing how to access single iterations in the raw file.
Definition: Series.cpp:705
Entry point for accessing Snapshots/Iterations.
Definition: Snapshots.hpp:50
Based on the logic of the former class ReadIterations, integrating into itself the logic of former Wr...
Definition: StatefulIterator.hpp:204
Definition: ContainerImpls.hpp:36
Layer to mirror structure of logical data and persistent data in file.
Definition: Writable.hpp:77
Definition: Attributable.hpp:110
Data members for Series.
Definition: Series.hpp:90
std::unordered_map< IterationIndex_t, size_t > m_snapshotToStep
For reading: In which IO step do I need to look for an Iteration?
Definition: Series.hpp:131
std::unique_ptr< StatefulIterator > m_sharedStatefulIterator
Series::readIterations() returns an iterator type that modifies the state of the Series (by proceedin...
Definition: Series.hpp:121
std::string m_filenamePrefix
Filename leading up to the expansion pattern.
Definition: Series.hpp:173
std::set< IterationIndex_t > m_currentlyActiveIterations
For writing: Remember which iterations have been written in the currently active output step.
Definition: Series.hpp:127
std::unordered_map< IterationIndex_t, std::string > m_iterationFilenames
This map contains the filenames of those Iterations which were found on the file system upon opening ...
Definition: Series.hpp:151
std::optional< std::string > m_overrideFilebasedFilename
Needed if reading a single iteration of a file-based series.
Definition: Series.hpp:161
StepStatus m_stepStatus
Whether a step is currently active for this iteration.
Definition: Series.hpp:215
std::string m_name
Name of the iteration without filename suffix.
Definition: Series.hpp:168
std::optional< ParsePreference > m_parsePreference
Remember the preference that the backend specified for parsing.
Definition: Series.hpp:237
std::string m_filenamePostfix
Filename after the expansion pattern without filename extension.
Definition: Series.hpp:177
int m_filenamePadding
The padding in file-based iteration encoding.
Definition: Series.hpp:188
bool m_parseLazily
True if a user opts into lazy parsing.
Definition: Series.hpp:219
bool m_wroteAtLeastOneIOStep
In variable-based encoding, all backends except ADIOS2 can only write one single iteration.
Definition: Series.hpp:229
std::string m_filenameExtension
Filename extension as specified by the user.
Definition: Series.hpp:182
IterationEncoding m_iterationEncoding
The iteration encoding used in this series.
Definition: Series.hpp:192
Format m_format
Detected IO format (backend).
Definition: Series.hpp:207
Public definitions of openPMD-api.
Definition: Date.cpp:29
Access
File access mode to use during IO.
Definition: Access.hpp:58
AdvanceMode
In step-based mode (i.e.
Definition: Streaming.hpp:46
StepStatus
Used in step-based mode (i.e.
Definition: Streaming.hpp:57
AdvanceStatus
In step-based mode (i.e.
Definition: Streaming.hpp:32
Format
File format to use during IO.
Definition: Format.hpp:30
IterationEncoding
Encoding scheme of an Iterations Series'.
Definition: IterationEncoding.hpp:33