hdf5: dataset: init -> initbase + hdf5-mpi: compiles; TODO: tests

This commit is contained in:
Christian Zimmermann 2024-10-30 23:54:21 -07:00
parent f597254d5d
commit 1663f7ae9a
14 changed files with 414 additions and 30 deletions

View file

@ -16,4 +16,8 @@ if(ENABLE_mpi)
add_subdirectory(opt/mpi) add_subdirectory(opt/mpi)
endif() endif()
# The combined hdf5-mpi extension is only configured when both the hdf5 and the mpi option are enabled.
if(ENABLE_hdf5 AND ENABLE_mpi)
add_subdirectory(opt/hdf5-mpi)
endif()
install(DIRECTORY include/ DESTINATION ${INSTALL_PATH}/include/cnorxz) install(DIRECTORY include/ DESTINATION ${INSTALL_PATH}/include/cnorxz)

View file

@ -0,0 +1,33 @@
# Build configuration of the parallel (MPI) hdf5 extension.

# Locate the HDF5 C and high-level (HL) components and expose their include directories.
find_package(HDF5 REQUIRED COMPONENTS C HL)
if(HDF5_FOUND)
include_directories(${HDF5_INCLUDE_DIRS})
else()
message(FATAL_ERROR "HDF5 not found")
endif()
message(STATUS "hdf5 libs = ${HDF5_LIBRARIES}")
# Collect the plain and the high-level HDF5 libraries in a single variable.
set(HDF5_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES})
# Locate MPI and expose the include directories of its C bindings.
find_package(MPI REQUIRED)
if(MPI_FOUND)
include_directories(${MPI_C_INCLUDE_DIRS})
else()
message(FATAL_ERROR "MPI not found")
endif()
message(STATUS "mpi lib = ${MPI_C_LIBRARIES}")
set(MPI_LIBS ${MPI_LIBRARIES})
# The extension is only built if the HDF5 installation that was found reports parallel support;
# otherwise a warning is printed and nothing is built.
if(HDF5_IS_PARALLEL)
# Headers of the serial hdf5 extension, the mpi extension and this extension.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../hdf5/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../mpi/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
add_subdirectory(lib)
# Tests are not built yet.
#add_subdirectory(tests)
# The header include/h5_mpi_base.h is edited in place (in the source tree) around the header
# installation: the build-mode macro is switched to 0 so that the installed copy carries that
# value, and switched back to 1 afterwards.
# NOTE(review): this relies on the three install rules running in the order listed - confirm.
install(CODE "execute_process(COMMAND sed -i \"s|CXZ_H5_MPI_BUILD_MODE 1|CXZ_H5_MPI_BUILD_MODE 0|g;\" ${CMAKE_CURRENT_SOURCE_DIR}/include/h5_mpi_base.h)")
install(DIRECTORY include/ DESTINATION ${INSTALL_PATH}/include/cnorxz/hdf5-mpi)
install(CODE "execute_process(COMMAND sed -i \"s|CXZ_H5_MPI_BUILD_MODE 0|CXZ_H5_MPI_BUILD_MODE 1|g;\" ${CMAKE_CURRENT_SOURCE_DIR}/include/h5_mpi_base.h)")
else()
message(WARNING "no parallel support in available HDF5 library")
endif()

View file

@ -0,0 +1,19 @@
// Common include header of the hdf5-mpi extension: pulls in the mpi and the hdf5 extension headers.
#ifndef __cxz_h5_mpi_base_h__
#define __cxz_h5_mpi_base_h__
// Build-mode switch: non-zero selects the bare include paths below, zero selects the
// paths prefixed with the extension directory. The install rules of this extension
// rewrite the value on the following line with sed, so keep the macro name and its
// value on one line, separated by a single blank.
#define CXZ_H5_MPI_BUILD_MODE 1
#if CXZ_H5_MPI_BUILD_MODE
// Build mode: headers are referenced by their bare file names.
#include "cnorxz_mpi.h"
#include "cnorxz_hdf5.h"
#else
// Otherwise: headers are referenced relative to the mpi/ and hdf5/ sub-directories.
#include "mpi/cnorxz_mpi.h"
#include "hdf5/cnorxz_hdf5.h"
#endif
#endif

View file

@ -0,0 +1,50 @@
// -*- C++ -*-
/**
@file opt/hdf5-mpi/include/h5_rdataset.cc.h
@brief Implementation of template member functions of RDataset and SRDataset.
Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de
**/
#ifndef __cxz_h5_rdataset_cc_h__
#define __cxz_h5_rdataset_cc_h__
#include "h5_rdataset.h"
namespace CNORXZ
{
namespace hdf5
{
/** Initialize the dataset from an array distributed over multiple ranks.
    Only arrays with trivial (contiguous) format are supported; for any other
    format CXZ_ERROR is raised.
    @tparam T Value type of the array.
    @param data Array containing the dataset.
    @return Reference to this dataset.
    NOTE(review): the declaration in h5_rdataset.h appears to use Dataset& as
    return type; confirm that declaration and definition agree.
 */
template <typename T>
RDataset& RDataset::init(const mpi::RArray<T>& data)
{
    const hid_t tid = getTypeId(*data.data());
    if(data.begin().formatIsTrivial()){
        // The (range, type, data) initializer of Dataset is named initbase.
        // It has to be called with explicit qualification, because the
        // two-argument initbase override declared in RDataset hides the
        // base-class overloads for unqualified lookup. Dataset::initbase
        // itself dispatches to the virtual two-argument initbase and to
        // writebase, so the RDataset overrides are still used.
        Dataset::initbase(data.range(), tid, data.data());
    }
    else {
        CXZ_ERROR("Got array type with non-trivial format; non-contiguous data formats are not supported yet!");
    }
    return *this;
}
/** Construct a typed parallel dataset handle.
    Both arguments are forwarded unchanged to the RDataset constructor.
    @tparam T Dataset value type.
    @param name Dataset name.
    @param _parent Parent content object.
 */
template <typename T>
SRDataset<T>::SRDataset(const String& name, const ContentBase* _parent)
    : RDataset(name, _parent)
{
}
/** Read the dataset into a newly created array.
    The range of the result is built by mpi::rrange from the file range and
    the given geometry; the values are filled in through readbase.
    @tparam T Dataset value type.
    @param geom Geometry of the created array.
    @return Array containing the dataset values.
 */
template <typename T>
mpi::RArray<T> SRDataset<T>::read(const RangePtr& geom) const
{
    RangePtr resultRange = mpi::rrange(mFileRange, geom);
    mpi::RArray<T> result(resultRange);
    // no begin index is passed to readbase
    this->readbase(result.data(), resultRange, nullptr);
    return result;
}
}
}

View file

@ -12,6 +12,13 @@
#ifndef __cxz_h5_rdataset_h__ #ifndef __cxz_h5_rdataset_h__
#define __cxz_h5_rdataset_h__ #define __cxz_h5_rdataset_h__
//#include "hdf5/h5_types.h"
//#include "hdf5/h5_content_base.h"
//#include "hdf5/h5_dataset.h"
//#include "mpi/mpi_base.h"
#include "h5_mpi_base.h"
namespace CNORXZ namespace CNORXZ
{ {
namespace hdf5 namespace hdf5
@ -19,8 +26,7 @@ namespace CNORXZ
/** **** /** ****
Class to handle hdf5 datasets on multiple ranks. Class to handle hdf5 datasets on multiple ranks.
*/ */
template <typename T> class RDataset : public Dataset
class RDataset : public SDataset<T>
{ {
public: public:
DEFAULT_MEMBERS(RDataset); /**< Default constructors and assignments. */ DEFAULT_MEMBERS(RDataset); /**< Default constructors and assignments. */
@ -31,26 +37,44 @@ namespace CNORXZ
*/ */
RDataset(const String& name, const ContentBase* _parent); RDataset(const String& name, const ContentBase* _parent);
virtual RDataset& initbase(const RangePtr& fileRange, hid_t type) override;
virtual RDataset& writebase(const RangePtr& writeRange, Sptr<YIndex> pos,
const void* data) override;
virtual void readbase(void* dest, RangePtr readrange, Sptr<YIndex> beg) const override;
/** Initalize the dataset. /** Initalize the dataset.
@param data Array containing the dataset. @param data Array containing the dataset.
*/ */
template <typename T> template <typename T>
RDataset& init(const mpi::RArray<T>& data); Dataset& init(const mpi::RArray<T>& data);
/** Read the dataset.
@return Array containing the dataset values.
*/
mpi::RArray<T> read() const;
/** Read a given subset of the dataset.
The subset needs to be hypercubic.
@param beg Index indicating the global begin edge of the hypercube.
@param end Index indicating the global end edge of the hypercube (inclusive).
@return Array containing the dataset values.
*/
template <class I, typename M>
mpi::RArray<T> read(const IndexInterface<I,M>& beg, const IndexInterface<I,M>& end) const;
private:
bool checkHaveParallel() const;
}; };
/** ****
Class to handle hdf5 datasets on multiple ranks,
the value type is assumed to be known at compile time.
Extends RDataset by a typed read function.
@tparam T Dataset value type.
*/
template <typename T>
class SRDataset : public RDataset
{
public:
DEFAULT_MEMBERS(SRDataset); /**< Default constructors and assignments. */
/** Construct the class.
The arguments are passed on to the RDataset constructor.
@param name Dataset name.
@param _parent Parent content object.
*/
SRDataset(const String& name, const ContentBase* _parent);
/** Read the dataset into a newly created array.
@param geom Geometry of the created array; it is combined with the file range
to form the range of the returned array.
@return Array containing the dataset values.
*/
mpi::RArray<T> read(const RangePtr& geom) const;
};
}
}
#endif #endif

View file

@ -0,0 +1,45 @@
// -*- C++ -*-
/**
@file opt/hdf5-mpi/include/h5_rfile.h
@brief RFile declaration.
Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de
**/
#ifndef __cxz_h5_rfile_h__
#define __cxz_h5_rfile_h__
//#include "h5_file.h"
#include "h5_mpi_base.h"
namespace CNORXZ
{
namespace hdf5
{
/** ****
Class to handle hdf5 file objects with parallel I/O.
Objects of this type usually serve as root object
so they don't have any parent.
RDataset objects check for a parent of this type before they
allow parallel access.
*/
class RFile : public File
{
public:
DEFAULT_MEMBERS(RFile); /**< Default constructors and assignments. */
/** Construct the class.
Creates a file access property list configured for MPI I/O
on MPI_COMM_WORLD.
@param fname Path to the hdf5 file to be handled.
@param _ro Open in read-only mode if true, otherwise have write access.
*/
RFile(const String& fname, bool _ro = true);
/** Destructor. Closes the file access property list held by this object. */
~RFile();
};
}
}
#endif

View file

@ -0,0 +1,18 @@
# Source files of the parallel (MPI) hdf5 extension library.
set(libcnorxzhdf5mpi_a_SOURCES
  ${CMAKE_CURRENT_SOURCE_DIR}/h5_rfile.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/h5_rdataset.cc
)

# The sources are compiled once into an object library; the shared library is
# assembled from these objects.
add_library(cnorxzhdf5mpi_obj OBJECT ${libcnorxzhdf5mpi_a_SOURCES})
add_library(cnorxzhdf5mpi SHARED $<TARGET_OBJECTS:cnorxzhdf5mpi_obj>)

# Both targets are built as position independent code.
set_target_properties(cnorxzhdf5mpi_obj cnorxzhdf5mpi
  PROPERTIES POSITION_INDEPENDENT_CODE TRUE)

install(TARGETS cnorxzhdf5mpi LIBRARY DESTINATION ${INSTALL_PATH}/lib)

View file

@ -0,0 +1,157 @@
// -*- C++ -*-
/**
@file opt/hdf5-mpi/lib/h5_rdataset.cc
@brief RDataset implementations.
Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de
**/
#include "h5_rdataset.h"
#include "h5_rfile.h"
namespace CNORXZ
{
namespace hdf5
{
/** Construct the parallel dataset handle.
    Asserts that the dataset lives below an RFile (see checkHaveParallel())
    and opens the dataset if it already exists.
    @param name Dataset name.
    @param _parent Parent content object.
 */
RDataset::RDataset(const String& name, const ContentBase* _parent) :
    Dataset(name, _parent)
{
    // the file name is quoted and separated from the surrounding text,
    // so that the message stays readable
    CXZ_ASSERT( checkHaveParallel(), "tried to open dataset in parallel mode while file '"
                << parent()->filename() << "' was opened in serial mode");
    if(exists()){
        open();
    }
}
/** Initialize the dataset.
    A range of type "R" is replaced by a range whose extent in each dimension
    is the product of the sizes of the corresponding sub-ranges of its two
    components; any other range is passed on unchanged. The actual
    initialization is done by Dataset::initbase.
    @param fileRange Range characterizing the dataset.
    @param type Data type id.
    @return Reference to this dataset.
 */
RDataset& RDataset::initbase(const RangePtr& fileRange, hid_t type)
{
    if(fileRange->stype() != "R"){
        Dataset::initbase(fileRange, type);
        return *this;
    }
    const RangePtr localRange = fileRange->sub(1);
    const RangePtr geomRange = fileRange->sub(0);
    const SizeT rank = localRange->dim();
    // TODO: assert that geomRange has the same dimension as localRange
    Vector<RangePtr> extents(rank);
    for(SizeT d = 0; d < rank; ++d){
        const SizeT localSize = localRange->savesub(d)->size();
        const SizeT geomSize = geomRange->savesub(d)->size();
        extents[d] = CRangeFactory(localSize*geomSize).create();
    }
    const RangePtr globalRange = yrange(extents);
    Dataset::initbase(globalRange, type);
    return *this;
}
/** Write data into the dataset using collective MPI I/O.
    If writeRange is of type "R", its second component (sub(1)) describes the
    block held by this rank and the position of the block within the file is
    derived from the rank index; otherwise writeRange itself describes the block.
    @param writeRange Range characterizing the format of the data to be written.
    @param pos Optional position that is added to the offset of the block
    (may be a null pointer).
    @param data Pointer to raw data.
    @return Reference to this dataset.
 */
RDataset& RDataset::writebase(const RangePtr& writeRange, Sptr<YIndex> pos, const void* data)
{
    // The type of the range passed by the caller decides about the rank offset.
    // It must be evaluated on writeRange, not on the data range: the latter is
    // already reduced to the local block and is no longer of type "R".
    const bool ranked = writeRange->stype() == "R";
    const RangePtr dr = ranked ? writeRange->sub(1) : writeRange;
    CXZ_ASSERT(dr->dim() == mFileRange->dim(), "dimension of data range ("
               << dr->dim() << ") different from dimension of file range ("
               << mFileRange->dim() << ")");
    Vector<hsize_t> offset(mFileRange->dim());
    if(ranked){
        mpi::RIndex<YIndex,YIndex> idx(writeRange);
        idx.localize();
        const SizeT rat = mpi::getNumRanks() / idx.rankI()->lmax().val();
        assert(rat == 1); // for now...
        assert(mpi::getRankNumber() == idx.rankI()->lex());
        // offset of the block of this rank, in units of file elements
        for(SizeT i = 0; i != offset.size(); ++i){
            offset[i] = idx.rankI()->pack().get(i)->lex() * dr->savesub(i)->size();
        }
    }
    if(pos){
        CXZ_ASSERT(pos->range()->dim() == mFileRange->dim(), "dimension of position index ("
                   << pos->range()->dim() << ") different from dimension of file range ("
                   << mFileRange->dim() << ")");
        for(SizeT i = 0; i != offset.size(); ++i){
            offset[i] += pos->pack().get(i)->lex();
        }
    }
    Vector<hsize_t> dims(dr->dim());
    for(SizeT i = 0; i != dims.size(); ++i){
        dims[i] = dr->sub(i)->size();
    }
    H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
    const hid_t memspace = H5Screate_simple(dims.size(), dims.data(), NULL);
    const hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(xfer_plist_id, H5FD_MPIO_COLLECTIVE);
    const herr_t err = H5Dwrite(mId, mType, memspace, mFilespace, xfer_plist_id, data);
    // release the hdf5 ids before reporting a failure
    H5Pclose(xfer_plist_id);
    H5Sclose(memspace);
    CXZ_ASSERT(err >= 0, "error while writing dataset '" << mName
               << "', errorcode :" << err);
    return *this;
}
void Dataset::readbase(void* dest, RangePtr readRange, Sptr<YIndex> beg) const
{
RangePtr dr = readRange;
if(not dr){
dr = mFileRange;
}
if(dr->stype() == "R"){
dr = readRange->sub(1);
}
CXZ_ASSERT(dr->dim() == mFileRange->dim(), "dimension of data range ("
<< dr->dim() << ") different from dimension of file range ("
<< mFileRange->dim() << ")");
Vector<hsize_t> offset(mFileRange->dim());
if(dr->stype() == "R"){
mpi::RIndex<YIndex,YIndex> idx(readRange);
idx.localize();
const SizeT rat = mpi::getNumRanks() / idx.rankI()->lmax().val();
assert(rat == 1); // for now...
assert(mpi::getRankNumber() == idx.rankI()->lex());
for(SizeT i = 0; i != offset.size(); ++i){
offset[i] = idx.rankI()->pack().get(i)->lex() * dr->savesub(i)->size();
}
}
if(beg){
CXZ_ASSERT(beg->range()->dim() == mFileRange->dim(), "dimension of position index ("
<< beg->range()->dim() << ") different from dimension of file range ("
<< mFileRange->dim() << ")");
for(SizeT i = 0; i != offset.size(); ++i){
offset[i] += beg->pack().get(i)->lex();
}
}
Vector<hsize_t> dims(mFileRange->dim());
for(SizeT i = 0; i != dims.size(); ++i){
dims[i] = readRange->sub(i)->size();
}
H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(dims.size()),
dims.data(), nullptr);
const hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(xfer_plist_id, H5FD_MPIO_COLLECTIVE);
//MArray<T> out(readRange);
const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, dest);
CXZ_ASSERT(err >= 0, "error while reading dataset '" << mName
<< "', errorcode :" << err);
H5Pclose(xfer_plist_id);
H5Sclose(mem_space_id);
}
bool RDataset::checkHaveParallel() const
{
const ContentBase* p = parent();
while(p->type() != ContentType::FILE and p != nullptr){
p = p->parent();
}
if(not p){
return false;
}
const RFile* fp = dynamic_cast<const RFile*>(p);
if(not fp){
return false;
}
return true;
}
}
}

View file

@ -0,0 +1,33 @@
// -*- C++ -*-
/**
@file opt/hdf5-mpi/lib/h5_rfile.cc
@brief RFile implementations.
Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de
**/
#include "h5_rfile.h"
#include "mpi.h"
namespace CNORXZ
{
namespace hdf5
{
/** Construct the class.
    Creates a file access property list and configures it for MPI I/O on
    MPI_COMM_WORLD. Failures of the involved hdf5 calls are reported instead
    of being silently ignored.
    NOTE(review): the property list is installed after the File base class
    constructor has run; this presumably relies on the file being opened
    later on - confirm.
    @param fname Path to the hdf5 file to be handled.
    @param _ro Open in read-only mode if true, otherwise have write access.
 */
RFile::RFile(const String& fname, bool _ro) :
    File(fname, _ro)
{
    const hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    CXZ_ASSERT(fapl >= 0, "could not create file access property list for file '"
               << fname << "'");
    const herr_t err = H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
    if(err < 0){
        // the destructor does not run if the assertion below fails,
        // so the list has to be released here
        H5Pclose(fapl);
    }
    CXZ_ASSERT(err >= 0, "could not enable MPI I/O for file '" << fname
               << "', errorcode :" << err);
    mFAPL_id = fapl;
}
/** Destructor. Closes the file access property list created by the constructor.
    The default property list and invalid (negative) ids are left untouched.
 */
RFile::~RFile()
{
    if(mFAPL_id != H5P_DEFAULT and mFAPL_id >= 0){
        H5Pclose(mFAPL_id);
        // do not keep an id that is no longer valid
        mFAPL_id = H5P_DEFAULT;
    }
}
}
}

View file

@ -23,7 +23,7 @@ namespace CNORXZ
{ {
const hid_t tid = getTypeId(*data.data()); const hid_t tid = getTypeId(*data.data());
if(data.begin().formatIsTrivial()){ if(data.begin().formatIsTrivial()){
init(data.range(), tid, data.data()); initbase(data.range(), tid, data.data());
} }
else { else {
CXZ_ERROR("Got array type with non-trivial format; non-contiguous data formats are not supported yet!"); CXZ_ERROR("Got array type with non-trivial format; non-contiguous data formats are not supported yet!");

View file

@ -48,14 +48,14 @@ namespace CNORXZ
@param dataRange A potentially multi-dimensional range characterizing the dataset. @param dataRange A potentially multi-dimensional range characterizing the dataset.
@param type Data type id. @param type Data type id.
*/ */
virtual Dataset& init(const RangePtr& dataRange, hid_t type); virtual Dataset& initbase(const RangePtr& dataRange, hid_t type);
/** Initalize the dataset. /** Initalize the dataset.
@param dataRange A potentially multi-dimensional range characterizing the dataset. @param dataRange A potentially multi-dimensional range characterizing the dataset.
@param type Data type id. @param type Data type id.
@param data Pointer to raw data. @param data Pointer to raw data.
*/ */
virtual Dataset& init(const RangePtr& dataRange, hid_t type, const void* data); virtual Dataset& initbase(const RangePtr& dataRange, hid_t type, const void* data);
/** Write data into dataset. /** Write data into dataset.
@param dataRange A potentially multi-dimensional range characterizing the format of the data to be written. @param dataRange A potentially multi-dimensional range characterizing the format of the data to be written.

View file

@ -2,7 +2,7 @@
/** /**
@file opt/hdf5/include/h5_file.h @file opt/hdf5/include/h5_file.h
@brief Group declaration. @brief File declaration.
Copyright (c) 2024 Christian Zimmermann. All rights reserved. Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de Mail: chizeta@f3l.de
@ -52,8 +52,9 @@ namespace CNORXZ
*/ */
bool ishdf5() const; bool ishdf5() const;
private: protected:
bool mRo = true; bool mRo = true;
hid_t mFAPL_id = H5P_DEFAULT;
}; };
} }
} }

View file

@ -84,7 +84,7 @@ namespace CNORXZ
return H5Lexists(mParent->id(), mName.c_str(), H5P_DEFAULT) > 0; return H5Lexists(mParent->id(), mName.c_str(), H5P_DEFAULT) > 0;
} }
Dataset& Dataset::init(const RangePtr& fileRange, hid_t type) Dataset& Dataset::initbase(const RangePtr& fileRange, hid_t type)
{ {
CXZ_ASSERT(not isOpen(), "tried to initialize dataset that is already extisting"); CXZ_ASSERT(not isOpen(), "tried to initialize dataset that is already extisting");
mFileRange = fileRange; mFileRange = fileRange;
@ -105,9 +105,9 @@ namespace CNORXZ
return *this; return *this;
} }
Dataset& Dataset::init(const RangePtr& writeRange, hid_t type, const void* data) Dataset& Dataset::initbase(const RangePtr& writeRange, hid_t type, const void* data)
{ {
init(writeRange, type); initbase(writeRange, type);
writebase(writeRange, std::make_shared<YIndex>(mFileRange), data); writebase(writeRange, std::make_shared<YIndex>(mFileRange), data);
return *this; return *this;
} }

View file

@ -2,7 +2,7 @@
/** /**
@file opt/hdf5/lib/h5_file.cc @file opt/hdf5/lib/h5_file.cc
@brief Group implementations. @brief File implementations.
Copyright (c) 2024 Christian Zimmermann. All rights reserved. Copyright (c) 2024 Christian Zimmermann. All rights reserved.
Mail: chizeta@f3l.de Mail: chizeta@f3l.de
@ -47,14 +47,14 @@ namespace CNORXZ
if(mRo){ if(mRo){
CXZ_ASSERT( ex == 1, "could not open file as read-only: '" CXZ_ASSERT( ex == 1, "could not open file as read-only: '"
<< fn << "' does not exist'"); << fn << "' does not exist'");
mId = H5Fopen( fn.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT ); mId = H5Fopen( fn.c_str(), H5F_ACC_RDONLY, mFAPL_id );
} }
else { else {
if(ex == 1){ if(ex == 1){
mId = H5Fopen( fn.c_str(), H5F_ACC_RDWR, H5P_DEFAULT ); mId = H5Fopen( fn.c_str(), H5F_ACC_RDWR, mFAPL_id );
} }
else { else {
mId = H5Fcreate( fn.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT ); mId = H5Fcreate( fn.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, mFAPL_id );
} }
} }
CXZ_ASSERT( mId > 0, "error while opening file '" << fn << "'" ); CXZ_ASSERT( mId > 0, "error while opening file '" << fn << "'" );