From e38eaa62a352ce28c1277333c34075c7070a3e72 Mon Sep 17 00:00:00 2001
From: Christian Zimmermann <chizeta@f3l.de>
Date: Wed, 19 Mar 2025 00:03:41 -0700
Subject: [PATCH] hdf5/hdf5-mpi: refactor writebase/initbase

---
 src/opt/hdf5/include/h5_dataset.cc.h      |   6 +-
 src/opt/hdf5/include/h5_dataset.h         |  23 ++--
 src/opt/hdf5/lib/h5_dataset.cc            |  62 +++++-----
 src/opt/hdf5_mpi/include/h5_rdataset.cc.h |   7 +-
 src/opt/hdf5_mpi/include/h5_rdataset.h    |  19 +++-
 src/opt/hdf5_mpi/lib/h5_rdataset.cc       | 131 +++++++++++-----------
 6 files changed, 136 insertions(+), 112 deletions(-)

diff --git a/src/opt/hdf5/include/h5_dataset.cc.h b/src/opt/hdf5/include/h5_dataset.cc.h
index ed57954..88de05d 100644
--- a/src/opt/hdf5/include/h5_dataset.cc.h
+++ b/src/opt/hdf5/include/h5_dataset.cc.h
@@ -44,7 +44,7 @@ namespace CNORXZ
 	    auto mend = fileRangeIndex();
 	    *mend = mend->lmax().val()-1;
 	    auto fbeg = fileRangeIndex();
-	    readbase(out.data(), sizeof(T), mbeg, mend, fbeg);
+	    readbase(out.data(), mbeg, mend, fbeg);
 	    return out;
 	}
 
@@ -59,7 +59,7 @@ namespace CNORXZ
 	    I mend(outrange);
 	    mend = mend.lmax().val()-1;
 	    MArray<T> out(outrange);
-	    readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg));
+	    readbase(out.data(), toYptr(mbeg), toYptr(mend), toYptr(fbeg));
 	    return out;
 	}
 
@@ -78,7 +78,7 @@ namespace CNORXZ
 	    CXZ_ASSERT(mbeg.dim() == mFileRange->dim(),
 		       "array range and file range have different dimensions ("
 		       << mbeg.dim() << " vs " << mFileRange->dim() << ")" );
-	    readbase(data, sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg));
+	    readbase(data, toYptr(mbeg), toYptr(mend), toYptr(fbeg));
 	}
 
 	template <typename T>
diff --git a/src/opt/hdf5/include/h5_dataset.h b/src/opt/hdf5/include/h5_dataset.h
index ca0ad60..bd65275 100644
--- a/src/opt/hdf5/include/h5_dataset.h
+++ b/src/opt/hdf5/include/h5_dataset.h
@@ -44,6 +44,9 @@ namespace CNORXZ
 	    virtual String filename() const override final;
 	    virtual bool exists() const override final;
 
+	    void iosetup(Sptr<YIndex> mbeg, Sptr<YIndex> mend, Sptr<YIndex> fbeg,
+			 hid_t& mem_space_id, hid_t& xfer_plist_id) const;
+	    
 	    /** Initalize the dataset.
 		@param dataRange A potentially multi-dimensional range characterizing the dataset.
 		@param type Data type id.
@@ -57,21 +60,21 @@ namespace CNORXZ
 	     */
 	    virtual Dataset& initbase(const RangePtr& dataRange, hid_t type, const void* data);
 
-	    /** Write data into dataset.
-		@param dataRange A potentially multi-dimensional range characterizing the format of the data to be written.
-		@param pos Position on target.
-		@param data Pointer to raw data to be writte.
-	     */
-	    virtual Dataset& writebase(const RangePtr& dataRange, Sptr<YIndex> pos, const void* data);
-	    
 	    /** Read the dataset.
-		@param dest Pointer to destination.
-		@param tsize Size of dest data type.
+		@param src Pointer to array to be written.
 		@param mbeg Index pointing to the begin of read destination.
 		@param mend Index pointing to the (inclusive) end of read destination.
 		@param fbeg Position within the file space.
 	     */
-	    virtual void readbase(void* dest, SizeT tsize, Sptr<YIndex> mbeg, Sptr<YIndex> mend, Sptr<YIndex> fbeg) const;
+	    virtual const Dataset& writebase(const void* src, Sptr<YIndex> mbeg, Sptr<YIndex> mend, Sptr<YIndex> fbeg) const;
+	    
+	    /** Read the dataset.
+		@param dest Pointer to destination.
+		@param mbeg Index pointing to the begin of read destination.
+		@param mend Index pointing to the (inclusive) end of read destination.
+		@param fbeg Position within the file space.
+	     */
+	    virtual void readbase(void* dest, Sptr<YIndex> mbeg, Sptr<YIndex> mend, Sptr<YIndex> fbeg) const;
 
 	    virtual hid_t mkdxpl() const;
 	    
diff --git a/src/opt/hdf5/lib/h5_dataset.cc b/src/opt/hdf5/lib/h5_dataset.cc
index b1a5294..9578539 100644
--- a/src/opt/hdf5/lib/h5_dataset.cc
+++ b/src/opt/hdf5/lib/h5_dataset.cc
@@ -108,37 +108,16 @@ namespace CNORXZ
 	Dataset& Dataset::initbase(const RangePtr& writeRange, hid_t type, const void* data)
 	{
 	    initbase(writeRange, type);
-	    writebase(writeRange, std::make_shared<YIndex>(mFileRange), data);
+	    auto mbeg = fileRangeIndex();
+	    auto mend = fileRangeIndex();
+	    *mend = mend->lmax().val()-1;
+	    auto fbeg = fileRangeIndex();
+	    writebase(data, mbeg, mend, fbeg);
 	    return *this;
 	}
 
-	Dataset& Dataset::writebase(const RangePtr& writeRange, Sptr<YIndex> pos, const void* data)
-	{
-	    //CXZ_ERROR("TODO!!!");
-	    CXZ_ASSERT(writeRange->dim() == mFileRange->dim(), "dimension of data range ("
-		       << writeRange->dim() << ") different from dimension of file range ("
-		       << mFileRange->dim() << ")");
-	    Vector<hsize_t> dims(writeRange->dim());
-	    for(SizeT i = 0; i != dims.size(); ++i){
-		dims[i] = writeRange->sub(i)->size();
-	    }
-	    if(pos){
-		CXZ_ASSERT(pos->range()->dim() == mFileRange->dim(), "dimension of position index ("
-			   << pos->range()->dim() << ") different from dimension of file range ("
-			   << mFileRange->dim() << ")");
-		const Vector<hsize_t> fpos = mkOff(pos);
-		H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, fpos.data(), NULL, dims.data(), NULL);
-	    }
-	    const hid_t memspace = H5Screate_simple(dims.size(), dims.data(), NULL);
-	    const hid_t xfer_plist_id = this->mkdxpl();
-	    H5Dwrite(mId, mType, memspace, mFilespace, xfer_plist_id, data);
-	    H5Pclose(xfer_plist_id);
-	    H5Sclose(memspace);
-	    return *this;
-	}
-
-	void Dataset::readbase(void* dest, SizeT tsize, Sptr<YIndex> mbeg, Sptr<YIndex> mend,
-			       Sptr<YIndex> fbeg) const
+	void Dataset::iosetup(Sptr<YIndex> mbeg, Sptr<YIndex> mend, Sptr<YIndex> fbeg,
+			      hid_t& mem_space_id, hid_t& xfer_plist_id) const
 	{
 	    const SizeT D = mFileRange->dim();
 	    const bool todo = mbeg != nullptr;
@@ -178,10 +157,33 @@ namespace CNORXZ
 		}
 	    }
 	    H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
-	    const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(mdims.size()),
+	    mem_space_id = H5Screate_simple(static_cast<hsize_t>(mdims.size()),
 							mdims.data(), nullptr);
-	    const hid_t xfer_plist_id = this->mkdxpl();
+	    xfer_plist_id = this->mkdxpl();
 	    H5Sselect_hyperslab(mem_space_id, H5S_SELECT_SET, moffset.data(), NULL, dims.data(), NULL);
+	    
+	}
+	
+	const Dataset& Dataset::writebase(const void* src, Sptr<YIndex> mbeg,
+					  Sptr<YIndex> mend, Sptr<YIndex> fbeg) const
+	{
+	    hid_t mem_space_id = 0;
+	    hid_t xfer_plist_id = 0;
+	    iosetup(mbeg, mend, fbeg, mem_space_id, xfer_plist_id);
+	    const herr_t err = H5Dwrite(mId, mType, mem_space_id, mFilespace, xfer_plist_id, src);
+	    CXZ_ASSERT(err >= 0, "error while writing dataset '" << mName
+		       << "', errorcode :" << err);
+	    H5Pclose(xfer_plist_id);
+	    H5Sclose(mem_space_id);
+	    return *this;
+	}
+
+	void Dataset::readbase(void* dest, Sptr<YIndex> mbeg, Sptr<YIndex> mend,
+			       Sptr<YIndex> fbeg) const
+	{
+	    hid_t mem_space_id = 0;
+	    hid_t xfer_plist_id = 0;
+	    iosetup(mbeg, mend, fbeg, mem_space_id, xfer_plist_id);
 	    const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, dest);
 	    CXZ_ASSERT(err >= 0, "error while reading dataset '" << mName
 		       << "', errorcode :" << err);
diff --git a/src/opt/hdf5_mpi/include/h5_rdataset.cc.h b/src/opt/hdf5_mpi/include/h5_rdataset.cc.h
index 16bbc2e..23a6cc0 100644
--- a/src/opt/hdf5_mpi/include/h5_rdataset.cc.h
+++ b/src/opt/hdf5_mpi/include/h5_rdataset.cc.h
@@ -23,7 +23,8 @@ namespace CNORXZ
 	{
 	    const hid_t tid = getTypeId(*data.data());
 	    if(data.begin().formatIsTrivial()){
-		dynamic_cast<Dataset*>(this)->initbase(data.range(), tid, data.data());
+		initbase(data.range(), tid, data.data());
+		//dynamic_cast<Dataset*>(this)->initbase(data.range(), tid, data.data());
 	    }
 	    else {
 		CXZ_ERROR("Got array type with non-trivial format; non-contiguous data formats are not supported yet!");
@@ -45,7 +46,7 @@ namespace CNORXZ
 	    *mend = mend->lmax().val()-1;
 	    auto fbeg = std::make_shared<YIndex>(mFileRange);
 	    mpi::RArray<T> out(rr);
-	    rreadbase(out.data(), sizeof(T), mbeg, mend, fbeg);
+	    rreadbase(out.data(), mbeg, mend, fbeg);
 	    return out;
 	}
 	
@@ -64,7 +65,7 @@ namespace CNORXZ
 	    auto mend = std::make_shared<mpi::RIndex<YIndex,YIndex>>(outrange);
 	    *mend = mend->lmax().val()-1;
 	    auto fbeg = std::make_shared<YIndex>(mFileRange);
-	    rreadbase(out.data(), sizeof(T), mbeg, mend, fbeg);
+	    rreadbase(out.data(), mbeg, mend, fbeg);
 	    return out;
 	}
     }
diff --git a/src/opt/hdf5_mpi/include/h5_rdataset.h b/src/opt/hdf5_mpi/include/h5_rdataset.h
index 8b61397..7ba4b99 100644
--- a/src/opt/hdf5_mpi/include/h5_rdataset.h
+++ b/src/opt/hdf5_mpi/include/h5_rdataset.h
@@ -38,12 +38,25 @@ namespace CNORXZ
 	    RDataset(const String& name, const ContentBase* _parent);
 
 	    virtual RDataset& initbase(const RangePtr& fileRange, hid_t type) override;
-	    virtual RDataset& writebase(const RangePtr& writeRange, Sptr<YIndex> pos,
-					const void* data) override;
+	    virtual RDataset& initbase(const RangePtr& dataRange, hid_t type,
+				       const void* data) override;
 
 	    virtual hid_t mkdxpl() const override;
 
-	    void rreadbase(void* dest, SizeT size, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+	    RDataset& initbase(const RangePtr& fileRange, hid_t type,
+			       const RangePtr& geom, const void* data);
+
+	    /** Create global file range from data array range. */
+	    RangePtr mkFileRange(const RangePtr& dataRange) const;
+	    
+	    bool riosetup(Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+			  Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg,
+			  Sptr<YIndex>& mbegl, Sptr<YIndex>& mendl, Sptr<YIndex>& fbegl) const;
+	    
+	    void rwritebase(const void* src, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+			    Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg) const;
+
+	    void rreadbase(void* dest, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
 			   Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg) const;
 	    
 	    /** Initalize the dataset.
diff --git a/src/opt/hdf5_mpi/lib/h5_rdataset.cc b/src/opt/hdf5_mpi/lib/h5_rdataset.cc
index 00bcb15..2de2f6d 100644
--- a/src/opt/hdf5_mpi/lib/h5_rdataset.cc
+++ b/src/opt/hdf5_mpi/lib/h5_rdataset.cc
@@ -23,14 +23,13 @@ namespace CNORXZ
 			<< parent()->filename() << " was opened in serial mode");
 	}
 
-	RDataset& RDataset::initbase(const RangePtr& fileRange, hid_t type)
+	RangePtr RDataset::mkFileRange(const RangePtr& dataRange) const
 	{
-	    RangePtr fr = fileRange;
+	    RangePtr fr = dataRange;
 	    if(fr->stype() == "R"){
 		const RangePtr local = fr->sub(1);
 		const RangePtr geom = fr->sub(0);
 		const SizeT ndims = local->dim();
-		//CXZ_ASSERT(ndims == geom->dim(), "")
 		Vector<RangePtr> rs(ndims);
 		for(SizeT i = 0; i != ndims; ++i){
 		    const SizeT ext = local->savesub(i)->size()*geom->savesub(i)->size();
@@ -38,59 +37,44 @@ namespace CNORXZ
 		}
 		fr = yrange(rs);
 	    }
-	    Dataset::initbase(fr, type);
-	    MPI_Barrier(MPI_COMM_WORLD);
-	    return *this;
-	}
-	    
-	RDataset& RDataset::writebase(const RangePtr& writeRange, Sptr<YIndex> pos, const void* data)
-	{
-	    //bool todo = true;
-	    RangePtr dr = writeRange;
-	    bool parallel = dr->stype() == "R";
-	    if(parallel){
-		dr = writeRange->sub(1);
-	    }
-	    CXZ_ASSERT(dr->dim() == mFileRange->dim(), "dimension of data range ("
-		       << dr->dim() << ") different from dimension of file range ("
-		       << mFileRange->dim() << ")");
-	    Vector<hsize_t> offset(mFileRange->dim());
-	    if(parallel){
-		mpi::RIndex<YIndex,YIndex> idx(writeRange);
-		idx.localize();
-		const SizeT rat = mpi::getNumRanks() / idx.rankI()->lmax().val();
-		assert(rat == 1); // for now...
-		assert(mpi::getRankNumber() == idx.rankI()->lex());
-		for(SizeT i = 0; i != offset.size(); ++i){
-		    offset[i] = idx.rankI()->pack().get(i)->lex() * dr->savesub(i)->size();
-		}
-	    }
-	    if(pos){
-		CXZ_ASSERT(pos->range()->dim() == mFileRange->dim(), "dimension of position index ("
-			   << pos->range()->dim() << ") different from dimension of file range ("
-			   << mFileRange->dim() << ")");
-		for(SizeT i = 0; i != offset.size(); ++i){
-		    offset[i] += pos->pack().get(i)->lex();
-		}
-	    }
-	    
-	    Vector<hsize_t> dims(dr->dim());
-	    for(SizeT i = 0; i != dims.size(); ++i){
-		dims[i] = dr->sub(i)->size();
-	    }
-	    H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
-	    const hid_t memspace = H5Screate_simple(dims.size(), dims.data(), NULL);
-	    const hid_t xfer_plist_id = H5Pcreate(H5P_DATASET_XFER);
-	    H5Pset_dxpl_mpio(xfer_plist_id, H5FD_MPIO_COLLECTIVE);
-	    H5Dwrite(mId, mType, memspace, mFilespace, xfer_plist_id, data);
-	    H5Pclose(xfer_plist_id);
-	    H5Sclose(memspace);
-	    MPI_Barrier(MPI_COMM_WORLD);
-	    return *this;
+	    return fr;
 	}
 	
-	void RDataset::rreadbase(void* dest, SizeT tsize, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
-				 Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg) const
+	RDataset& RDataset::initbase(const RangePtr& fileRange, hid_t type)
+	{
+	    CXZ_ASSERT(fileRange->stype() != "R", "file range has to be non-ranked");
+	    Dataset::initbase(fileRange, type);
+	    MPI_Barrier(MPI_COMM_WORLD);
+	    return *this;
+	}
+	    
+	RDataset& RDataset::initbase(const RangePtr& dataRange, hid_t type, const void* data)
+	{
+	    if(dataRange->stype() == "R"){
+		initbase(mkFileRange(dataRange), type, dataRange->sub(0), data);
+	    }
+	    else {
+		Dataset::initbase(dataRange, type, data);
+	    }
+	    return *this;
+	}
+
+	RDataset& RDataset::initbase(const RangePtr& fileRange, hid_t type,
+				     const RangePtr& geom, const void* data)
+	{
+	    initbase(fileRange, type);
+	    auto rr = rangeCast<mpi::RRange<YRange,YRange>>( mpi::rrange(mFileRange, geom) );
+	    auto mbeg = std::make_shared<mpi::RIndex<YIndex,YIndex>>(rr);
+	    auto mend = std::make_shared<mpi::RIndex<YIndex,YIndex>>(rr);
+	    *mend = mend->lmax().val()-1;
+	    auto fbeg = std::make_shared<YIndex>(mFileRange);
+	    rwritebase(data, mbeg, mend, fbeg);
+	    return *this;
+	}
+
+	bool RDataset::riosetup(Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+				Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg,
+				Sptr<YIndex>& mbegl, Sptr<YIndex>& mendl, Sptr<YIndex>& fbegl) const
 	{
 	    const SizeT D = mFileRange->dim();
 	    CXZ_ASSERT(fbeg->range()->dim() == D, "dimension of file index ("
@@ -103,16 +87,12 @@ namespace CNORXZ
 		       << mend->local()->range()->dim() << ") different from dimension of file range ("
 		       << D << ")");
 
-	    Sptr<YIndex> mbegl = std::make_shared<YIndex>(*mbeg->local());
-	    Sptr<YIndex> mendl = std::make_shared<YIndex>(*mend->local());
-	    Sptr<YIndex> fbegl = std::make_shared<YIndex>(*fbeg);
+	    mbegl = std::make_shared<YIndex>(*mbeg->local());
+	    mendl = std::make_shared<YIndex>(*mend->local());
+	    fbegl = std::make_shared<YIndex>(*fbeg);
 
 	    auto mbeg2 = std::make_shared<mpi::RIndex<YIndex,YIndex>>(mbeg->range());
 	    mbeg2->localize();
-	    //mbeg->localize();
-	    //mend->localize();
-	    //auto mbeg2 = std::make_shared<mpi::RIndex<YIndex,YIndex>>(mbeg->range());
-	    //mbeg2->synchronize();
 	    auto rmy = mbeg2->rankI();
 	    auto rb = mbeg->rankI();
 	    auto re = mend->rankI();
@@ -144,13 +124,38 @@ namespace CNORXZ
 	    (*mbegl)();
 	    (*mendl)();
 	    (*fbegl)();
+	    return skip;
+	}
 
+	void RDataset::rwritebase(const void* src, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+				  Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg) const
+	{
+	    Sptr<YIndex> mbegl;
+	    Sptr<YIndex> mendl;
+	    Sptr<YIndex> fbegl;
+	    const bool skip = riosetup(mbeg, mend, fbeg, mbegl, mendl, fbegl);
 	    if(skip){
 		// synchronous MPI read -> have to call this functio on all ranks
-		Dataset::readbase(dest, tsize, nullptr, nullptr, fbegl);
+		Dataset::writebase(src, nullptr, nullptr, fbegl);
 	    }
 	    else {
-		Dataset::readbase(dest, tsize, mbegl, mendl, fbegl);
+		Dataset::writebase(src, mbegl, mendl, fbegl);
+	    }
+	}
+	
+	void RDataset::rreadbase(void* dest, Sptr<mpi::RIndex<YIndex,YIndex>> mbeg,
+				 Sptr<mpi::RIndex<YIndex,YIndex>> mend, Sptr<YIndex> fbeg) const
+	{
+	    Sptr<YIndex> mbegl;
+	    Sptr<YIndex> mendl;
+	    Sptr<YIndex> fbegl;
+	    const bool skip = riosetup(mbeg, mend, fbeg, mbegl, mendl, fbegl);
+	    if(skip){
+		// synchronous MPI read -> have to call this functio on all ranks
+		Dataset::readbase(dest, nullptr, nullptr, fbegl);
+	    }
+	    else {
+		Dataset::readbase(dest, mbegl, mendl, fbegl);
 	    }
 	}