From 8b70e3c6dda34f7d37dc128708489e14a1e56313 Mon Sep 17 00:00:00 2001
From: Christian Zimmermann <chizeta@f3l.de>
Date: Tue, 18 Mar 2025 14:54:54 -0700
Subject: [PATCH] hdf5: subslab read: fixes + new test

---
 src/include/array/aindex.cc.h            |  4 +-
 src/include/array/aindex.h               |  4 +-
 src/opt/hdf5/include/h5_dataset.cc.h     | 47 ++++++++++----------
 src/opt/hdf5/include/h5_dataset.h        | 29 ++++++------
 src/opt/hdf5/lib/h5_dataset.cc           | 27 +++++++++---
 src/opt/hdf5/tests/h5_basic_unit_test.cc | 56 ++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 47 deletions(-)

diff --git a/src/include/array/aindex.cc.h b/src/include/array/aindex.cc.h
index e28c257..3fa0980 100644
--- a/src/include/array/aindex.cc.h
+++ b/src/include/array/aindex.cc.h
@@ -93,13 +93,13 @@ namespace CNORXZ
     }
 
     template <typename T>
-    T& BIndex<T>::operator*()
+    T& BIndex<T>::operator*() const
     {
 	return mData[IB::mPos];
     }
 
     template <typename T>
-    T* BIndex<T>::operator->()
+    T* BIndex<T>::operator->() const
     {
 	return mData + IB::mPos;
     }
diff --git a/src/include/array/aindex.h b/src/include/array/aindex.h
index 7f7a411..8ebb09d 100644
--- a/src/include/array/aindex.h
+++ b/src/include/array/aindex.h
@@ -59,8 +59,8 @@ namespace CNORXZ
 	BIndex operator+(Int n) const;
 	BIndex operator-(Int n) const;
 
-	T& operator*();
-	T* operator->();
+	T& operator*() const;
+	T* operator->() const;
 
     private:
 	T* mData = nullptr;
diff --git a/src/opt/hdf5/include/h5_dataset.cc.h b/src/opt/hdf5/include/h5_dataset.cc.h
index 22df7ad..ed57954 100644
--- a/src/opt/hdf5/include/h5_dataset.cc.h
+++ b/src/opt/hdf5/include/h5_dataset.cc.h
@@ -40,46 +40,45 @@ namespace CNORXZ
 	MArray<T> SDataset<T>::read() const
 	{
 	    MArray<T> out(mFileRange);
-	    readbase(out.data(), nullptr, nullptr);
+	    auto mbeg = fileRangeIndex();
+	    auto mend = fileRangeIndex();
+	    *mend = mend->lmax().val()-1;
+	    auto fbeg = fileRangeIndex();
+	    readbase(out.data(), sizeof(T), mbeg, mend, fbeg);
 	    return out;
 	}
 
 	template <typename T>
 	template <class I, typename M>
-	MArray<T> SDataset<T>::read(const IndexInterface<I,M>& idx) const
+	MArray<T> SDataset<T>::read(const IndexInterface<I,M>& fbeg, const IndexInterface<I,M>& fend) const
 	{
-	    // DEPRECATED!!!
-	    CXZ_ASSERT(idx.lex() == 0, "only full read supported")
-	    CXZ_ASSERT(idx.dim() == mFileRange->dim(), "got index of inconsistent dimension, got"
-		       << idx.dim() << ", expected " << mFileRange->dim());
-	    const RangePtr outrange = idx.range();
-	    CXZ_ASSERT(outrange->size() == mFileRange->size(),
-		       "got index of range of inconsistent size, expected "
-		       << mFileRange->size() << ", got " << outrange->size());
+	    CXZ_ASSERT(fbeg.dim() == mFileRange->dim(), "got index of inconsistent dimension, got"
+		       << fbeg.dim() << ", expected " << mFileRange->dim());
+	    const RangePtr outrange = fbeg.prange(fend);
 	    const I mbeg(outrange);
 	    I mend(outrange);
 	    mend = mend.lmax().val()-1;
 	    MArray<T> out(outrange);
-	    //readbase(out.data(), outrange, nullptr);
-	    readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(idx));
+	    readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg));
 	    return out;
 	}
 
 	template <typename T>
 	template <class I, typename M>
-	MArray<T> SDataset<T>::read(const IndexInterface<I,M>& beg, const IndexInterface<I,M>& end) const
+	void SDataset<T>::read(const BIndex<T>& mbeg, const BIndex<T>& mend,
+			       const IndexInterface<I,M>& fbeg) const
 	{
-	    CXZ_ASSERT(beg.dim() == mFileRange->dim(), "got index of inconsistent dimension, got"
-		       << beg.dim() << ", expected " << mFileRange->dim());
-	    const RangePtr outrange = beg.prange(end);
-	    const I mbeg(outrange);
-	    I mend(outrange);
-	    mend = mend.lmax().val()-1;
-	    MArray<T> out(outrange);
-	    //readbase(out.data(), outrange, toYptr(beg));
-	    //readbase(out.data(), toYptr(beg), toYptr(end), toYptr(beg));
-	    readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(beg));
-	    return out;
+	    T* const data = &*mbeg - mbeg.pos();
+	    T* const data2 = &*mend - mend.pos();
+	    CXZ_ASSERT(mbeg.range() == mend.range(),
+		       "array begin index and array end index are of different range");
+	    CXZ_ASSERT(data == data2,
+		       "array begin index and array end index are pointing to different arrays");
+	    CXZ_ASSERT(fbeg.range() == mFileRange, "file begin index is not index of file range");
+	    CXZ_ASSERT(mbeg.dim() == mFileRange->dim(),
+		       "array range and file range have different dimensions ("
+		       << mbeg.dim() << " vs " << mFileRange->dim() << ")" );
+	    readbase(data, sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg));
 	}
 
 	template <typename T>
diff --git a/src/opt/hdf5/include/h5_dataset.h b/src/opt/hdf5/include/h5_dataset.h
index 338cfb1..3c58cd3 100644
--- a/src/opt/hdf5/include/h5_dataset.h
+++ b/src/opt/hdf5/include/h5_dataset.h
@@ -95,6 +95,7 @@ namespace CNORXZ
 	    
 	protected:
 	    Vector<hsize_t> mkOff(const Sptr<YIndex>& beg) const;
+	    Sptr<YIndex> fileRangeIndex() const;
 	    
 	    RangePtr mFileRange; /**< The range of the dataset. */
 	    hid_t mType; /**< The data type identifier. */
@@ -118,32 +119,32 @@ namespace CNORXZ
 	     */
 	    SDataset(const String& name, const ContentBase* _parent);
 
-	    /** Read the dataset.
+	    /** Read the entire dataset and store it in newly created array.
 		@return Array containing the dataset values.
 	     */
 	    MArray<T> read() const;
-
-	    /** Read the dataset using range of given index.
-		The index position is ignored.
-		@param idx Index specifying the range.
+	    
+	    /** Read a given hypercubic subset of the dataset and store it in newly created array.
+		@param fbeg Index pointing to the read begin hypercube edge file position.
+		@param fend Index pointing to the (inclusive) read end hypercuve edge file position.
 		@return Array containing the dataset values.
 	     */
 	    template <class I, typename M>
-	    MArray<T> read(const IndexInterface<I,M>& idx) const;
+	    MArray<T> read(const IndexInterface<I,M>& fbeg, const IndexInterface<I,M>& fend) const;
 
-	    /** Read a given subset of the dataset.
-		The subset needs to be hypercubic.
-		@param beg Index indicating the begin edge of the hypercube.
-		@param end Index indicating the end edge of the hypercube (inclusive).
+	    /** Read a given hypercubic subset of the dataset and store it in array pointed
+		to by mbeg and mend.
+		@param mbeg Index pointing to the begin edge of the target array.
+		@param mend Index pointing to the (inclusive) end edge of the target array.
+		@param fbeg Index pointing to the begin edge within the file range.
 		@return Array containing the dataset values.
 	     */
 	    template <class I, typename M>
-	    MArray<T> read(const IndexInterface<I,M>& beg, const IndexInterface<I,M>& end) const;
-
+	    void read(const BIndex<T>& mbeg, const BIndex<T>& mend,
+		      const IndexInterface<I,M>& fbeg) const;
+	    
 	private:
 
-	    //template <class I, typename M>
-	    //Vector<hsize_t> mkFPos(const IndexInterface<I,M>& beg) const;
 	    template <class I, typename M>
 	    Sptr<YIndex> toYptr(const IndexInterface<I,M>& beg) const;
 	};
diff --git a/src/opt/hdf5/lib/h5_dataset.cc b/src/opt/hdf5/lib/h5_dataset.cc
index e5f9ac8..6e2a468 100644
--- a/src/opt/hdf5/lib/h5_dataset.cc
+++ b/src/opt/hdf5/lib/h5_dataset.cc
@@ -175,7 +175,9 @@ namespace CNORXZ
 	    const SizeT D = mFileRange->dim();
 	    const bool todo = mbeg != nullptr;
 	    Vector<hsize_t> offset(D);
+	    Vector<hsize_t> moffset(D);//!!!
 	    Vector<hsize_t> dims(D);
+	    Vector<hsize_t> mdims(D);//!!!
 	    if(todo){
 		//RangePtr dr = mbeg->range();
 		//const bool parallel = true;
@@ -194,8 +196,13 @@ namespace CNORXZ
 		    const SizeT bi = mbeg->pack().get(i)->lex();
 		    const SizeT ei = mend->pack().get(i)->lex();
 		    const SizeT fbegi = fbeg->pack().get(i)->lex();
+		    CXZ_ASSERT(fbegi + ei - bi < fbeg->pack().get(i)->lmax().val(),
+			       "exceeding file range at dimension " << i << ": "
+			       << fbegi + ei - bi << " >= " << fbeg->pack().get(i)->lmax().val());
 		    dims[i] = ei - bi + 1; // inclusive!
+		    mdims[i] = mbeg->pack().get(i)->range()->size();
 		    offset[i] = fbegi;
+		    moffset[i] = bi;
 		}
 	    }
 	    else {
@@ -205,11 +212,11 @@ namespace CNORXZ
 		}
 	    }
 	    H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
-	    const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(dims.size()),
-							dims.data(), nullptr);
+	    const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(mdims.size()),
+							mdims.data(), nullptr);
 	    const hid_t xfer_plist_id = this->mkdxpl();
-	    char* truedest = reinterpret_cast<char*>(dest) + tsize*mbeg->pos();
-	    const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, reinterpret_cast<void*>(truedest));
+	    H5Sselect_hyperslab(mem_space_id, H5S_SELECT_SET, moffset.data(), NULL, dims.data(), NULL);
+	    const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, dest);
 	    CXZ_ASSERT(err >= 0, "error while reading dataset '" << mName
 		       << "', errorcode :" << err);
 	    H5Pclose(xfer_plist_id);
@@ -235,6 +242,16 @@ namespace CNORXZ
 	    }
 	    return off;
 	}
-	
+
+	Sptr<YIndex> Dataset::fileRangeIndex() const
+	{
+	    const SizeT D = mFileRange->dim();
+	    Vector<XIndexPtr> idxs(D);
+	    for(SizeT i = 0; i != D; ++i){
+		idxs[i] = xindexPtr( std::make_shared<CIndex>( mFileRange->savesub(i) ) );
+	    }
+	    return yindexPtr(idxs);
+	}
+	    
     }
 }
diff --git a/src/opt/hdf5/tests/h5_basic_unit_test.cc b/src/opt/hdf5/tests/h5_basic_unit_test.cc
index e589556..afad1eb 100644
--- a/src/opt/hdf5/tests/h5_basic_unit_test.cc
+++ b/src/opt/hdf5/tests/h5_basic_unit_test.cc
@@ -246,6 +246,62 @@ namespace
 	i->ifor( operation( [](Double a, Double b) { EXPECT_EQ(a,b); }, data(i), mData(j*i) ), NoF{} )();
 	h5f.close();
     }
+
+    TEST_F(Group_Test, ReadDatasetPart2)
+    {
+	File h5f(mFileName, true);
+	h5f.open();
+	auto dset = h5f.getGroup("gr2")->open().getDataset("dat1", Double{});
+	//YIndex beg(dset->dataRange());
+	//beg.setSub(0,2);
+	//YIndex end = beg - 1;
+	//end.setSub(0,2);
+	//auto data = dset->read(beg,end);
+	Vector<RangePtr> dranges(dset->dataRange()->dim());
+	EXPECT_EQ(dranges.size(), 5u);
+	dranges[0] = CRangeFactory(5).create();
+	dranges[1] = CRangeFactory(2).create();
+	dranges[2] = CRangeFactory(5).create();
+	dranges[3] = CRangeFactory(5).create();
+	dranges[4] = CRangeFactory(2).create();
+	const RangePtr drange = yrange(dranges);
+	MArray<Double> data(drange);
+	auto mbeg = data.begin();
+	auto mend = data.begin();
+	YIndex fbeg(dset->dataRange());
+	Arr<SizeT,5> mbegpos = { 1,0,0,2,0 };
+	Arr<SizeT,5> mendpos = { 3,1,3,4,1 };
+	Arr<SizeT,5> fbegpos = { 3,1,4,2,0 };
+	for(SizeT i = 0; i != 5u; ++i){
+	    mbeg.setSub(i,mbegpos[i]);
+	    mend.setSub(i,mendpos[i]);
+	    fbeg.setSub(i,fbegpos[i]);
+	}
+	dset->read(mbeg, mend, fbeg);
+	
+	EXPECT_EQ( data.range()->dim(), 5u );
+	auto mi = MIndex<CIndex,CIndex,CIndex,CIndex,CIndex>(drange);
+	auto mj = MIndex<CIndex,CIndex,CIndex,CIndex,CIndex>(mData.range());
+	for(mi = 0; mi.lex() != mi.lmax().val(); ++mi){
+	    auto meta = mi.meta();
+	    bool todo = true;
+	    iter<0,5>
+		( [&](auto mu) {
+		    if(std::get<mu>(meta) < mbegpos[mu] or std::get<mu>(meta) > mendpos[mu]){
+			todo = false;
+		    }
+		    std::get<mu>(meta) += fbegpos[mu] - mbegpos[mu];
+		}, NoF{} );
+	    if(todo){
+		mj.at(meta);
+		EXPECT_EQ(data[mi], mData[mj]);
+	    }
+	    else {
+		EXPECT_EQ(data[mi], 0);
+	    }
+	}
+	h5f.close();
+    }
     
     TEST_F(Group_Test, Read2)
     {