From 8b70e3c6dda34f7d37dc128708489e14a1e56313 Mon Sep 17 00:00:00 2001 From: Christian Zimmermann <chizeta@f3l.de> Date: Tue, 18 Mar 2025 14:54:54 -0700 Subject: [PATCH] hdf5: subslab read: fixes + new test --- src/include/array/aindex.cc.h | 4 +- src/include/array/aindex.h | 4 +- src/opt/hdf5/include/h5_dataset.cc.h | 47 ++++++++++---------- src/opt/hdf5/include/h5_dataset.h | 29 ++++++------ src/opt/hdf5/lib/h5_dataset.cc | 27 +++++++++--- src/opt/hdf5/tests/h5_basic_unit_test.cc | 56 ++++++++++++++++++++++++ 6 files changed, 120 insertions(+), 47 deletions(-) diff --git a/src/include/array/aindex.cc.h b/src/include/array/aindex.cc.h index e28c257..3fa0980 100644 --- a/src/include/array/aindex.cc.h +++ b/src/include/array/aindex.cc.h @@ -93,13 +93,13 @@ namespace CNORXZ } template <typename T> - T& BIndex<T>::operator*() + T& BIndex<T>::operator*() const { return mData[IB::mPos]; } template <typename T> - T* BIndex<T>::operator->() + T* BIndex<T>::operator->() const { return mData + IB::mPos; } diff --git a/src/include/array/aindex.h b/src/include/array/aindex.h index 7f7a411..8ebb09d 100644 --- a/src/include/array/aindex.h +++ b/src/include/array/aindex.h @@ -59,8 +59,8 @@ namespace CNORXZ BIndex operator+(Int n) const; BIndex operator-(Int n) const; - T& operator*(); - T* operator->(); + T& operator*() const; + T* operator->() const; private: T* mData = nullptr; diff --git a/src/opt/hdf5/include/h5_dataset.cc.h b/src/opt/hdf5/include/h5_dataset.cc.h index 22df7ad..ed57954 100644 --- a/src/opt/hdf5/include/h5_dataset.cc.h +++ b/src/opt/hdf5/include/h5_dataset.cc.h @@ -40,46 +40,45 @@ namespace CNORXZ MArray<T> SDataset<T>::read() const { MArray<T> out(mFileRange); - readbase(out.data(), nullptr, nullptr); + auto mbeg = fileRangeIndex(); + auto mend = fileRangeIndex(); + *mend = mend->lmax().val()-1; + auto fbeg = fileRangeIndex(); + readbase(out.data(), sizeof(T), mbeg, mend, fbeg); return out; } template <typename T> template <class I, typename M> - MArray<T> SDataset<T>::read(const IndexInterface<I,M>& idx) const + MArray<T> SDataset<T>::read(const IndexInterface<I,M>& fbeg, const IndexInterface<I,M>& fend) const { - // DEPRECATED!!! - CXZ_ASSERT(idx.lex() == 0, "only full read supported") - CXZ_ASSERT(idx.dim() == mFileRange->dim(), "got index of inconsistent dimension, got" - << idx.dim() << ", expected " << mFileRange->dim()); - const RangePtr outrange = idx.range(); - CXZ_ASSERT(outrange->size() == mFileRange->size(), - "got index of range of inconsistent size, expected " - << mFileRange->size() << ", got " << outrange->size()); + CXZ_ASSERT(fbeg.dim() == mFileRange->dim(), "got index of inconsistent dimension, got" + << fbeg.dim() << ", expected " << mFileRange->dim()); + const RangePtr outrange = fbeg.prange(fend); const I mbeg(outrange); I mend(outrange); mend = mend.lmax().val()-1; MArray<T> out(outrange); - //readbase(out.data(), outrange, nullptr); - readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(idx)); + readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg)); return out; } template <typename T> template <class I, typename M> - MArray<T> SDataset<T>::read(const IndexInterface<I,M>& beg, const IndexInterface<I,M>& end) const + void SDataset<T>::read(const BIndex<T>& mbeg, const BIndex<T>& mend, + const IndexInterface<I,M>& fbeg) const { - CXZ_ASSERT(beg.dim() == mFileRange->dim(), "got index of inconsistent dimension, got" - << beg.dim() << ", expected " << mFileRange->dim()); - const RangePtr outrange = beg.prange(end); - const I mbeg(outrange); - I mend(outrange); - mend = mend.lmax().val()-1; - MArray<T> out(outrange); - //readbase(out.data(), outrange, toYptr(beg)); - //readbase(out.data(), toYptr(beg), toYptr(end), toYptr(beg)); - readbase(out.data(), sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(beg)); - return out; + T* const data = &*mbeg - mbeg.pos(); + T* const data2 = &*mend - mend.pos(); + CXZ_ASSERT(mbeg.range() == mend.range(), + "array begin index and array end index are of different range"); + CXZ_ASSERT(data == data2, + "array begin index and array end index are pointing to different arrays"); + CXZ_ASSERT(fbeg.range() == mFileRange, "file begin index is not index of file range"); + CXZ_ASSERT(mbeg.dim() == mFileRange->dim(), + "array range and file range have different dimensions (" + << mbeg.dim() << " vs " << mFileRange->dim() << ")" ); + readbase(data, sizeof(T), toYptr(mbeg), toYptr(mend), toYptr(fbeg)); } template <typename T> diff --git a/src/opt/hdf5/include/h5_dataset.h b/src/opt/hdf5/include/h5_dataset.h index 338cfb1..3c58cd3 100644 --- a/src/opt/hdf5/include/h5_dataset.h +++ b/src/opt/hdf5/include/h5_dataset.h @@ -95,6 +95,7 @@ namespace CNORXZ protected: Vector<hsize_t> mkOff(const Sptr<YIndex>& beg) const; + Sptr<YIndex> fileRangeIndex() const; RangePtr mFileRange; /**< The range of the dataset. */ hid_t mType; /**< The data type identifier. */ @@ -118,32 +119,32 @@ namespace CNORXZ */ SDataset(const String& name, const ContentBase* _parent); - /** Read the dataset. + /** Read the entire dataset and store it in newly created array. @return Array containing the dataset values. */ MArray<T> read() const; - - /** Read the dataset using range of given index. - The index position is ignored. - @param idx Index specifying the range. + + /** Read a given hypercubic subset of the dataset and store it in newly created array. + @param fbeg Index pointing to the read begin hypercube edge file position. + @param fend Index pointing to the (inclusive) read end hypercuve edge file position. @return Array containing the dataset values. */ template <class I, typename M> - MArray<T> read(const IndexInterface<I,M>& idx) const; + MArray<T> read(const IndexInterface<I,M>& fbeg, const IndexInterface<I,M>& fend) const; - /** Read a given subset of the dataset. - The subset needs to be hypercubic. - @param beg Index indicating the begin edge of the hypercube. - @param end Index indicating the end edge of the hypercube (inclusive). + /** Read a given hypercubic subset of the dataset and store it in array pointed + to by mbeg and mend. + @param mbeg Index pointing to the begin edge of the target array. + @param mend Index pointing to the (inclusive) end edge of the target array. + @param fbeg Index pointing to the begin edge within the file range. @return Array containing the dataset values. */ template <class I, typename M> - MArray<T> read(const IndexInterface<I,M>& beg, const IndexInterface<I,M>& end) const; - + void read(const BIndex<T>& mbeg, const BIndex<T>& mend, + const IndexInterface<I,M>& fbeg) const; + private: - //template <class I, typename M> - //Vector<hsize_t> mkFPos(const IndexInterface<I,M>& beg) const; template <class I, typename M> Sptr<YIndex> toYptr(const IndexInterface<I,M>& beg) const; }; diff --git a/src/opt/hdf5/lib/h5_dataset.cc b/src/opt/hdf5/lib/h5_dataset.cc index e5f9ac8..6e2a468 100644 --- a/src/opt/hdf5/lib/h5_dataset.cc +++ b/src/opt/hdf5/lib/h5_dataset.cc @@ -175,7 +175,9 @@ namespace CNORXZ const SizeT D = mFileRange->dim(); const bool todo = mbeg != nullptr; Vector<hsize_t> offset(D); + Vector<hsize_t> moffset(D);//!!! Vector<hsize_t> dims(D); + Vector<hsize_t> mdims(D);//!!! if(todo){ //RangePtr dr = mbeg->range(); //const bool parallel = true; @@ -194,8 +196,13 @@ namespace CNORXZ const SizeT bi = mbeg->pack().get(i)->lex(); const SizeT ei = mend->pack().get(i)->lex(); const SizeT fbegi = fbeg->pack().get(i)->lex(); + CXZ_ASSERT(fbegi + ei - bi < fbeg->pack().get(i)->lmax().val(), + "exceeding file range at dimension " << i << ": " + << fbegi + ei - bi << " >= " << fbeg->pack().get(i)->lmax().val()); dims[i] = ei - bi + 1; // inclusive! + mdims[i] = mbeg->pack().get(i)->range()->size(); offset[i] = fbegi; + moffset[i] = bi; } } else { @@ -205,11 +212,11 @@ namespace CNORXZ } } H5Sselect_hyperslab(mFilespace, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL); - const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(dims.size()), - dims.data(), nullptr); + const hid_t mem_space_id = H5Screate_simple(static_cast<hsize_t>(mdims.size()), + mdims.data(), nullptr); const hid_t xfer_plist_id = this->mkdxpl(); - char* truedest = reinterpret_cast<char*>(dest) + tsize*mbeg->pos(); - const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, reinterpret_cast<void*>(truedest)); + H5Sselect_hyperslab(mem_space_id, H5S_SELECT_SET, moffset.data(), NULL, dims.data(), NULL); + const herr_t err = H5Dread(mId, mType, mem_space_id, mFilespace, xfer_plist_id, dest); CXZ_ASSERT(err >= 0, "error while reading dataset '" << mName << "', errorcode :" << err); H5Pclose(xfer_plist_id); @@ -235,6 +242,16 @@ namespace CNORXZ } return off; } - + + Sptr<YIndex> Dataset::fileRangeIndex() const + { + const SizeT D = mFileRange->dim(); + Vector<XIndexPtr> idxs(D); + for(SizeT i = 0; i != D; ++i){ + idxs[i] = xindexPtr( std::make_shared<CIndex>( mFileRange->savesub(i) ) ); + } + return yindexPtr(idxs); + } + } } diff --git a/src/opt/hdf5/tests/h5_basic_unit_test.cc b/src/opt/hdf5/tests/h5_basic_unit_test.cc index e589556..afad1eb 100644 --- a/src/opt/hdf5/tests/h5_basic_unit_test.cc +++ b/src/opt/hdf5/tests/h5_basic_unit_test.cc @@ -246,6 +246,62 @@ namespace i->ifor( operation( [](Double a, Double b) { EXPECT_EQ(a,b); }, data(i), mData(j*i) ), NoF{} )(); h5f.close(); } + + TEST_F(Group_Test, ReadDatasetPart2) + { + File h5f(mFileName, true); + h5f.open(); + auto dset = h5f.getGroup("gr2")->open().getDataset("dat1", Double{}); + //YIndex beg(dset->dataRange()); + //beg.setSub(0,2); + //YIndex end = beg - 1; + //end.setSub(0,2); + //auto data = dset->read(beg,end); + Vector<RangePtr> dranges(dset->dataRange()->dim()); + EXPECT_EQ(dranges.size(), 5u); + dranges[0] = CRangeFactory(5).create(); + dranges[1] = CRangeFactory(2).create(); + dranges[2] = CRangeFactory(5).create(); + dranges[3] = CRangeFactory(5).create(); + dranges[4] = CRangeFactory(2).create(); + const RangePtr drange = yrange(dranges); + MArray<Double> data(drange); + auto mbeg = data.begin(); + auto mend = data.begin(); + YIndex fbeg(dset->dataRange()); + Arr<SizeT,5> mbegpos = { 1,0,0,2,0 }; + Arr<SizeT,5> mendpos = { 3,1,3,4,1 }; + Arr<SizeT,5> fbegpos = { 3,1,4,2,0 }; + for(SizeT i = 0; i != 5u; ++i){ + mbeg.setSub(i,mbegpos[i]); + mend.setSub(i,mendpos[i]); + fbeg.setSub(i,fbegpos[i]); + } + dset->read(mbeg, mend, fbeg); + + EXPECT_EQ( data.range()->dim(), 5u ); + auto mi = MIndex<CIndex,CIndex,CIndex,CIndex,CIndex>(drange); + auto mj = MIndex<CIndex,CIndex,CIndex,CIndex,CIndex>(mData.range()); + for(mi = 0; mi.lex() != mi.lmax().val(); ++mi){ + auto meta = mi.meta(); + bool todo = true; + iter<0,5> + ( [&](auto mu) { + if(std::get<mu>(meta) < mbegpos[mu] or std::get<mu>(meta) > mendpos[mu]){ + todo = false; + } + std::get<mu>(meta) += fbegpos[mu] - mbegpos[mu]; + }, NoF{} ); + if(todo){ + mj.at(meta); + EXPECT_EQ(data[mi], mData[mj]); + } + else { + EXPECT_EQ(data[mi], 0); + } + } + h5f.close(); + } TEST_F(Group_Test, Read2) {