diff --git a/core/src/commonMain/kotlin/com/sunya/cdm/layout/Tiling.kt b/core/src/commonMain/kotlin/com/sunya/cdm/layout/Tiling.kt index ff105a91..30862ee0 100644 --- a/core/src/commonMain/kotlin/com/sunya/cdm/layout/Tiling.kt +++ b/core/src/commonMain/kotlin/com/sunya/cdm/layout/Tiling.kt @@ -1,6 +1,8 @@ package com.sunya.cdm.layout import com.sunya.cdm.api.computeSize +import com.sunya.cdm.api.toIntArray +import com.sunya.cdm.api.toLongArray import kotlin.math.max import kotlin.math.min @@ -83,6 +85,10 @@ class Tiling(varShape: LongArray, chunkShape: LongArray) { return order } + fun order(index: IntArray): Int { + return order(index.toLongArray()).toInt() + } + /** inverse of order() */ fun orderToIndex(order: Long) : LongArray { // calculate tile @@ -99,6 +105,10 @@ class Tiling(varShape: LongArray, chunkShape: LongArray) { return index(tile) } + fun orderToIndex(order: Int) : IntArray { + return orderToIndex(order.toLong()).toIntArray() + } + /** * Create an ordering of index points based on which tile the point is in. * diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt index 78772ac8..cc9ea87d 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt @@ -2,6 +2,7 @@ package com.sunya.netchdf.hdf5 +import com.sunya.cdm.api.toIntArray import com.sunya.cdm.iosp.OpenFileState import com.sunya.cdm.layout.Tiling import com.sunya.cdm.util.InternalLibraryApi @@ -24,11 +25,11 @@ internal class BTree1data( } // if other layouts like BTree2data had this interface we could use in chunkConcurrent - override fun asSequence(): Sequence = sequence { + override fun asSequence(): Sequence = sequence { repeat( tiling.nelems) { //val startingIndex = tiling.orderToIndex(it.toLong()) //val indexSpace = IndexSpace(startingIndex, tiling.chunk) - yield(findDataChunk(it) ?: missingDataChunk(it)) + yield(findDataChunk(it) ?: missingDataChunk(it, tiling)) } } @@ -41,7 +42,7 @@ internal class BTree1data( var level: Int = 0 var nentries: Int = 0 - val keyValues = mutableListOf>() // tile order to DataChunk + val dataChunks = mutableListOf() // tile order to DataChunk val children = mutableListOf() var lastOrder : Int = 0 @@ -63,12 +64,13 @@ internal class BTree1data( repeat(nentries) { val chunkSize = raf.readInt(state) val filterMask = raf.readInt(state) - val inner = LongArray(ndimStorage) { j -> raf.readLong(state) } - val order = tiling.order(inner).toInt() - val key = DataChunkKey(order, chunkSize, filterMask) + val chunkOffset = LongArray(ndimStorage) { j -> raf.readLong(state) } + val order = tiling.order(chunkOffset).toInt() val childPointer = raf.readAddress(state) // 4 or 8 bytes, then add fileOffset if (level == 0) { - keyValues.add(Pair(order, DataChunk(key, childPointer))) + // data class DataChunk(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?, val order: Int, val tiling: Tiling?=null) { + val dataChunk = DataChunk(childPointer, chunkSize, chunkOffset.toIntArray(), filterMask, order, tiling) + dataChunks.add(dataChunk) lastOrder = order } else { children.add( BTreeNode(childPointer, this) ) @@ -91,8 +93,7 @@ internal class BTree1data( return childNode.findDataChunk(wantOrder) } } else { // If it's a leaf node (no children) - val kv = keyValues.find { it.first == wantOrder } - return kv?.second + return dataChunks.find { it.order == wantOrder } } return null } @@ -102,24 +103,5 @@ internal class BTree1data( } } - - data class DataChunkKey(val order: Int, val chunkSize: Int, val filterMask : Int) - - inner class DataChunk(val key : DataChunkKey, val childAddress : Long) : DataChunkIF { - override fun childAddress() = childAddress - override fun offsets() = tiling.orderToIndex(key.order.toLong()) - override fun isMissing() = (childAddress <= 0L) // may be 0 or -1 - override fun chunkSize() = key.chunkSize - override fun filterMask() = key.filterMask - override fun show() = show(tiling) - - fun show(tiling : Tiling) : String = "order=$key, chunkSize=${key.chunkSize}, chunkStart=${offsets().contentToString()}" + - ", tile= ${tiling.tile(offsets() ).contentToString()}" - - } - - fun missingDataChunk(order: Int) : DataChunk { - return DataChunk(DataChunkKey(order, 0, 0), -1L) - } } diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2data.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2data.kt index dde01ce5..68ccfb51 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2data.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree2data.kt @@ -2,7 +2,6 @@ package com.sunya.netchdf.hdf5 import com.sunya.cdm.api.computeSize import com.sunya.cdm.api.toIntArray -import com.sunya.cdm.api.toLongArray import com.sunya.cdm.iosp.OpenFileState import com.sunya.cdm.layout.Tiling @@ -57,17 +56,16 @@ internal class BTree2data( rootNode = BTreeNode(rootNodeAddress, treeDepth, numberOfRecordsInRoot, totalNumberOfRecordsInTree, null) } - override fun asSequence(): Sequence = sequence { + override fun asSequence(): Sequence = sequence { repeat( tiling.nelems) { - //val startingIndex = tiling.orderToIndex(it.toLong()) - //val indexSpace = IndexSpace(startingIndex, tiling.chunk) - yield(findDataChunk(it) ?: missingDataChunk(it)) + val result = findDataChunk(it) ?: missingDataChunk(it, tiling) + yield(result) } } - fun chunkIterator(): Iterator = asSequence().iterator() + fun chunkIterator(): Iterator = asSequence().iterator() - internal fun findDataChunk(order: Int): DataChunkIF? { + internal fun findDataChunk(order: Int): DataChunk? { return rootNode.findDataChunk(order) } @@ -75,7 +73,7 @@ internal class BTree2data( var level: Int = 0 var nentries: Int = 0 - val keyValues = mutableListOf>() // tile order to DataChunk + val dataChunks = mutableListOf() // tile order to DataChunk val children = mutableListOf() var lastOrder : Int = 0 @@ -99,10 +97,9 @@ internal class BTree2data( // dataChunks repeat(numberOfRecords) { - val chunkImpl = readRecord(state, nodeType) - val order = tiling.order(chunkImpl.chunkOffset.toLongArray()).toInt() - keyValues.add(Pair(order, chunkImpl)) - lastOrder = order + val dataChunk = readRecord(state, nodeType) + dataChunks.add(dataChunk) + lastOrder = dataChunk.order } // children @@ -128,16 +125,20 @@ internal class BTree2data( } // uses a tree search = O(log n) - fun findDataChunk(wantOrder: Int): DataChunkIF? { + // this algo assume you dont have xised noted, not true + fun findDataChunk(wantOrder: Int): DataChunk? { + if (dataChunks.isNotEmpty()) { + val result = dataChunks.find { it.order == wantOrder } + if (result != null) return result + } if (children.isNotEmpty()) { // search tree; assumes that chunks are ordered children.forEach { childNode -> if (wantOrder <= childNode.lastOrder) return childNode.findDataChunk(wantOrder) } - } else { // If it's a leaf node (no children) - val kv = keyValues.find { it.first == wantOrder } - return kv?.second - } + } //else { // If it's a leaf node (no children) + // return dataChunks.find { it.order == wantOrder } + //} return null } @@ -147,7 +148,7 @@ internal class BTree2data( } // BTreeNode - fun readRecord(state: OpenFileState, type: Int): ChunkImpl { + fun readRecord(state: OpenFileState, type: Int): DataChunk { return when (type) { 10 -> readRecord10(state, chunkShape.toIntArray(), chunkSize.toInt()) 11 -> readRecord11(state, chunkShape.toIntArray() ) @@ -156,7 +157,7 @@ internal class BTree2data( } // Type 10 Record Layout - Non-filtered Dataset Chunks - fun readRecord10(state: OpenFileState, dims : IntArray, chunkSize: Int): ChunkImpl { + fun readRecord10(state: OpenFileState, dims : IntArray, chunkSize: Int): DataChunk { val address = raf.readOffset(state) // This field is the scaled offset of the chunk within the dataset. n is the number of dimensions for the dataset. @@ -169,13 +170,13 @@ internal class BTree2data( // for (int i = 0; i < chunkOffset.length; i++) { // chunkOffset[i] = Utils.readBytesAsUnsignedInt(buffer, 8) * datasetInfo.getChunkDimensions()[i]; // } - val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() } + val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }.toIntArray() - return ChunkImpl(address, chunkSize, chunkOffset.toIntArray(), null, tiling) + return DataChunk(address, chunkSize, chunkOffset, null, tiling.order(chunkOffset), tiling) } // Type 11 Record Layout - Filtered Dataset Chunks - fun readRecord11(state: OpenFileState, dims : IntArray): ChunkImpl { + fun readRecord11(state: OpenFileState, dims : IntArray): DataChunk { val address = raf.readOffset(state) // LOOK variable size based on what? "Chunk Size (variable size; at most 8 bytes)" @@ -200,14 +201,10 @@ internal class BTree2data( // for (int i = 0; i < chunkOffset.length; i++) { // chunkOffset[i] = Utils.readBytesAsUnsignedInt(buffer, 8) * datasetInfo.getChunkDimensions()[i]; // } - val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() } + val chunkOffset = scaledOffset.mapIndexed { idx, scaledOffset -> (scaledOffset * dims[idx]).toInt() }.toIntArray() // ChunkImpl(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?) - return ChunkImpl(address, chunkSize, chunkOffset.toIntArray(), filterMask, tiling) - } - - fun missingDataChunk(order: Int) : ChunkImpl { - return ChunkImpl(-1, 0, tiling.orderToIndex(order.toLong()).toIntArray(), 0, tiling) + return DataChunk(address, chunkSize, chunkOffset, filterMask, tiling.order(chunkOffset), tiling) } } diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ChunkedDataLayoutV4.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ChunkedDataLayoutV4.kt index 765d33dd..a25cc2fa 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ChunkedDataLayoutV4.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ChunkedDataLayoutV4.kt @@ -3,12 +3,12 @@ package com.sunya.netchdf.hdf5 import com.sunya.cdm.api.computeSize -import com.sunya.cdm.api.toLongArray import com.sunya.cdm.iosp.OpenFileIF import com.sunya.cdm.iosp.OpenFileState import com.sunya.cdm.layout.Tiling import com.sunya.cdm.util.InternalLibraryApi import io.github.oshai.kotlinlogging.KotlinLogging +import kotlin.Long import kotlin.math.ceil // DataLayoutMessage version 4, layout class 2 (chunked), chunkIndexingType 1-5 @@ -81,7 +81,7 @@ internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val md val dataAddress: Long val state = OpenFileState(h5.getFileOffset(mdl.indexAddress), false) - val chunks = mutableListOf() + val chunks = mutableListOf() init { val raf = h5.raf @@ -200,7 +200,7 @@ internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val md val filterMask = raf.readInt(state) // java.util.BitSet = java.util.BitSet.valueOf(byteArrayOf(bb.get(), bb.get(), bb.get(), bb.get())) val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape) - chunks.add(ChunkImpl(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask)) + chunks.add(makeDataChunk(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask)) } fun readUnfiltered(raf: OpenFileIF, state : OpenFileState, chunkIndex: Int) { @@ -208,10 +208,10 @@ internal class FixedArrayIndex(val h5: H5builder, val varShape: IntArray, val md val chunkOffset: IntArray = chunkIndexToChunkOffset(chunkIndex, chunkDimensions, varShape) val unfilteredChunkSize = mdl.chunkDimensions.computeSize() - chunks.add(ChunkImpl(chunkAddress, unfilteredChunkSize, chunkOffset, null)) + chunks.add(makeDataChunk(chunkAddress, unfilteredChunkSize, chunkOffset, 0, null)) } - fun chunkIterator() : Iterator = chunks.iterator() + fun chunkIterator() : Iterator = chunks.iterator() companion object { val logger = KotlinLogging.logger("ChunkedDataLayoutMessageV4") @@ -224,16 +224,16 @@ internal class ImplicitChunkIndex(val h5: H5builder, val varShape: IntArray, val val chunkDimensions = IntArray(mdl.chunkDimensions.size - 1) { mdl.chunkDimensions[it] } // remove the element "dimension" var chunkSize = mdl.chunkDimensions.computeSize() - fun getAllChunks(): List { + fun getAllChunks(): List { val totalChunks: Int = totalChunks(varShape, chunkDimensions) - val chunks = mutableListOf() + val chunks = mutableListOf() for (i in 0..< totalChunks) { chunks.add( - ChunkImpl( + makeDataChunk( mdl.address + i * chunkSize, chunkSize, chunkIndexToChunkOffset(i, chunkDimensions, varShape), - null) + 0, null) ) } return chunks @@ -250,7 +250,7 @@ internal class ImplicitChunkIndex(val h5: H5builder, val varShape: IntArray, val return chunks } - fun chunkIterator() : Iterator = getAllChunks().iterator() + fun chunkIterator() : Iterator = getAllChunks().iterator() } @@ -275,26 +275,6 @@ fun chunkIndexToChunkOffset(chunkIndex: Int, chunkDimensions: IntArray, datasetD } //////////////////////////////////////////////////// -data class ChunkImpl(val address: Long, val size: Int, val chunkOffset: IntArray, val filterMask: Int?, val tiling: Tiling?=null): DataChunkIF { - override fun toString(): String { - return "ChunkImpl(address=$address, size=$size, chunkOffset=${chunkOffset.contentToString()}, filterMask=$filterMask)" - } - - override fun childAddress() = address - - override fun offsets() = chunkOffset.toLongArray() - - override fun isMissing() = address <= 0 - - override fun chunkSize() = size - override fun filterMask() = filterMask ?: 0 - - override fun show(): String { - return if (tiling != null) { - "address=$address, chunkSize=${size}, chunkStart=${offsets().contentToString()}, tile= ${tiling.tile(offsets() ).contentToString()}" - } else { - "TODO(Not yet implemented)" - } - } -} \ No newline at end of file +fun makeDataChunk(address: Long, size: Int, chunkOffset: IntArray, filterMask: Int, tiling: Tiling?=null) = + DataChunk( address, size, chunkOffset, filterMask, 0, tiling) diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/DataChunkSequence.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/DataChunkSequence.kt index c20834e1..74110263 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/DataChunkSequence.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/DataChunkSequence.kt @@ -1,15 +1,18 @@ package com.sunya.netchdf.hdf5 +import com.sunya.cdm.api.toLongArray +import com.sunya.cdm.layout.Tiling + interface DataChunkSequence { - fun asSequence(): Sequence + fun asSequence(): Sequence } -interface DataChunkIF { - fun childAddress(): Long - fun offsets(): LongArray - fun isMissing(): Boolean - fun chunkSize(): Int - fun filterMask(): Int +data class DataChunk(val address: Long, val size: Int, val offsets: IntArray, val filterMask: Int?, val order: Int, val tiling: Tiling?) { + fun isMissing() = (address <= 0) + fun show() : String = "order=$order, chunkSize=${size}, chunkStart=${offsets.contentToString()}" + + ", tile= ${tiling?.tile(offsets.toLongArray()).contentToString()}" +} - fun show(): String +fun missingDataChunk(order: Int, tiling: Tiling) : DataChunk { + return DataChunk(-1, 0, tiling.orderToIndex(order), 0, order, tiling) } \ No newline at end of file diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ExtensibleArrayIndex.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ExtensibleArrayIndex.kt index f0074d79..af023226 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ExtensibleArrayIndex.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/ExtensibleArrayIndex.kt @@ -34,7 +34,7 @@ class ExtensibleArrayIndex(val h5: H5builder, address: Long, datasetDimensions: private var elementCounter = 0 - val chunks: MutableList + val chunks: MutableList init { this.headerAddress = address @@ -81,7 +81,7 @@ class ExtensibleArrayIndex(val h5: H5builder, address: Long, datasetDimensions: dataBlockSize = h5.readLength(state).toInt() val maxIndexSet: Int = h5.readLength(state).toInt() - chunks = ArrayList(maxIndexSet) + chunks = mutableListOf() numberOfElements = h5.readLength(state).toInt() @@ -262,9 +262,9 @@ class ExtensibleArrayIndex(val h5: H5builder, address: Long, datasetDimensions: if (filtered) { val chunkSizeInBytes: Int = h5.readVariableSizeUnsigned(state, extensibleArrayElementSize - h5.sizeOffsets - 4).toInt() val filterMask = raf.readInt(state) - chunks.add(ChunkImpl(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask)) + chunks.add(makeDataChunk(chunkAddress, chunkSizeInBytes, chunkOffset, filterMask)) } else { - chunks.add( ChunkImpl(chunkAddress, unfilteredChunkSize, chunkOffset, null)) + chunks.add( makeDataChunk(chunkAddress, unfilteredChunkSize, chunkOffset, 0, null)) } elementCounter++ return true @@ -328,6 +328,6 @@ class ExtensibleArrayIndex(val h5: H5builder, address: Long, datasetDimensions: } } - fun chunkIterator() : Iterator = chunks.iterator() + fun chunkIterator() : Iterator = chunks.iterator() } \ No newline at end of file diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkConcurrent.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkConcurrent.kt index 13564728..26232fa2 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkConcurrent.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkConcurrent.kt @@ -44,8 +44,10 @@ class H5chunkConcurrent(val h5: H5builder, val v2: Variable, wantSection: if (vinfo.mdl is DataLayoutBTreeVer1) { val mdl = vinfo.mdl chunks = BTree1data(rafext, mdl.btreeAddress, varShape, mdl.chunkDims.toLongArray()) + } else if (vinfo.mdl is DataLayoutBtreeVer2) { + chunks = BTree2data(rafext, v2.name, vinfo.dataPos, v2.shape, vinfo.storageDims) } else { - throw RuntimeException() + throw RuntimeException("H5chunkConcurrent cant read ${vinfo.mdl.javaClass.simpleName}") } } @@ -69,7 +71,7 @@ class H5chunkConcurrent(val h5: H5builder, val v2: Variable, wantSection: } private var count = 0 - private fun CoroutineScope.produceChunks(producer: Sequence): ReceiveChannel = + private fun CoroutineScope.produceChunks(producer: Sequence): ReceiveChannel = produce { for (dataChunk in producer) { send(dataChunk) @@ -81,10 +83,10 @@ class H5chunkConcurrent(val h5: H5builder, val v2: Variable, wantSection: private fun CoroutineScope.launchJob( worker: Worker, - input: ReceiveChannel, + input: ReceiveChannel, lamda: (ArraySection) -> Unit, ) = launch(Dispatchers.Default) { - for (chunk: DataChunkIF in input) { + for (chunk: DataChunk in input) { val arraySection = worker.work(chunk) if (arraySection != null) lamda(arraySection) yield() @@ -111,8 +113,8 @@ class H5chunkConcurrent(val h5: H5builder, val v2: Variable, wantSection: state = OpenFileState(0L, h5type.isBE) } - fun work(dataChunk : DataChunkIF) : ArraySection? { - val dataSpace = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims) + fun work(dataChunk : DataChunk) : ArraySection? { + val dataSpace = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims) if (!allData && !wantSpace.intersects(dataSpace)) { return null } @@ -128,9 +130,9 @@ class H5chunkConcurrent(val h5: H5builder, val v2: Variable, wantSection: bbmissing } else { if (debugChunking) println(" chunkIterator=${dataChunk.show()}") - state.pos = dataChunk.childAddress() - val rawdata = rafext.readByteArray(state, dataChunk.chunkSize()) - val filteredData = if (dataChunk.filterMask() == null) rawdata else filters.apply(rawdata, dataChunk.filterMask()!!) + state.pos = dataChunk.address + val rawdata = rafext.readByteArray(state, dataChunk.size) + val filteredData = if (dataChunk.filterMask == null) rawdata else filters.apply(rawdata, dataChunk.filterMask) if (useEntireChunk) { filteredData } else { diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt index abfa58a4..1adf5600 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt @@ -14,7 +14,7 @@ import kotlin.collections.iterator private val debugChunking = false // DataLayoutSingleChunk4, DataLayoutImplicit4, DataLayoutFixedArray4, DataLayoutExtensibleArray4, DataLayoutBtreeVer2 -internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section, index: Iterator): ArrayTyped { +internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section, index: Iterator): ArrayTyped { val vinfo = v2.spObject as DataContainerVariable val h5type = vinfo.h5type @@ -35,8 +35,8 @@ internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section val state = OpenFileState(0L, vinfo.h5type.isBE) // just run through all the chunks, we wont read any that we dont need - for (dataChunk: DataChunkIF in index) { - val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims) + for (dataChunk: DataChunk in index) { + val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims) val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration if (chunker.nelems > 0) { if (dataChunk.isMissing()) { @@ -44,10 +44,10 @@ internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section chunker.transferMissing(vinfo.fillValue, elemSize, ba) } else { // println(dataChunk.show()) - state.pos = dataChunk.childAddress() - val rawdata = this.raf.readByteArray(state, dataChunk.chunkSize()) - val filteredData = if (vinfo.mfp == null || dataChunk.filterMask() == null) rawdata - else filters.apply(rawdata, dataChunk.filterMask()) + state.pos = dataChunk.address + val rawdata = this.raf.readByteArray(state, dataChunk.size) + val filteredData = if (vinfo.mfp == null || dataChunk.filterMask == null) rawdata + else filters.apply(rawdata, dataChunk.filterMask) chunker.transferBA(filteredData, 0, elemSize, ba, 0) } } @@ -88,7 +88,7 @@ internal fun H5builder.readBtreeVer1(v2: Variable, wantSection: Section): var transferChunks = 0 val state = OpenFileState(0L, vinfo.h5type.isBE) - for (dataChunk: DataChunkIF in tiledData.dataChunks(wantSpace)) { // : Iterable + for (dataChunk: DataChunk in tiledData.dataChunks(wantSpace)) { // : Iterable val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims) val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration if (dataChunk.isMissing()) { @@ -156,17 +156,17 @@ internal fun H5builder.readBtree1data(v2: Variable, wantSection: Section) var transferChunks = 0 val state = OpenFileState(0L, vinfo.h5type.isBE) btree1.asSequence().forEach { dataChunk -> - val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims) + val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims) val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration if (dataChunk.isMissing()) { if (debugChunking) println(" missing ${dataChunk.show()}") chunker.transferMissing(vinfo.fillValue, elemSize, ba) } else { if (debugChunking) println(" chunk=${dataChunk.show()}") - state.pos = dataChunk.childAddress() - val chunkData = this.raf.readByteArray(state, dataChunk.chunkSize()) - val filteredData = if (dataChunk.filterMask() == null) chunkData - else filters.apply(chunkData, dataChunk.filterMask()!!) + state.pos = dataChunk.address + val chunkData = this.raf.readByteArray(state, dataChunk.size) + val filteredData = if (dataChunk.filterMask == null) chunkData + else filters.apply(chunkData, dataChunk.filterMask) chunker.transferBA(filteredData, 0, elemSize, ba, 0) transferChunks += chunker.transferChunks } @@ -182,7 +182,7 @@ internal fun H5builder.readBtree1data(v2: Variable, wantSection: Section) } // DataLayoutBTreeVer1 using chunkIterator -internal fun readBtree1dataWithChunkIterator(hdf5: Hdf5File, v2: Variable, wantSection: SectionPartial?): ArrayTyped { +internal fun readBtreeWithChunkIterator(hdf5: Hdf5File, v2: Variable, wantSection: SectionPartial?): ArrayTyped { val vinfo = v2.spObject as DataContainerVariable val datatype = vinfo.h5type.datatype() diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5maxIterator.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5maxIterator.kt index 5241545b..3b177bd4 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5maxIterator.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5maxIterator.kt @@ -18,7 +18,7 @@ internal class H5maxIterator(val h5: Hdf5File, val v2: Variable, val wantS if (debugChunking) println(" chunk=${indexSection}") val section = indexSection.section(v2.shape) - val array = h5.readArrayData(v2, SectionPartial( section.ranges)) + val array = h5.readArrayData(v2, SectionPartial( section.ranges), recurse = true) setNext(ArraySection(array, section)) } else { done() diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt index 84995f63..21594e72 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt @@ -48,6 +48,10 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { } override fun readArrayData(v2: Variable, wantSection: SectionPartial?): ArrayTyped { + return readArrayData(v2, wantSection, recurse = false) + } + + fun readArrayData(v2: Variable, wantSection: SectionPartial?, recurse: Boolean): ArrayTyped { if (v2.nelems == 0L) { return ArrayEmpty(v2.shape.toIntArray(), v2.datatype) } @@ -79,17 +83,17 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { } else if (vinfo.mdl is DataLayoutBTreeVer1) { // skip the concurrent read on the hard stuff - if (v2.datatype == Datatype.CHAR || v2.datatype == Datatype.COMPOUND || v2.datatype == Datatype.OPAQUE || - v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN) + if ( recurse || (v2.datatype == Datatype.CHAR || v2.datatype == Datatype.COMPOUND || v2.datatype == Datatype.OPAQUE || + v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) header.readBtree1data(v2, section) else - readBtree1dataWithChunkIterator(this, v2, wantSection) + readBtreeWithChunkIterator(this, v2, wantSection) } else if (vinfo.mdl is DataLayoutSingleChunk4) { // header.readSingleChunk(v2, wantSection) // internal data class DataLayoutSingleChunk4(val flags: Byte, val chunkDimensions: IntArray, val chunkSize: Int, val heapAddress: Long, val filterMask: Int?) : DataLayoutMessage() { val offset = IntArray(v2.rank) - val chunk = ChunkImpl(vinfo.mdl.heapAddress, vinfo.mdl.chunkSize, offset, vinfo.mdl.filterMask) + val chunk = makeDataChunk(vinfo.mdl.heapAddress, vinfo.mdl.chunkSize, offset, vinfo.mdl.filterMask?: 0) header.readChunkedData(v2, section, listOf(chunk).iterator()) @@ -109,9 +113,15 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { header.readChunkedData(v2, section, index.chunkIterator()) } else if (vinfo.mdl is DataLayoutBtreeVer2) { - // header.readBtreeVer2j(v2, wantSection) - val index = BTree2data(header.makeFileExtended(), v2.name, vinfo.dataPos, v2.shape, vinfo.storageDims) - header.readChunkedData(v2, section, index.chunkIterator()) + // skip the concurrent reading on the hard stuff + if ( recurse || (v2.datatype == Datatype.CHAR || v2.datatype == Datatype.COMPOUND || v2.datatype == Datatype.OPAQUE || + v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) { + val index = BTree2data(header.makeFileExtended(), v2.name, vinfo.dataPos, v2.shape, vinfo.storageDims) + header.readChunkedData(v2, section, index.chunkIterator()) + // header.readBtree1data(v2, section) + } else { + readBtreeWithChunkIterator(this, v2, wantSection) + } } else { throw RuntimeException("Unsupported data layer type ${vinfo.mdl}") @@ -137,8 +147,8 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { } } - // TODO can we use concurrent reading ?? - return if (this.layoutName(v2) == "DataLayoutBTreeVer1") { + val layoutName = this.layoutName(v2) + return if (layoutName == "DataLayoutBTreeVer1" || layoutName == "DataLayoutBtreeVer2") { // H5chunkIterator(header, v2, wantSection) H5chunkIterator2(this, v2, wantSection) } else { diff --git a/testfiles/src/test/kotlin/com/sunya/netchdf/jhdf/JhdfCompare.kt b/testfiles/src/test/kotlin/com/sunya/netchdf/jhdf/JhdfCompare.kt index 2098ca3a..26f580ed 100644 --- a/testfiles/src/test/kotlin/com/sunya/netchdf/jhdf/JhdfCompare.kt +++ b/testfiles/src/test/kotlin/com/sunya/netchdf/jhdf/JhdfCompare.kt @@ -44,6 +44,11 @@ class JhdfCompare { compareDataWithJhdf("../core/src/commonTest/data/jhdf/chunked_v4_datasets.hdf5", "/btree_v2/large_int16", true, true) } + @Test + fun problem() { + compareDataWithJhdf("../core/src/commonTest/data/jhdf/100B_max_dimension_size.hdf5", "100B-MaxSize", true, true) + } + // @Test horror show fun superblocks() { compareDataWithJhdf(testData + "netcdf-c_hdf5_superblocks/netcdf-c-test-files/v1_8/nc_test4__tst_xplatform2_3.nc", null, true, true)