diff --git a/autotest/ogr/ogr_shape_sbn.py b/autotest/ogr/ogr_shape_sbn.py index 6fe5cd0ca991..f528308b6e6e 100755 --- a/autotest/ogr/ogr_shape_sbn.py +++ b/autotest/ogr/ogr_shape_sbn.py @@ -142,3 +142,22 @@ def test_ogr_shape_sbn_2(): ds = ogr.Open("data/shp/CoHI_GCS12.shp") lyr = ds.GetLayer(0) return search_all_features(lyr) + + +############################################################################### +# Test bugfix for https://github.com/OSGeo/gdal/issues/9430 + + +@pytest.mark.require_curl() +def test_ogr_shape_sbn_out_of_order_bin_start(): + + srv = "https://github.com/OSGeo/gdal-test-datasets/raw/master/shapefile/65sv5l285i_GLWD_level2/README.TXT" + if gdaltest.gdalurlopen(srv, timeout=5) is None: + pytest.skip(reason=f"{srv} is down") + + ds = ogr.Open( + "/vsizip//vsicurl/https://github.com/OSGeo/gdal-test-datasets/raw/master/shapefile/65sv5l285i_GLWD_level2/65sv5l285i_GLWD_level2_sozip.zip" + ) + lyr = ds.GetLayer(0) + lyr.SetSpatialFilterRect(5, 5, 6, 6) + assert lyr.GetFeatureCount() == 13 diff --git a/ogr/ogrsf_frmts/shape/sbnsearch.c b/ogr/ogrsf_frmts/shape/sbnsearch.c index ca95446da305..ef94f6491f75 100644 --- a/ogr/ogrsf_frmts/shape/sbnsearch.c +++ b/ogr/ogrsf_frmts/shape/sbnsearch.c @@ -134,6 +134,24 @@ typedef struct #endif } SearchStruct; +/* Associates a node id with the index of its first bin */ +typedef struct +{ + int nNodeId; + int nBinStart; +} SBNNodeIdBinStartPair; + +/************************************************************************/ +/* SBNCompareNodeIdBinStartPairs() */ +/************************************************************************/ + +/* helper for qsort, to sort SBNNodeIdBinStartPair by increasing nBinStart */ +static int SBNCompareNodeIdBinStartPairs(const void *a, const void *b) +{ + return STATIC_CAST(const SBNNodeIdBinStartPair *, a)->nBinStart - + STATIC_CAST(const SBNNodeIdBinStartPair *, b)->nBinStart; +} + /************************************************************************/ /* SwapWord() */ /* */ @@ -318,54 +336,143 @@ SBNSearchHandle SBNOpenDiskTree(const char *pszSBNFilename, SAHooks *psHooks) hSBN->pasNodeDescriptor = pasNodeDescriptor; + SBNNodeIdBinStartPair *pasNodeIdBinStartPairs = + STATIC_CAST(SBNNodeIdBinStartPair *, + malloc(nNodeDescCount * sizeof(SBNNodeIdBinStartPair))); + if (pasNodeIdBinStartPairs == SHPLIB_NULLPTR) + { + free(pabyData); + hSBN->sHooks.Error("Out of memory error"); + SBNCloseDiskTree(hSBN); + return SHPLIB_NULLPTR; + } + +#ifdef ENABLE_SBN_SANITY_CHECKS + int nShapeCountAcc = 0; +#endif + int nEntriesInNodeIdBinStartPairs = 0; for (int i = 0; i < nNodeDescCount; i++) { - /* -------------------------------------------------------------------- - */ - /* Each node descriptor contains the index of the first bin that */ - /* described it, and the number of shapes in this first bin and */ - /* the following bins (in the relevant case). */ - /* -------------------------------------------------------------------- - */ - int nBinStart = READ_MSB_INT(pabyData + 8 * i); - int nNodeShapeCount = READ_MSB_INT(pabyData + 8 * i + 4); + /* -------------------------------------------------------------------- */ + /* Each node descriptor contains the index of the first bin that */ + /* described it, and the number of shapes in this first bin and */ + /* the following bins (in the relevant case). */ + /* -------------------------------------------------------------------- */ + const int nBinStart = READ_MSB_INT(pabyData + 8 * i); + const int nNodeShapeCount = READ_MSB_INT(pabyData + 8 * i + 4); pasNodeDescriptor[i].nBinStart = nBinStart > 0 ? nBinStart : 0; pasNodeDescriptor[i].nShapeCount = nNodeShapeCount; +#ifdef DEBUG_SBN + fprintf(stderr, "node[%d], nBinStart=%d, nShapeCount=%d\n", i, + nBinStart, nNodeShapeCount); +#endif + if ((nBinStart > 0 && nNodeShapeCount == 0) || nNodeShapeCount < 0 || nNodeShapeCount > nShapeCount) { hSBN->sHooks.Error("Inconsistent shape count in bin"); + free(pabyData); + free(pasNodeIdBinStartPairs); + SBNCloseDiskTree(hSBN); + return SHPLIB_NULLPTR; + } + +#ifdef ENABLE_SBN_SANITY_CHECKS + if (nShapeCountAcc > nShapeCount - nNodeShapeCount) + { + hSBN->sHooks.Error("Inconsistent shape count in bin"); + free(pabyData); + free(pasNodeIdBinStartPairs); SBNCloseDiskTree(hSBN); return SHPLIB_NULLPTR; } + nShapeCountAcc += nNodeShapeCount; +#endif + + if (nBinStart > 0) + { + pasNodeIdBinStartPairs[nEntriesInNodeIdBinStartPairs].nNodeId = i; + pasNodeIdBinStartPairs[nEntriesInNodeIdBinStartPairs].nBinStart = + nBinStart; + ++nEntriesInNodeIdBinStartPairs; + } } free(pabyData); /* pabyData = SHPLIB_NULLPTR; */ - /* Locate first non-empty node */ - int nCurNode = 0; - while (nCurNode < nMaxNodes && pasNodeDescriptor[nCurNode].nBinStart <= 0) - nCurNode++; - - if (nCurNode >= nMaxNodes) + if (nEntriesInNodeIdBinStartPairs == 0) { + free(pasNodeIdBinStartPairs); hSBN->sHooks.Error("All nodes are empty"); SBNCloseDiskTree(hSBN); return SHPLIB_NULLPTR; } - pasNodeDescriptor[nCurNode].nBinOffset = - STATIC_CAST(int, hSBN->sHooks.FTell(hSBN->fpSBN)); +#ifdef ENABLE_SBN_SANITY_CHECKS + if (nShapeCountAcc != nShapeCount) + { + /* Not totally sure if the above condition is always true */ + /* Not enabled by default, as non-needed for the good working */ + /* of our code. */ + free(pasNodeIdBinStartPairs); + char szMessage[128]; + snprintf(szMessage, sizeof(szMessage), + "Inconsistent shape count read in .sbn header (%d) vs total " + "of shapes over nodes (%d)", + nShapeCount, nShapeCountAcc); + hSBN->sHooks.Error(szMessage); + SBNCloseDiskTree(hSBN); + return SHPLIB_NULLPTR; + } +#endif + + /* Sort node descriptors by increasing nBinStart */ + /* In most cases, the node descriptors have already an increasing nBinStart, + * but not for https://github.com/OSGeo/gdal/issues/9430 */ + qsort(pasNodeIdBinStartPairs, nEntriesInNodeIdBinStartPairs, + sizeof(SBNNodeIdBinStartPair), SBNCompareNodeIdBinStartPairs); - /* Compute the index of the next non empty node. */ - int nNextNonEmptyNode = nCurNode + 1; - while (nNextNonEmptyNode < nMaxNodes && - pasNodeDescriptor[nNextNonEmptyNode].nBinStart <= 0) - nNextNonEmptyNode++; + /* Consistency check: the first referenced nBinStart should be 2. */ + if (pasNodeIdBinStartPairs[0].nBinStart != 2) + { + char szMessage[128]; + snprintf(szMessage, sizeof(szMessage), + "First referenced bin (by node %d) should be 2, but %d found", + pasNodeIdBinStartPairs[0].nNodeId, + pasNodeIdBinStartPairs[0].nBinStart); + hSBN->sHooks.Error(szMessage); + SBNCloseDiskTree(hSBN); + free(pasNodeIdBinStartPairs); + return SHPLIB_NULLPTR; + } + + /* And referenced nBinStart should be all distinct. */ + for (int i = 1; i < nEntriesInNodeIdBinStartPairs; ++i) + { + if (pasNodeIdBinStartPairs[i].nBinStart == + pasNodeIdBinStartPairs[i - 1].nBinStart) + { + char szMessage[128]; + snprintf(szMessage, sizeof(szMessage), + "Node %d and %d have the same nBinStart=%d", + pasNodeIdBinStartPairs[i - 1].nNodeId, + pasNodeIdBinStartPairs[i].nNodeId, + pasNodeIdBinStartPairs[i].nBinStart); + hSBN->sHooks.Error(szMessage); + SBNCloseDiskTree(hSBN); + free(pasNodeIdBinStartPairs); + return SHPLIB_NULLPTR; + } + } int nExpectedBinId = 1; + int nIdxInNodeBinPair = 0; + int nCurNode = pasNodeIdBinStartPairs[nIdxInNodeBinPair].nNodeId; + + pasNodeDescriptor[nCurNode].nBinOffset = + STATIC_CAST(int, hSBN->sHooks.FTell(hSBN->fpSBN)); /* -------------------------------------------------------------------- */ /* Traverse bins to compute the offset of the first bin of each */ @@ -382,10 +489,22 @@ SBNSearchHandle SBNOpenDiskTree(const char *pszSBNFilename, SAHooks *psHooks) int nBinSize = READ_MSB_INT(abyBinHeader + 4); nBinSize *= 2; /* 16-bit words */ +#ifdef DEBUG_SBN + fprintf(stderr, "bin id=%d, bin size (in features) = %d\n", nBinId, + nBinSize / 8); +#endif + if (nBinId != nExpectedBinId) { - hSBN->sHooks.Error("Unexpected bin id"); + char szMessage[128]; + snprintf(szMessage, sizeof(szMessage), + "Unexpected bin id at bin starting at offset %d. Got %d, " + "expected %d", + STATIC_CAST(int, hSBN->sHooks.FTell(hSBN->fpSBN)) - 8, + nBinId, nExpectedBinId); + hSBN->sHooks.Error(szMessage); SBNCloseDiskTree(hSBN); + free(pasNodeIdBinStartPairs); return SHPLIB_NULLPTR; } @@ -393,23 +512,24 @@ SBNSearchHandle SBNOpenDiskTree(const char *pszSBNFilename, SAHooks *psHooks) /* If there are more, then they are located in continuous bins */ if ((nBinSize % 8) != 0 || nBinSize <= 0 || nBinSize > 100 * 8) { - hSBN->sHooks.Error("Unexpected bin size"); + char szMessage[128]; + snprintf(szMessage, sizeof(szMessage), + "Unexpected bin size at bin starting at offset %d. Got %d", + STATIC_CAST(int, hSBN->sHooks.FTell(hSBN->fpSBN)) - 8, + nBinSize); + hSBN->sHooks.Error(szMessage); SBNCloseDiskTree(hSBN); + free(pasNodeIdBinStartPairs); return SHPLIB_NULLPTR; } - if (nNextNonEmptyNode < nMaxNodes && - nBinId == pasNodeDescriptor[nNextNonEmptyNode].nBinStart) + if (nIdxInNodeBinPair + 1 < nEntriesInNodeIdBinStartPairs && + nBinId == pasNodeIdBinStartPairs[nIdxInNodeBinPair + 1].nBinStart) { - nCurNode = nNextNonEmptyNode; + ++nIdxInNodeBinPair; + nCurNode = pasNodeIdBinStartPairs[nIdxInNodeBinPair].nNodeId; pasNodeDescriptor[nCurNode].nBinOffset = STATIC_CAST(int, hSBN->sHooks.FTell(hSBN->fpSBN)) - 8; - - /* Compute the index of the next non empty node. */ - nNextNonEmptyNode = nCurNode + 1; - while (nNextNonEmptyNode < nMaxNodes && - pasNodeDescriptor[nNextNonEmptyNode].nBinStart <= 0) - nNextNonEmptyNode++; } pasNodeDescriptor[nCurNode].nBinCount++; @@ -418,6 +538,17 @@ SBNSearchHandle SBNOpenDiskTree(const char *pszSBNFilename, SAHooks *psHooks) hSBN->sHooks.FSeek(hSBN->fpSBN, nBinSize, SEEK_CUR); } + if (nIdxInNodeBinPair + 1 != nEntriesInNodeIdBinStartPairs) + { + hSBN->sHooks.Error("Could not determine nBinOffset / nBinCount for all " + "non-empty nodes."); + SBNCloseDiskTree(hSBN); + free(pasNodeIdBinStartPairs); + return SHPLIB_NULLPTR; + } + + free(pasNodeIdBinStartPairs); + return hSBN; } @@ -618,7 +749,13 @@ static bool SBNSearchDiskInternal(SearchStruct *psSearch, int nDepth, { free(psNode->pabyShapeDesc); psNode->pabyShapeDesc = SHPLIB_NULLPTR; - hSBN->sHooks.Error("Inconsistent shape count for bin"); + char szMessage[128]; + snprintf( + szMessage, sizeof(szMessage), + "Inconsistent shape count for bin idx=%d of node %d. " + "nShapeCountAcc=(%d) + nShapes=(%d) > nShapeCount(=%d)", + i, nNodeId, nShapeCountAcc, nShapes, psNode->nShapeCount); + hSBN->sHooks.Error(szMessage); return false; } @@ -663,18 +800,14 @@ static bool SBNSearchDiskInternal(SearchStruct *psSearch, int nDepth, if (!psNode->bBBoxInit) { -#ifdef sanity_checks - /* -------------------------------------------------------------------- - */ - /* Those tests only check that the shape bounding box - * in the bin */ - /* are consistent (self-consistent and consistent with - * the node */ - /* they are attached to). They are optional however (as - * far as */ - /* the safety of runtime is concerned at least). */ - /* -------------------------------------------------------------------- - */ + /* clang-format off */ +#ifdef ENABLE_SBN_SANITY_CHECKS + /* -------------------------------------------------------------------- */ + /* Those tests only check that the shape bounding box in the bin */ + /* are consistent (self-consistent and consistent with the node */ + /* they are attached to). They are optional however (as far as */ + /* the safety of runtime is concerned at least). */ + /* -------------------------------------------------------------------- */ if (!(((bMinX < bMaxX || (bMinX == 0 && bMaxX == 0) || (bMinX == 255 && bMaxX == 255))) && @@ -725,7 +858,12 @@ static bool SBNSearchDiskInternal(SearchStruct *psSearch, int nDepth, { free(psNode->pabyShapeDesc); psNode->pabyShapeDesc = SHPLIB_NULLPTR; - hSBN->sHooks.Error("Inconsistent shape count for bin"); + char szMessage[96]; + snprintf( + szMessage, sizeof(szMessage), + "Inconsistent shape count for node %d. Got %d, expected %d", + nNodeId, nShapeCountAcc, psNode->nShapeCount); + hSBN->sHooks.Error(szMessage); return false; } @@ -787,8 +925,7 @@ static bool SBNSearchDiskInternal(SearchStruct *psSearch, int nDepth, /* helper for qsort */ static int compare_ints(const void *a, const void *b) { - return *REINTERPRET_CAST(const int *, a) - - *REINTERPRET_CAST(const int *, b); + return *STATIC_CAST(const int *, a) - *STATIC_CAST(const int *, b); } /************************************************************************/