Skip to content

Commit

Permalink
GH-41460: [C++] Use ASAN to poison temp vector stack memory (#41695)
Browse files Browse the repository at this point in the history
### Rationale for this change

See #41460. And reduce the overhead of current manual poisoning (filling the entire stack space with `0xFF`s) that happens even in release mode.

### What changes are included in this PR?

Use ASAN API to replace the current manual poisoning of the temp vector stack memory.

### Are these changes tested?

Wanted to add cases to assert that ASAN poison/unpoison is functioning. However I found it too tricky to catch an ASAN error because ASAN directly uses signals that are hard to interoperate in C++/gtest. So I just manually checked poisoning is working in my local, e.g. by intentionally not unpoisoning the allocated buffer and seeing ASAN unhappy.

Just leveraging existing cases that use temp stack such as acero tests, which should cover this change well.

### Are there any user-facing changes?

None.

* GitHub Issue: #41460

Authored-by: Ruoxi Sun <zanmato1984@gmail.com>
Signed-off-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
  • Loading branch information
zanmato1984 committed May 18, 2024
1 parent 7aff9d5 commit dcdf4e6
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
27 changes: 24 additions & 3 deletions cpp/src/arrow/compute/util_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,29 @@
#include "arrow/compute/util.h"
#include "arrow/memory_pool.h"

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#endif

namespace arrow {
namespace util {

TempVectorStack::~TempVectorStack() {
#ifdef ADDRESS_SANITIZER
if (buffer_) {
ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data(), buffer_size_);
}
#endif
}

Status TempVectorStack::Init(MemoryPool* pool, int64_t size) {
num_vectors_ = 0;
top_ = 0;
buffer_size_ = EstimatedAllocationSize(size);
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
// Ensure later operations don't accidentally read uninitialized memory.
std::memset(buffer->mutable_data(), 0xFF, size);
#ifdef ADDRESS_SANITIZER
ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), size);
#endif
buffer_ = std::move(buffer);
return Status::OK();
}
Expand All @@ -53,12 +66,17 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) {
ARROW_CHECK_LE(new_top, buffer_size_)
<< "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size
<< " on top of " << top_ << " in stack of size " << buffer_size_;
*data = buffer_->mutable_data() + top_ + sizeof(uint64_t);
#ifdef ADDRESS_SANITIZER
ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data() + top_, estimated_alloc_size);
#endif
*data = buffer_->mutable_data() + top_ + /*one guard*/ sizeof(uint64_t);
#ifndef NDEBUG
// We set 8 bytes before the beginning of the allocated range and
// 8 bytes after the end to check for stack overflow (which would
// result in those known bytes being corrupted).
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[0] = kGuard1;
reinterpret_cast<uint64_t*>(buffer_->mutable_data() + new_top)[-1] = kGuard2;
#endif
*id = num_vectors_++;
top_ = new_top;
}
Expand All @@ -72,6 +90,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) {
top_ -= size;
ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + top_)[0] ==
kGuard1);
#ifdef ADDRESS_SANITIZER
ASAN_POISON_MEMORY_REGION(buffer_->mutable_data() + top_, size);
#endif
--num_vectors_;
}

Expand Down
10 changes: 9 additions & 1 deletion cpp/src/arrow/compute/util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"

namespace arrow {
namespace util {
Expand All @@ -38,13 +39,20 @@ class ARROW_EXPORT TempVectorStack {
friend class TempVectorHolder;

public:
TempVectorStack() = default;
~TempVectorStack();

ARROW_DISALLOW_COPY_AND_ASSIGN(TempVectorStack);

ARROW_DEFAULT_MOVE_AND_ASSIGN(TempVectorStack);

Status Init(MemoryPool* pool, int64_t size);

int64_t AllocatedSize() const { return top_; }

private:
static int64_t EstimatedAllocationSize(int64_t size) {
return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
return PaddedAllocationSize(size) + /*two guards*/ 2 * sizeof(uint64_t);
}

static int64_t PaddedAllocationSize(int64_t num_bytes);
Expand Down

0 comments on commit dcdf4e6

Please sign in to comment.