Spaces:
Runtime error
Runtime error
// This example demonstrates how to control how Thrust allocates temporary | |
// storage during algorithms such as thrust::sort. The idea will be to create a | |
// simple cache of allocations to search when temporary storage is requested. | |
// If a hit is found in the cache, we quickly return the cached allocation | |
// instead of resorting to the more expensive thrust::cuda::malloc. | |
// Note: Thrust now has its own caching allocator layer; if you just need a | |
// caching allocator, you ought to use that. This example is still useful | |
// as a demonstration of how to use a Thrust custom allocator. | |
// Note: this implementation cached_allocator is not thread-safe. If multiple | |
// (host) threads use the same cached_allocator then they should gain exclusive | |
// access to the allocator before accessing its methods. | |
struct not_my_pointer | |
{ | |
not_my_pointer(void* p) | |
: message() | |
{ | |
std::stringstream s; | |
s << "Pointer `" << p << "` was not allocated by this allocator."; | |
message = s.str(); | |
} | |
virtual ~not_my_pointer() {} | |
virtual const char* what() const | |
{ | |
return message.c_str(); | |
} | |
private: | |
std::string message; | |
}; | |
// A simple allocator for caching cudaMalloc allocations. | |
struct cached_allocator | |
{ | |
typedef char value_type; | |
cached_allocator() {} | |
~cached_allocator() | |
{ | |
free_all(); | |
} | |
char *allocate(std::ptrdiff_t num_bytes) | |
{ | |
std::cout << "cached_allocator::allocate(): num_bytes == " | |
<< num_bytes | |
<< std::endl; | |
char *result = 0; | |
// Search the cache for a free block. | |
free_blocks_type::iterator free_block = free_blocks.find(num_bytes); | |
if (free_block != free_blocks.end()) | |
{ | |
std::cout << "cached_allocator::allocate(): found a free block" | |
<< std::endl; | |
result = free_block->second; | |
// Erase from the `free_blocks` map. | |
free_blocks.erase(free_block); | |
} | |
else | |
{ | |
// No allocation of the right size exists, so create a new one with | |
// `thrust::cuda::malloc`. | |
try | |
{ | |
std::cout << "cached_allocator::allocate(): allocating new block" | |
<< std::endl; | |
// Allocate memory and convert the resulting `thrust::cuda::pointer` to | |
// a raw pointer. | |
result = thrust::cuda::malloc<char>(num_bytes).get(); | |
} | |
catch (std::runtime_error&) | |
{ | |
throw; | |
} | |
} | |
// Insert the allocated pointer into the `allocated_blocks` map. | |
allocated_blocks.insert(std::make_pair(result, num_bytes)); | |
return result; | |
} | |
void deallocate(char *ptr, size_t) | |
{ | |
std::cout << "cached_allocator::deallocate(): ptr == " | |
<< reinterpret_cast<void*>(ptr) << std::endl; | |
// Erase the allocated block from the allocated blocks map. | |
allocated_blocks_type::iterator iter = allocated_blocks.find(ptr); | |
if (iter == allocated_blocks.end()) | |
throw not_my_pointer(reinterpret_cast<void*>(ptr)); | |
std::ptrdiff_t num_bytes = iter->second; | |
allocated_blocks.erase(iter); | |
// Insert the block into the free blocks map. | |
free_blocks.insert(std::make_pair(num_bytes, ptr)); | |
} | |
private: | |
typedef std::multimap<std::ptrdiff_t, char*> free_blocks_type; | |
typedef std::map<char*, std::ptrdiff_t> allocated_blocks_type; | |
free_blocks_type free_blocks; | |
allocated_blocks_type allocated_blocks; | |
void free_all() | |
{ | |
std::cout << "cached_allocator::free_all()" << std::endl; | |
// Deallocate all outstanding blocks in both lists. | |
for ( free_blocks_type::iterator i = free_blocks.begin() | |
; i != free_blocks.end() | |
; ++i) | |
{ | |
// Transform the pointer to cuda::pointer before calling cuda::free. | |
thrust::cuda::free(thrust::cuda::pointer<char>(i->second)); | |
} | |
for( allocated_blocks_type::iterator i = allocated_blocks.begin() | |
; i != allocated_blocks.end() | |
; ++i) | |
{ | |
// Transform the pointer to cuda::pointer before calling cuda::free. | |
thrust::cuda::free(thrust::cuda::pointer<char>(i->first)); | |
} | |
} | |
}; | |
int main() | |
{ | |
std::size_t num_elements = 32768; | |
thrust::host_vector<int> h_input(num_elements); | |
// Generate random input. | |
thrust::generate(h_input.begin(), h_input.end(), rand); | |
thrust::cuda::vector<int> d_input = h_input; | |
thrust::cuda::vector<int> d_result(num_elements); | |
std::size_t num_trials = 5; | |
cached_allocator alloc; | |
for (std::size_t i = 0; i < num_trials; ++i) | |
{ | |
d_result = d_input; | |
// Pass alloc through cuda::par as the first parameter to sort | |
// to cause allocations to be handled by alloc during sort. | |
thrust::sort(thrust::cuda::par(alloc), d_result.begin(), d_result.end()); | |
// Ensure the result is sorted. | |
assert(thrust::is_sorted(d_result.begin(), d_result.end())); | |
} | |
return 0; | |
} | |