Template class for thread local storage.
// Selects how an enumerable_thread_specific instance uses native TLS keys.
enum ets_key_usage_type {
    // Higher-performance specialization; consumes one native TLS key per
    // enumerable_thread_specific instance.
    ets_key_per_instance,
    // Implementation that consumes no native TLS keys (the default).
    ets_no_key
};
// Declaration of the thread-local storage container template.
//   T            - type of the thread-local elements.
//   Allocator    - allocator type; defaults to cache_aligned_allocator<T>.
//   ETS_key_type - native TLS key usage policy; defaults to ets_no_key.
template <typename T,
typename Allocator=cache_aligned_allocator<T>,
ets_key_usage_type ETS_key_type=ets_no_key>
class enumerable_thread_specific;
#include "tbb/enumerable_thread_specific.h"
An enumerable_thread_specific provides thread local storage (TLS) for elements of type T. An enumerable_thread_specific acts as a container by providing iterators and ranges across all of the thread-local elements.
The thread-local elements are created lazily. A freshly constructed enumerable_thread_specific has no elements. When a thread requests access to an enumerable_thread_specific, it creates an element corresponding to that thread. The number of elements is equal to the number of distinct threads that have accessed the enumerable_thread_specific and not the number of threads in use by the application. Clearing an enumerable_thread_specific removes all of its elements.
The ETS_key_type template parameter (of type ets_key_usage_type) can be used to select between an implementation that consumes no native TLS keys and a specialization that offers higher performance but consumes 1 native TLS key per enumerable_thread_specific instance. If no ETS_key_type argument is provided, ets_no_key is used by default.
Caution: The number of native TLS keys is limited and can be fairly small, for example 64 or 128. Therefore, it is recommended to restrict the use of the ets_key_per_instance specialization to only the most performance-critical cases.
The following code shows a simple example usage of enumerable_thread_specific. The number of calls to Body::operator() and the total number of iterations executed are counted by each thread that participates in the parallel_for, and these counts are printed at the end of main.
#include <cstdio>
#include <utility>
#include "tbb/task_scheduler_init.h"
#include "tbb/enumerable_thread_specific.h"
#include "tbb/parallel_for.h"
#include "tbb/blocked_range.h"
using namespace tbb;
// Per-thread pair of counters: first = calls to Body::operator(),
// second = loop iterations executed.
typedef enumerable_thread_specific< std::pair<int,int> > CounterType;
// Global counters shared by all Body invocations, constructed with the pair (0,0).
CounterType MyCounters (std::make_pair(0,0));
// parallel_for body: records in the calling thread's counters one call to
// operator() plus one tick for every index in the sub-range it is handed.
struct Body {
    void operator()(const tbb::blocked_range<int> &r) const {
        // Thread-local element; created on this thread's first access.
        CounterType::reference counts = MyCounters.local();
        counts.first += 1;
        int index = r.begin();
        while (index != r.end()) {
            ++counts.second;
            ++index;
        }
    }
};
int main() {
parallel_for( blocked_range<int>(0, 100000000), Body());
for (CounterType::const_iterator i = MyCounters.begin();
i != MyCounters.end(); ++i)
{
printf("Thread stats:\n");
printf(" calls to operator(): %d", i->first);
printf(" total # of iterations executed: %d\n\n",
i->second);
}
}
Class enumerable_thread_specific has a method combine(f) that does a reduction using binary functor f, which can be written using a lambda expression. For example, the previous example can be extended to sum the thread-local values by adding the following lines to the end of function main:
// Binary functor for combine(): adds two counter pairs element-wise.
auto add_counts = [](std::pair<int,int> x, std::pair<int,int> y) {
    const int calls = x.first+y.first;
    const int iterations = x.second+y.second;
    return std::make_pair(calls, iterations);
};
// Reduce all thread-local pairs into a single total.
std::pair<int,int> sum = MyCounters.combine(add_counts);
printf("Total calls to operator() = %d, "
       "total iterations = %d\n", sum.first, sum.second);
namespace tbb {
    // Synopsis of the thread-local storage container. It holds one element of
    // type T per thread that has accessed it, and exposes iterators and ranges
    // across all of those elements. Entries marked "implementation-dependent"
    // are placeholders, not literal C++.
    template <typename T,
              typename Allocator=cache_aligned_allocator<T>,
              ets_key_usage_type ETS_key_type=ets_no_key >
    class enumerable_thread_specific {
    public:
        // Basic types
        typedef Allocator allocator_type;
        typedef T value_type;
        typedef T& reference;
        typedef const T& const_reference;
        typedef T* pointer;
        typedef implementation-dependent size_type;
        typedef implementation-dependent difference_type;

        // Iterator types
        typedef implementation-dependent iterator;
        typedef implementation-dependent const_iterator;

        // Parallel range types
        typedef implementation-dependent range_type;
        typedef implementation-dependent const_range_type;

        // Whole container operations

        // A freshly constructed container has no elements; they are created
        // lazily as threads access it.
        enumerable_thread_specific();
        enumerable_thread_specific(
            const enumerable_thread_specific &other
        );
        template<typename U, typename Alloc,
                 ets_key_usage_type Cachetype>
        enumerable_thread_specific(
            const enumerable_thread_specific<U, Alloc,
                                             Cachetype>& other );
        // NOTE(review): presumably finit supplies the initial value of each
        // thread-local element - confirm against the TBB reference.
        template <typename Finit>
        enumerable_thread_specific( Finit finit );
        // NOTE(review): presumably each thread-local element starts as a copy
        // of exemplar - confirm against the TBB reference.
        enumerable_thread_specific(const T &exemplar);
        ~enumerable_thread_specific();
        enumerable_thread_specific&
        operator=(const enumerable_thread_specific& other);
        template<typename U, typename Alloc,
                 ets_key_usage_type Cachetype>
        enumerable_thread_specific&
        operator=(
            const enumerable_thread_specific<U, Alloc, Cachetype>&
                other
        );
        // Removes all elements.
        void clear();

        // Concurrent operations

        // Returns a reference to the calling thread's element, creating it
        // if this thread has not accessed the container before.
        reference local();
        // NOTE(review): presumably behaves like local() and reports through
        // `exists` whether the calling thread's element was already present -
        // confirm against the TBB reference.
        reference local(bool& exists);
        size_type size() const;
        bool empty() const;

        // Combining

        // Performs a reduction over the thread-local elements using the
        // binary functor fcombine.
        template<typename FCombine> T combine(FCombine fcombine);
        template<typename Func> void combine_each(Func f);

        // Parallel iteration
        range_type range( size_t grainsize=1 );
        const_range_type range( size_t grainsize=1 ) const;

        // Iterators
        iterator begin();
        iterator end();
        const_iterator begin() const;
        const_iterator end() const;
    };
}