|
| frequent_items_sketch (uint8_t lg_max_map_size, uint8_t lg_start_map_size=LG_MIN_MAP_SIZE, const E &equal=E(), const A &allocator=A()) |
|
void | update (const T &item, W weight=1) |
|
void | update (T &&item, W weight=1) |
|
void | merge (const frequent_items_sketch &other) |
|
void | merge (frequent_items_sketch &&other) |
|
bool | is_empty () const |
|
uint32_t | get_num_active_items () const |
|
W | get_total_weight () const |
|
W | get_estimate (const T &item) const |
|
W | get_lower_bound (const T &item) const |
|
W | get_upper_bound (const T &item) const |
|
W | get_maximum_error () const |
|
double | get_epsilon () const |
|
vector_row | get_frequent_items (frequent_items_error_type err_type) const |
|
vector_row | get_frequent_items (frequent_items_error_type err_type, W threshold) const |
|
template<typename SerDe = serde<T>> |
size_t | get_serialized_size_bytes (const SerDe &sd=SerDe()) const |
|
template<typename SerDe = serde<T>> |
void | serialize (std::ostream &os, const SerDe &sd=SerDe()) const |
|
template<typename SerDe = serde<T>> |
vector_bytes | serialize (unsigned header_size_bytes=0, const SerDe &sd=SerDe()) const |
|
string< A > | to_string (bool print_items=false) const |
|
template<typename SerDe > |
auto | serialize (unsigned header_size_bytes, const SerDe &sd) const -> vector_bytes |
|
template<typename SerDe > |
frequent_items_sketch< T, W, H, E, A > | deserialize (std::istream &is, const SerDe &sd, const E &equal, const A &allocator) |
|
template<typename SerDe > |
frequent_items_sketch< T, W, H, E, A > | deserialize (const void *bytes, size_t size, const SerDe &sd, const E &equal, const A &allocator) |
|
|
static double | get_epsilon (uint8_t lg_max_map_size) |
|
static double | get_apriori_error (uint8_t lg_max_map_size, W estimated_total_weight) |
|
template<typename SerDe = serde<T>> |
static frequent_items_sketch | deserialize (std::istream &is, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A()) |
|
template<typename SerDe = serde<T>> |
static frequent_items_sketch | deserialize (const void *bytes, size_t size, const SerDe &sd=SerDe(), const E &equal=E(), const A &allocator=A()) |
|
template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T>>
class datasketches::frequent_items_sketch< T, W, H, E, A >
Frequent Items sketch.
Based on Java implementation here: https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
- Author
- Alexander Saydakov
template<typename T , typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T>>
template<typename SerDe = serde<T>>
This method deserializes a sketch from a given array of bytes.
- Parameters
-
bytes | pointer to the array of bytes |
size | the size of the array |
sd | instance of a SerDe |
equal | instance of Equality operator |
allocator | instance of an Allocator |
- Returns
- an instance of the sketch
template<typename T , typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T>>
template<typename SerDe = serde<T>>
This method deserializes a sketch from a given stream.
- Parameters
-
is | input stream |
sd | instance of a SerDe |
equal | instance of Equality operator |
allocator | instance of an Allocator |
- Returns
- an instance of the sketch
template<typename T , typename W , typename H , typename E , typename A >
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and using get_maximum_error() as a threshold.
The method first examines all active items in the sketch (items that have a counter).
If error_type = NO_FALSE_NEGATIVES, this will include an item in the result list if get_upper_bound(item) > threshold. There will be no false negatives, i.e., no Type II error. There may be items in the set with true frequencies less than the threshold (false positives).
If error_type = NO_FALSE_POSITIVES, this will include an item in the result list if get_lower_bound(item) > threshold. There will be no false positives, i.e., no Type I error. There may be items omitted from the set with true frequencies greater than the threshold (false negatives).
- Parameters
-
err_type | determines whether no false positives or no false negatives are desired. |
- Returns
- an array of frequent items
template<typename T , typename W , typename H , typename E , typename A >
Returns an array of rows that include frequent items, estimates, upper and lower bounds given an error_type and a threshold.
The method first examines all active items in the sketch (items that have a counter).
If error_type = NO_FALSE_NEGATIVES, this will include an item in the result list if get_upper_bound(item) > threshold. There will be no false negatives, i.e., no Type II error. There may be items in the set with true frequencies less than the threshold (false positives).
If error_type = NO_FALSE_POSITIVES, this will include an item in the result list if get_lower_bound(item) > threshold. There will be no false positives, i.e., no Type I error. There may be items omitted from the set with true frequencies greater than the threshold (false negatives).
- Parameters
-
err_type | determines whether no false positives or no false negatives are desired. |
threshold | to include items in the result list |
- Returns
- an array of frequent items
template<typename T , typename W , typename H , typename E , typename A >
template<typename SerDe >
Computes size needed to serialize the current state of the sketch. This can be expensive since every item needs to be looked at.
- Parameters
-
- Returns
- size in bytes needed to serialize this sketch
template<typename T , typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename A = std::allocator<T>>
template<typename SerDe = serde<T>>
This method serializes the sketch as a vector of bytes. An optional header can be reserved in front of the sketch. It is a blank space of a given size. This header is used in Datasketches PostgreSQL extension.
- Parameters
-
header_size_bytes | space to reserve in front of the sketch |
sd | instance of a SerDe |
- Returns
- serialized sketch as a vector of bytes