From a211236329e9fd97cf8c7172a357e55fe7e14292 Mon Sep 17 00:00:00 2001
From: Christoph Viebig <christoph.viebig@clickhouse.com>
Date: Thu, 2 Jul 2026 17:43:51 +0200
Subject: [PATCH] Avoid allocation in count_min_sketch::get_hashes

---
 count/include/count_min.hpp      |  9 +++++----
 count/include/count_min_impl.hpp | 26 +++++++++++---------------
 2 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/count/include/count_min.hpp b/count/include/count_min.hpp
index 1fbba0e6..5d45ee20 100644
--- a/count/include/count_min.hpp
+++ b/count/include/count_min.hpp
@@ -389,12 +389,13 @@ class count_min_sketch{
   static void check_header_validity(uint8_t preamble_longs, uint8_t serial_version, uint8_t family_id, uint8_t flags_byte);
 
   /*
-   * Obtain the hash values when inserting an item into the sketch.
-   * @param item pointer to the data item to be inserted into the sketch.
+   * Compute the hash locations for an input item
+   * @param item pointer to the data item to be inserted into or queried from the sketch.
    * @param size of the data in bytes
-   * @return vector of uint64_t which each represent the index to which `value' must update in the sketch
+   * @param callback function to invoke for each sketch array location
    */
-  std::vector<uint64_t> get_hashes(const void* item, size_t size) const;
+  template<typename F>
+  void foreach_hash_location(const void* item, size_t size, F callback) const;
 
 };
 
diff --git a/count/include/count_min_impl.hpp b/count/include/count_min_impl.hpp
index 2f2629fc..528fb619 100644
--- a/count/include/count_min_impl.hpp
+++ b/count/include/count_min_impl.hpp
@@ -110,9 +110,10 @@ uint8_t count_min_sketch<W,A>::suggest_num_hashes(double confidence) {
 }
 
 template<typename W, typename A>
-std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t size) const {
+template<typename F>
+void count_min_sketch<W,A>::foreach_hash_location(const void* item, size_t size, F callback) const {
   /*
-   * Returns the hash locations for the input item using the original hashing
+   * Computes the hash locations for the input item using the original hashing
    * scheme from [1].
    * Generate _num_hashes separate hashes from calls to murmurmhash.
    * This could be optimized by keeping both of the 64bit parts of the hash
@@ -126,8 +127,6 @@ std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t
    * https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
    */
   uint64_t bucket_index;
-  std::vector<uint64_t> sketch_update_locations;
-  sketch_update_locations.reserve(_num_hashes);
 
   uint64_t hash_seed_index = 0;
   for (const auto &it: hash_seeds) {
@@ -135,10 +134,9 @@ std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t
     MurmurHash3_x64_128(item, size, it, hashes); // ? BEWARE OVERFLOW.
     uint64_t hash = hashes.h1;
     bucket_index = hash % _num_buckets;
-    sketch_update_locations.push_back((hash_seed_index * _num_buckets) + bucket_index);
+    callback((hash_seed_index * _num_buckets) + bucket_index);
     hash_seed_index += 1;
   }
-  return sketch_update_locations;
 }
 
 template<typename W, typename A>
@@ -158,12 +156,11 @@ W count_min_sketch<W,A>::get_estimate(const void* item, size_t size) const {
   /*
    * Returns the estimated frequency of the item
    */
-  std::vector<uint64_t> hash_locations = get_hashes(item, size);
-  std::vector<W> estimates;
-  for (const auto h: hash_locations) {
-    estimates.push_back(_sketch_array[h]);
-  }
-  return *std::min_element(estimates.begin(), estimates.end());
+  W estimate = std::numeric_limits<W>::max();
+  foreach_hash_location(item, size, [this, &estimate](uint64_t h) {
+    estimate = std::min(estimate, _sketch_array[h]);
+  });
+  return estimate;
 }
 
 template<typename W, typename A>
@@ -189,10 +186,9 @@ void count_min_sketch<W,A>::update(const void* item, size_t size, W weight) {
    * locations by the weight.
    */
   _total_weight += weight >= 0 ? weight : -weight;
-  std::vector<uint64_t> hash_locations = get_hashes(item, size);
-  for (const auto h: hash_locations) {
+  foreach_hash_location(item, size, [this, weight](uint64_t h) {
     _sketch_array[h] += weight;
-  }
+  });
 }
 
 template<typename W, typename A>