GHashTable如何使用哈希值来存储其节点?

发布于 2024-12-26 20:11:11 字数 2152 浏览 3 评论 0原文

g_hash_table_new() 的文档 表示

哈希值用于确定键在 GHashTable 数据结构中的存储位置。

但是哈希值是如何使用的呢?

似乎 g_hash_table_foreach()< /code>0N- 1 遍历表格N 个节点。我使用此函数打印出每个节点的哈希、键和值:

test.c

#include <glib.h>

static GHashTable* _htab = NULL;

static int VALS[] = { 1, 2, 3, 4, 5 };

struct kv {
  gpointer key;
  gpointer val;
};

static struct kv KEYVALS[] = {
  { "aaa",    &VALS[0] },
  { "a",      &VALS[1] },
  { "b",      &VALS[2] },
  { "bbbbbb", &VALS[3] },
  { "aaaaa",  &VALS[4] }
};

static void iter(gpointer key, gpointer val, gpointer data) {
  g_printf("key=%s(%p) val=%d(%p) hash=%u\n",
      (char*)key, key,
      *(int*)val, val,
      g_str_hash((char*)key)
  );
}

int main(int argc, char* argv[]) {
  int i;

  if (!_htab) {
    _htab = g_hash_table_new(g_str_hash, g_str_equal);
    g_assert(_htab);
  }

  for (i = 0; i < sizeof(KEYVALS) / sizeof(KEYVALS[0]); i++) {
    g_hash_table_insert(_htab, KEYVALS[i].key, KEYVALS[i].val);
  }
  g_hash_table_foreach(_htab, iter, NULL);
  g_hash_table_remove_all(_htab);
  g_hash_table_destroy(_htab);
  return 0;
}


即使多次运行后,输出也始终相同(指针值除外),因此这里使用了某种算法。 节点的哈希值如何(例如,来自 g_str_hash()) 确定它在 GHashTable 中的存储位置?

$ gcc `pkg-config --cflags glib-2.0` `pkg-config --libs glib-2.0` test.c
$ ./a.out
key=bbbbbb(0x10f142ee0) val=4(0x10f1430ac) hash=4087176913
key=a(0x10f142edc) val=2(0x10f1430a4) hash=177670
key=b(0x10f142ede) val=3(0x10f1430a8) hash=177671
key=aaaaa(0x10f142ee7) val=5(0x10f1430b0) hash=252781386
key=aaa(0x10f142ed8) val=1(0x10f1430a0) hash=193485928
$

The documentation for g_hash_table_new() indicates

Hash values are used to determine where keys are stored within the GHashTable data structure.

but how are the hash values used?

It seems g_hash_table_foreach() traverses the table from 0 to N- 1 for N nodes. I used this function to print out the hash, key, and value for each node:

test.c

#include <glib.h>

static GHashTable* _htab = NULL;

static int VALS[] = { 1, 2, 3, 4, 5 };

struct kv {
  gpointer key;
  gpointer val;
};

static struct kv KEYVALS[] = {
  { "aaa",    &VALS[0] },
  { "a",      &VALS[1] },
  { "b",      &VALS[2] },
  { "bbbbbb", &VALS[3] },
  { "aaaaa",  &VALS[4] }
};

static void iter(gpointer key, gpointer val, gpointer data) {
  g_printf("key=%s(%p) val=%d(%p) hash=%u\n",
      (char*)key, key,
      *(int*)val, val,
      g_str_hash((char*)key)
  );
}

int main(int argc, char* argv[]) {
  int i;

  if (!_htab) {
    _htab = g_hash_table_new(g_str_hash, g_str_equal);
    g_assert(_htab);
  }

  for (i = 0; i < sizeof(KEYVALS) / sizeof(KEYVALS[0]); i++) {
    g_hash_table_insert(_htab, KEYVALS[i].key, KEYVALS[i].val);
  }
  g_hash_table_foreach(_htab, iter, NULL);
  g_hash_table_remove_all(_htab);
  g_hash_table_destroy(_htab);
  return 0;
}

The output is always the same (other than the pointer values) even after multiple runs, so there's some kind of algorithm used here. How does a node's hash (e.g., from g_str_hash()) determine where it's stored in the GHashTable?

$ gcc `pkg-config --cflags glib-2.0` `pkg-config --libs glib-2.0` test.c
$ ./a.out
key=bbbbbb(0x10f142ee0) val=4(0x10f1430ac) hash=4087176913
key=a(0x10f142edc) val=2(0x10f1430a4) hash=177670
key=b(0x10f142ede) val=3(0x10f1430a8) hash=177671
key=aaaaa(0x10f142ee7) val=5(0x10f1430b0) hash=252781386
key=aaa(0x10f142ed8) val=1(0x10f1430a0) hash=193485928
$

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

江南月 2025-01-02 20:11:11

让我们看看源代码可能告诉我们什么。

static inline guint
g_hash_table_lookup_node (GHashTable    *hash_table,
                          gconstpointer  key,
                          guint         *hash_return)
{
  /* ... */
  hash_value = hash_table->hash_func (key);
  /* ... */
  node_index = hash_value % hash_table->mod;
  /* ... */

哈希值以某个值为模。让我们看看它可能是什么。

static void
g_hash_table_set_shift (GHashTable *hash_table, gint shift)
{
  /* ... */
  hash_table->size = 1 << shift;
  hash_table->mod  = prime_mod [shift];
  /* ... */

啊哈,来自预定义素数的static const 数组的素数。但它设置在哪里呢?

#define HASH_TABLE_MIN_SHIFT 3  /* 1 << 3 == 8 buckets */

/* ... */

GHashTable *
g_hash_table_new_full (GHashFunc      hash_func,
                       GEqualFunc     key_equal_func,
                       GDestroyNotify key_destroy_func,
                       GDestroyNotify value_destroy_func)
{
  /* ... */
  hash_table = g_slice_new (GHashTable);
  g_hash_table_set_shift (hash_table, HASH_TABLE_MIN_SHIFT);

因此

static void
g_hash_table_resize (GHashTable *hash_table)
{
  /* ... */
  g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);

,在创建 GHashTable 时,会为其分配某个素数,并用于将哈希值转换为索引。之后,当哈希表增长时,再次调用 g_hash_table_set_shift 来更新素值,并导致给定哈希值返回不同的索引。

Let's see what the source code might tell us.

static inline guint
g_hash_table_lookup_node (GHashTable    *hash_table,
                          gconstpointer  key,
                          guint         *hash_return)
{
  /* ... */
  hash_value = hash_table->hash_func (key);
  /* ... */
  node_index = hash_value % hash_table->mod;
  /* ... */

The hash value is taken modulo some value. Let's see what it might be.

static void
g_hash_table_set_shift (GHashTable *hash_table, gint shift)
{
  /* ... */
  hash_table->size = 1 << shift;
  hash_table->mod  = prime_mod [shift];
  /* ... */

Aha, a prime number from a static const array of predefined primes. But where is it set?

#define HASH_TABLE_MIN_SHIFT 3  /* 1 << 3 == 8 buckets */

/* ... */

GHashTable *
g_hash_table_new_full (GHashFunc      hash_func,
                       GEqualFunc     key_equal_func,
                       GDestroyNotify key_destroy_func,
                       GDestroyNotify value_destroy_func)
{
  /* ... */
  hash_table = g_slice_new (GHashTable);
  g_hash_table_set_shift (hash_table, HASH_TABLE_MIN_SHIFT);

and

static void
g_hash_table_resize (GHashTable *hash_table)
{
  /* ... */
  g_hash_table_set_shift_from_size (hash_table, hash_table->nnodes * 2);

So on GHashTable's creation a certain prime number is assigned to it and it's used to translate a hash value to an index. Afterwards, when the hash table grows, g_hash_table_set_shift is called once again to update the prime value and causes different indices to be returned for given hash values.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文