C++:哈希和碰撞技术的正确使用

发布于 2024-10-18 05:38:07 字数 4736 浏览 7 评论 0原文

哈希表的大小是否应该有大小限制?

我有点困惑,因为我明白为什么创建太小的哈希表会给我带来问题?似乎哈希表太大导致我的探测器抛出 Sig 错误?如果有人有哈希表的经验,这是我的代码。我当然很感激你提供的任何建议(除了开始编织之外,请):

#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>

using namespace std;

struct TABLE{
  int key;
  TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;

int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);

int main(){
  int tbSIZE = 0;
  int randARRAY[MAX_KEYS];
  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

  ///create random array of 5,000 unique int
  ///they will be of values between 1-30000
  randNUMS(randARRAY);
  ///get hash table size from user
  ///table must be larger than 6500 int
  tbSIZE = hashTableSize();
  ///driver function for all three
  ///collision resolution techniques
  threeHashMethods(randARRAY,tbSIZE);

  return 0;
}
int HASH(int key,int listSIZE){
  int address = 0;
  address = key % listSIZE;
  return address;
}
int doubleHASH(int key,int tbSIZE){
  int address = 0;
  address = (key % (tbSIZE - 2)) + 1;
  return address;
}
int hashTableSize(){
  int userCHOOSE = 0;

  cout << "Enter desired hash table size." << endl;
  cout << "NOTE: hash table size must exceed 6500: " << endl;
  cin >> userCHOOSE;
  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }
  return userCHOOSE;
}
int randNUMS(int *randARRAY){
  ///temporary fix for randARRAY array of numbers till hash is running
  int check = 0;
  int index = 0;
  int loop = 0;

  srand (time(NULL));
  for(index = 0; index < MAX_KEYS; index++){
    check = rand() % RANDOM + 1;
    while(randARRAY[loop] != 0){
      if(check == randARRAY[index]){
    check = rand() % RANDOM + 1;
    loop = 0;
      }
      loop++;
    }
    randARRAY[index] = check;
  }

  return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
  int *HT;


  ///this menu will allow user to select collision method
  HT = openAddressing(randARRAY,tbSIZE);
  listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
  int key = 0,
    address = 0,
    prb = 0,
    hashTABLE[tbSIZE * 2],
    *HT = hashTABLE;
  int percent = (5000.00 / tbSIZE) * 100;
  int load = (5000.00 / tbSIZE) * 10;
  int loadFACTOR = (tbSIZE * load)/10;

  if(percent > 0){

  for(int a = 0; a < tbSIZE; a++){

    hashTABLE[a] = 0;
  }

  while(randARRAY[key] != 0){
    ///get a purposed address
    ///and move through indexes
    ///in array of random int till
    ///empty index is found
    if(randARRAY[key] > tbSIZE){
    address = HASH(randARRAY[key],loadFACTOR);
    }
    ///if address is available 
    ///grab the key
    if(hashTABLE[address] == 0){
      hashTABLE[address] = randARRAY[key];
    }
    ///if a collision is the result run
    ///a linear probe until available address is found 
    else{
      address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
      hashTABLE[address] = randARRAY[key];
    }
    if(hashTABLE[address] == randARRAY[key]){
    key++;
    }
  }
  cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
  cout << "Load Factor = " << percent << "%" << endl;
  cout << "Results from searching for 2500 items." << endl;
  }
  else{
    cout << "Load Factor is maxed out." << endl;
  }

  return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
  while(HASH[address] != probeTHIS){
    address = (address + 1);
    probe++;
    if(address >= load){
      address = 0;
    }
  }
  return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
  int key = 0,
    address = 0,
    probe = 0,
    found = 0,
    attempts = 0;

  while(randARRAY[key] != 0){
    address =  HASH(randARRAY[key],tbSIZE);
    while(HT[address] != randARRAY[key] && attempts < tbSIZE){
      address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
      found++;
      attempts++;
    }
    key = key + 2;
    attempts = 0;
  }
  found = probe / found;

  cout << "Linear Probing." << endl;
  cout << probe  << " items examined ";
  cout << "(avg = " << found << " items examined per search.)" << endl;
}

Should there be a size limitation on how LARGE a Hash Table may be?

I'm a little baffled as I can see why creating too small of a Hash Table should be causing me issues? It appears that too large of a Hash Table is causing my probe to throw a Sig error? Here is my code if anyone has experience with Hash Tables. I certainly appreciate any advice you have to offer (beyond taking up knitting instead, please):

#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>

using namespace std;

struct TABLE{
  int key;
  TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;

int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);

int main(){
  int tbSIZE = 0;
  int randARRAY[MAX_KEYS];
  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

  ///create random array of 5,000 unique int
  ///they will be of values between 1-30000
  randNUMS(randARRAY);
  ///get hash table size from user
  ///table must be larger than 6500 int
  tbSIZE = hashTableSize();
  ///driver function for all three
  ///collision resolution techniques
  threeHashMethods(randARRAY,tbSIZE);

  return 0;
}
int HASH(int key,int listSIZE){
  int address = 0;
  address = key % listSIZE;
  return address;
}
int doubleHASH(int key,int tbSIZE){
  int address = 0;
  address = (key % (tbSIZE - 2)) + 1;
  return address;
}
int hashTableSize(){
  int userCHOOSE = 0;

  cout << "Enter desired hash table size." << endl;
  cout << "NOTE: hash table size must exceed 6500: " << endl;
  cin >> userCHOOSE;
  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }
  return userCHOOSE;
}
int randNUMS(int *randARRAY){
  ///temporary fix for randARRAY array of numbers till hash is running
  int check = 0;
  int index = 0;
  int loop = 0;

  srand (time(NULL));
  for(index = 0; index < MAX_KEYS; index++){
    check = rand() % RANDOM + 1;
    while(randARRAY[loop] != 0){
      if(check == randARRAY[index]){
    check = rand() % RANDOM + 1;
    loop = 0;
      }
      loop++;
    }
    randARRAY[index] = check;
  }

  return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
  int *HT;


  ///this menu will allow user to select collision method
  HT = openAddressing(randARRAY,tbSIZE);
  listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
  int key = 0,
    address = 0,
    prb = 0,
    hashTABLE[tbSIZE * 2],
    *HT = hashTABLE;
  int percent = (5000.00 / tbSIZE) * 100;
  int load = (5000.00 / tbSIZE) * 10;
  int loadFACTOR = (tbSIZE * load)/10;

  if(percent > 0){

  for(int a = 0; a < tbSIZE; a++){

    hashTABLE[a] = 0;
  }

  while(randARRAY[key] != 0){
    ///get a purposed address
    ///and move through indexes
    ///in array of random int till
    ///empty index is found
    if(randARRAY[key] > tbSIZE){
    address = HASH(randARRAY[key],loadFACTOR);
    }
    ///if address is available 
    ///grab the key
    if(hashTABLE[address] == 0){
      hashTABLE[address] = randARRAY[key];
    }
    ///if a collision is the result run
    ///a linear probe until available address is found 
    else{
      address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
      hashTABLE[address] = randARRAY[key];
    }
    if(hashTABLE[address] == randARRAY[key]){
    key++;
    }
  }
  cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
  cout << "Load Factor = " << percent << "%" << endl;
  cout << "Results from searching for 2500 items." << endl;
  }
  else{
    cout << "Load Factor is maxed out." << endl;
  }

  return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
  while(HASH[address] != probeTHIS){
    address = (address + 1);
    probe++;
    if(address >= load){
      address = 0;
    }
  }
  return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
  int key = 0,
    address = 0,
    probe = 0,
    found = 0,
    attempts = 0;

  while(randARRAY[key] != 0){
    address =  HASH(randARRAY[key],tbSIZE);
    while(HT[address] != randARRAY[key] && attempts < tbSIZE){
      address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
      found++;
      attempts++;
    }
    key = key + 2;
    attempts = 0;
  }
  found = probe / found;

  cout << "Linear Probing." << endl;
  cout << probe  << " items examined ";
  cout << "(avg = " << found << " items examined per search.)" << endl;
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1

维持三分热 2024-10-25 05:38:07

差一分。这将填充 randARRAY 的前 MAX_KEYS+1 个元素。

  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

这会再次询问,然后使用旧的 userCHOOSE 值。您想要 return hashTableSize();

  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }

那么真正的问题是:在 openAddressing 中,您在 randARRAY[key] != 0 时进行扫描。您的 randARRAY 不是 0 终止的(在 main 中设置 tbSIZE 将覆盖您之前相差一的 randARRAY[5000]),因此您将扫描超过 5000 个键。然后在 listSearch 中,您将访问 randARRAY[key] 来获取大于 5000 的键,这意味着您正在读取“垃圾”数据,例如负数。你对它进行散列(模数),它仍然是负数。然后你访问 HT[负值] 就会崩溃。

编辑:修复:

  int randARRAY[MAX_KEYS+1];

这将防止相差一,使您的 0 终止扫描工作,因此最大键值为正确的值 5000。

Off by one. This fills the first MAX_KEYS+1 elements of randARRAY.

  for(int a = 0; a <= MAX_KEYS; a++){
    randARRAY[a] = 0;
  }

This asks again then uses the old userCHOOSE value. You want return hashTableSize();

  if(userCHOOSE < 6500){
    cout << "Whoops " << userCHOOSE << " is to small!" << endl;
    hashTableSize();
  }

Then the real problem: In openAddressing you scan while randARRAY[key] != 0. Your randARRAY isn't 0-terminated (setting tbSIZE in main will overwrite your earlier off-by-one randARRAY[5000]), so you will scan beyond 5000 keys. Then in listSearch you'll access randARRAY[key] for key larger than 5000, which means you're reading "garbage" data, e.g. a negative number. You hash it (modulus) and it remains negative. You then access HT[negative value] which crashes.

EDIT: The fix:

  int randARRAY[MAX_KEYS+1];

This will prevent the off-by-one, make your 0-terminated scan work, and therefore max key at the correct value 5000.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文