C++:哈希和碰撞技术的正确使用
哈希表的大小是否应该有大小限制?
我有点困惑,因为我明白为什么创建太小的哈希表会给我带来问题?似乎哈希表太大导致我的探测器抛出 Sig 错误?如果有人有哈希表的经验,这是我的代码。我当然很感激你提供的任何建议(除了开始编织之外,请):
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
using namespace std;
struct TABLE{
int key;
TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;
int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);
int main(){
int tbSIZE = 0;
int randARRAY[MAX_KEYS];
for(int a = 0; a <= MAX_KEYS; a++){
randARRAY[a] = 0;
}
///create random array of 5,000 unique int
///they will be of values between 1-30000
randNUMS(randARRAY);
///get hash table size from user
///table must be larger than 6500 int
tbSIZE = hashTableSize();
///driver function for all three
///collision resolution techniques
threeHashMethods(randARRAY,tbSIZE);
return 0;
}
int HASH(int key,int listSIZE){
int address = 0;
address = key % listSIZE;
return address;
}
int doubleHASH(int key,int tbSIZE){
int address = 0;
address = (key % (tbSIZE - 2)) + 1;
return address;
}
int hashTableSize(){
int userCHOOSE = 0;
cout << "Enter desired hash table size." << endl;
cout << "NOTE: hash table size must exceed 6500: " << endl;
cin >> userCHOOSE;
if(userCHOOSE < 6500){
cout << "Whoops " << userCHOOSE << " is to small!" << endl;
hashTableSize();
}
return userCHOOSE;
}
int randNUMS(int *randARRAY){
///temporary fix for randARRAY array of numbers till hash is running
int check = 0;
int index = 0;
int loop = 0;
srand (time(NULL));
for(index = 0; index < MAX_KEYS; index++){
check = rand() % RANDOM + 1;
while(randARRAY[loop] != 0){
if(check == randARRAY[index]){
check = rand() % RANDOM + 1;
loop = 0;
}
loop++;
}
randARRAY[index] = check;
}
return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
int *HT;
///this menu will allow user to select collision method
HT = openAddressing(randARRAY,tbSIZE);
listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
int key = 0,
address = 0,
prb = 0,
hashTABLE[tbSIZE * 2],
*HT = hashTABLE;
int percent = (5000.00 / tbSIZE) * 100;
int load = (5000.00 / tbSIZE) * 10;
int loadFACTOR = (tbSIZE * load)/10;
if(percent > 0){
for(int a = 0; a < tbSIZE; a++){
hashTABLE[a] = 0;
}
while(randARRAY[key] != 0){
///get a purposed address
///and move through indexes
///in array of random int till
///empty index is found
if(randARRAY[key] > tbSIZE){
address = HASH(randARRAY[key],loadFACTOR);
}
///if address is available
///grab the key
if(hashTABLE[address] == 0){
hashTABLE[address] = randARRAY[key];
}
///if a collision is the result run
///a linear probe until available address is found
else{
address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
hashTABLE[address] = randARRAY[key];
}
if(hashTABLE[address] == randARRAY[key]){
key++;
}
}
cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
cout << "Load Factor = " << percent << "%" << endl;
cout << "Results from searching for 2500 items." << endl;
}
else{
cout << "Load Factor is maxed out." << endl;
}
return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
while(HASH[address] != probeTHIS){
address = (address + 1);
probe++;
if(address >= load){
address = 0;
}
}
return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
int key = 0,
address = 0,
probe = 0,
found = 0,
attempts = 0;
while(randARRAY[key] != 0){
address = HASH(randARRAY[key],tbSIZE);
while(HT[address] != randARRAY[key] && attempts < tbSIZE){
address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
found++;
attempts++;
}
key = key + 2;
attempts = 0;
}
found = probe / found;
cout << "Linear Probing." << endl;
cout << probe << " items examined ";
cout << "(avg = " << found << " items examined per search.)" << endl;
}
Should there be a size limitation on how LARGE a Hash Table may be?
I'm a little baffled as I can see why creating too small of a Hash Table should be causing me issues? It appears that too large of a Hash Table is causing my probe to throw a Sig error? Here is my code if anyone has experience with Hash Tables. I certainly appreciate any advice you have to offer (beyond taking up knitting instead, please):
#include <iostream>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <math.h>
using namespace std;
struct TABLE{
int key;
TABLE* next;
};
const int MAX_KEYS = 5000;
const int RANDOM = 30000;
int randNUMS(int *rand);
int hashTableSize();
int HASH(int key,int listSIZE);
void threeHashMethods(int *randARRAY,int tbSIZE);
int* openAddressing(int *randARRAY,int tbSIZE);
int seperateCHAINING();
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe);
int doubleHASH(int key,int tbSIZE);
void listSEARCH(int *randARRAY,int *HT,int tbSIZE);
int main(){
int tbSIZE = 0;
int randARRAY[MAX_KEYS];
for(int a = 0; a <= MAX_KEYS; a++){
randARRAY[a] = 0;
}
///create random array of 5,000 unique int
///they will be of values between 1-30000
randNUMS(randARRAY);
///get hash table size from user
///table must be larger than 6500 int
tbSIZE = hashTableSize();
///driver function for all three
///collision resolution techniques
threeHashMethods(randARRAY,tbSIZE);
return 0;
}
int HASH(int key,int listSIZE){
int address = 0;
address = key % listSIZE;
return address;
}
int doubleHASH(int key,int tbSIZE){
int address = 0;
address = (key % (tbSIZE - 2)) + 1;
return address;
}
int hashTableSize(){
int userCHOOSE = 0;
cout << "Enter desired hash table size." << endl;
cout << "NOTE: hash table size must exceed 6500: " << endl;
cin >> userCHOOSE;
if(userCHOOSE < 6500){
cout << "Whoops " << userCHOOSE << " is to small!" << endl;
hashTableSize();
}
return userCHOOSE;
}
int randNUMS(int *randARRAY){
///temporary fix for randARRAY array of numbers till hash is running
int check = 0;
int index = 0;
int loop = 0;
srand (time(NULL));
for(index = 0; index < MAX_KEYS; index++){
check = rand() % RANDOM + 1;
while(randARRAY[loop] != 0){
if(check == randARRAY[index]){
check = rand() % RANDOM + 1;
loop = 0;
}
loop++;
}
randARRAY[index] = check;
}
return *randARRAY;
}
void threeHashMethods(int *randARRAY,int tbSIZE){
int *HT;
///this menu will allow user to select collision method
HT = openAddressing(randARRAY,tbSIZE);
listSEARCH(randARRAY,HT,tbSIZE);
}
int* openAddressing(int *randARRAY,int tbSIZE){
int key = 0,
address = 0,
prb = 0,
hashTABLE[tbSIZE * 2],
*HT = hashTABLE;
int percent = (5000.00 / tbSIZE) * 100;
int load = (5000.00 / tbSIZE) * 10;
int loadFACTOR = (tbSIZE * load)/10;
if(percent > 0){
for(int a = 0; a < tbSIZE; a++){
hashTABLE[a] = 0;
}
while(randARRAY[key] != 0){
///get a purposed address
///and move through indexes
///in array of random int till
///empty index is found
if(randARRAY[key] > tbSIZE){
address = HASH(randARRAY[key],loadFACTOR);
}
///if address is available
///grab the key
if(hashTABLE[address] == 0){
hashTABLE[address] = randARRAY[key];
}
///if a collision is the result run
///a linear probe until available address is found
else{
address = linearPROBE(address,hashTABLE,0,tbSIZE,prb);
hashTABLE[address] = randARRAY[key];
}
if(hashTABLE[address] == randARRAY[key]){
key++;
}
}
cout << key << " items loaded into a " << tbSIZE << " element hash table." << endl;
cout << "Load Factor = " << percent << "%" << endl;
cout << "Results from searching for 2500 items." << endl;
}
else{
cout << "Load Factor is maxed out." << endl;
}
return HT;
}
int linearPROBE(int address,int *HASH,int probeTHIS,int load,int& probe){
while(HASH[address] != probeTHIS){
address = (address + 1);
probe++;
if(address >= load){
address = 0;
}
}
return address;
}
void listSEARCH(int *randARRAY,int *HT,int tbSIZE){
int key = 0,
address = 0,
probe = 0,
found = 0,
attempts = 0;
while(randARRAY[key] != 0){
address = HASH(randARRAY[key],tbSIZE);
while(HT[address] != randARRAY[key] && attempts < tbSIZE){
address = linearPROBE(address,HT,randARRAY[key],tbSIZE,probe);
found++;
attempts++;
}
key = key + 2;
attempts = 0;
}
found = probe / found;
cout << "Linear Probing." << endl;
cout << probe << " items examined ";
cout << "(avg = " << found << " items examined per search.)" << endl;
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
差一分。这将填充 randARRAY 的前 MAX_KEYS+1 个元素。
这会再次询问,然后使用旧的 userCHOOSE 值。您想要
return hashTableSize();
那么真正的问题是:在
openAddressing
中,您在randARRAY[key] != 0
时进行扫描。您的 randARRAY 不是 0 终止的(在 main 中设置 tbSIZE 将覆盖您之前相差一的 randARRAY[5000]),因此您将扫描超过 5000 个键。然后在 listSearch 中,您将访问 randARRAY[key] 来获取大于 5000 的键,这意味着您正在读取“垃圾”数据,例如负数。你对它进行散列(模数),它仍然是负数。然后你访问 HT[负值] 就会崩溃。编辑:修复:
这将防止相差一,使您的 0 终止扫描工作,因此最大键值为正确的值 5000。
Off by one. This fills the first MAX_KEYS+1 elements of randARRAY.
This asks again then uses the old userCHOOSE value. You want
return hashTableSize();
Then the real problem: In
openAddressing
you scan whilerandARRAY[key] != 0
. Your randARRAY isn't 0-terminated (setting tbSIZE in main will overwrite your earlier off-by-one randARRAY[5000]), so you will scan beyond 5000 keys. Then in listSearch you'll access randARRAY[key] for key larger than 5000, which means you're reading "garbage" data, e.g. a negative number. You hash it (modulus) and it remains negative. You then access HT[negative value] which crashes.EDIT: The fix:
This will prevent the off-by-one, make your 0-terminated scan work, and therefore max key at the correct value 5000.