如何使用 boost::unordered_map

发布于 2024-09-28 14:08:29 字数 3326 浏览 3 评论 0原文

对于我的应用程序，我需要使用哈希映射，因此我编写了一个测试程序，其中将基类的一些实例存储在 boost::unordered_map 中。但我想通过调用返回基类的派生类的特殊函数来访问实例，并且我使用这些函数的参数作为 unordered_map 的哈希键。如果没有找到具有某些参数的类，则生成一个类并将其存储在映射中。该程序的目的可能不清楚，但这是代码。

#include <boost/unordered_map.hpp>
#include <iostream>

using namespace std;
using namespace boost;
typedef unsigned char BYT;
typedef unsigned long long ULL;

class BaseClass
{
public:
    int sign;
    size_t HASHCODE;
    BaseClass(){}
};

class  ClassA : public BaseClass
{
public:
    int AParam1;
    int AParam2;
    ClassA(int s1, int s2) : AParam1(s1), AParam2(s2)
    {
        sign = AParam1;
    }
};


struct HashKey
{
    ULL * hasharray;
    size_t hashNum;
    size_t HASHCODE;
    HashKey(ULL * ULLarray,  size_t Hashnum) : hasharray(ULLarray), hashNum(Hashnum), HASHCODE(0)
    {   }
    bool operator == (const HashKey & hk ) const
    {
        bool deg = (hashNum == hk.hashNum);
        if (deg)
        {
            for (int i = 0; i< hashNum;i++)
                if(hasharray[i] != hk.hasharray[i]) return false;
        }
        return deg;
    }
};

struct ihash : std::unary_function<HashKey, std::size_t>
{
    std::size_t operator()(HashKey const & x) const
    {
        std::size_t seed = 0;
        if (x.hashNum == 1)
            seed = x.hasharray[0];
        else
        {
            int amount = x.hashNum * 8;
            const std::size_t fnv_prime = 16777619u;
            BYT * byt = (BYT*)x.hasharray;
            for (int i = 0; i< amount;i++)
            {
                seed ^= byt[0];
                seed *= fnv_prime;
            }
        }
        return seed;
    }
};

typedef std::pair<HashKey,BaseClass*> HashPair;
unordered_map<HashKey,BaseClass*,ihash> UMAP;
typedef unordered_map<HashKey,BaseClass*,ihash>::iterator iter;


BaseClass * & FindClass(ULL* byt, int Num, size_t & HCode)
{
    HashKey hk(byt,Num); 
    HashPair hp(hk,0);
    std::pair<iter,bool> xx = UMAP.insert(hp);
//  if (xx.second) UMAP.rehash((UMAP.size() + 1) / UMAP.max_load_factor() + 1);
    if (!xx.first->second) HCode = UMAP.hash_function()(hk);
    return xx.first->second;
}


template <typename T, class A,class B> 
T* GetClass(size_t& hashcode ,A a, B b)
{   
    ULL byt[3] = {a,b,hashcode};
    BaseClass *& cls = FindClass(byt, 3, hashcode);
    if(! cls){ cls = new T(a,b); cls->HASHCODE = hashcode;}
    return static_cast<T*>(cls);
}



ClassA * findA(int Period1, int Period2)
{
    size_t classID = 100;
    return GetClass<ClassA>(classID,Period1,Period2);
}

int main(int argc, char* argv[])
{
    int limit = 1000;
     int modnum = 40;
    int result = 0;

    for(int i = 0 ; i < limit; i++ )
    {
        result += findA( rand() % modnum ,4)->sign ;
    }

    cout << UMAP.size() << "," << UMAP.bucket_count() << "," << result <<  endl;

    int x = 0;

    for(iter it =  UMAP.begin(); it != UMAP.end(); it++)
    {
        cout << ++x << "," << it->second->HASHCODE << "," << it->second->sign << endl ;
        delete it->second;

    }

    return 0;
}

问题是，我预计 UMAP 的大小等于 modnum，但它始终大于 modnum，这意味着有多个实例具有相同的参数和 HASHCODE。

我的问题的解决办法是什么？请帮忙。
谢谢

原文

for my application, i need to use a hash map, so i have written a test program in which i store some instances of a baseclass in a boost::unordered_map. but i want to reach the instances by calling special functions which return a derived class of the base and i use those functions' parameters for hash key of unordered_map. if no class is found with certain parameters then a class is generated and stored in map. the purpose of the program may not be clear but here is the code.

#include <boost/unordered_map.hpp>
#include <iostream>

using namespace std;
using namespace boost;
typedef unsigned char BYT;
typedef unsigned long long ULL;

class BaseClass
{
public:
    int sign;
    size_t HASHCODE;
    BaseClass(){}
};

class  ClassA : public BaseClass
{
public:
    int AParam1;
    int AParam2;
    ClassA(int s1, int s2) : AParam1(s1), AParam2(s2)
    {
        sign = AParam1;
    }
};


struct HashKey
{
    ULL * hasharray;
    size_t hashNum;
    size_t HASHCODE;
    HashKey(ULL * ULLarray,  size_t Hashnum) : hasharray(ULLarray), hashNum(Hashnum), HASHCODE(0)
    {   }
    bool operator == (const HashKey & hk ) const
    {
        bool deg = (hashNum == hk.hashNum);
        if (deg)
        {
            for (int i = 0; i< hashNum;i++)
                if(hasharray[i] != hk.hasharray[i]) return false;
        }
        return deg;
    }
};

struct ihash : std::unary_function<HashKey, std::size_t>
{
    std::size_t operator()(HashKey const & x) const
    {
        std::size_t seed = 0;
        if (x.hashNum == 1)
            seed = x.hasharray[0];
        else
        {
            int amount = x.hashNum * 8;
            const std::size_t fnv_prime = 16777619u;
            BYT * byt = (BYT*)x.hasharray;
            for (int i = 0; i< amount;i++)
            {
                seed ^= byt[0];
                seed *= fnv_prime;
            }
        }
        return seed;
    }
};

typedef std::pair<HashKey,BaseClass*> HashPair;
unordered_map<HashKey,BaseClass*,ihash> UMAP;
typedef unordered_map<HashKey,BaseClass*,ihash>::iterator iter;


BaseClass * & FindClass(ULL* byt, int Num, size_t & HCode)
{
    HashKey hk(byt,Num); 
    HashPair hp(hk,0);
    std::pair<iter,bool> xx = UMAP.insert(hp);
//  if (xx.second) UMAP.rehash((UMAP.size() + 1) / UMAP.max_load_factor() + 1);
    if (!xx.first->second) HCode = UMAP.hash_function()(hk);
    return xx.first->second;
}


template <typename T, class A,class B> 
T* GetClass(size_t& hashcode ,A a, B b)
{   
    ULL byt[3] = {a,b,hashcode};
    BaseClass *& cls = FindClass(byt, 3, hashcode);
    if(! cls){ cls = new T(a,b); cls->HASHCODE = hashcode;}
    return static_cast<T*>(cls);
}



ClassA * findA(int Period1, int Period2)
{
    size_t classID = 100;
    return GetClass<ClassA>(classID,Period1,Period2);
}

int main(int argc, char* argv[])
{
    int limit = 1000;
     int modnum = 40;
    int result = 0;

    for(int i = 0 ; i < limit; i++ )
    {
        result += findA( rand() % modnum ,4)->sign ;
    }

    cout << UMAP.size() << "," << UMAP.bucket_count() << "," << result <<  endl;

    int x = 0;

    for(iter it =  UMAP.begin(); it != UMAP.end(); it++)
    {
        cout << ++x << "," << it->second->HASHCODE << "," << it->second->sign << endl ;
        delete it->second;

    }

    return 0;
}

the problem is, i expect that the size of UMAP is equal to modnum however it is allways greater than modnum which means there are more than one instance that has the same parameters and HASHCODE.

what is the solution to my problem? please help.
thanks

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

真心难拥有 2024-10-05 14:08:29

这里有几个设计问题：

struct HashKey
{
    ULL * hasharray;
    ...

您的键类型存储指向某个数组的指针。但是这个指针是用本地对象的地址初始化的：

BaseClass * & FindClass(ULL* byt, int Num, size_t & HCode)
{
    HashKey hk(byt,Num); // <-- !!!
    HashPair hp(hk,0);
    std::pair<iter,bool> xx = UMAP.insert(hp);
    if (!xx.first->second) HCode = UMAP.hash_function()(hk);
    return xx.first->second;
}

template <typename T, class A,class B> 
T* GetClass(size_t& hashcode ,A a, B b)
{   
    ULL byt[3] = {a,b,hashcode}; // <-- !!!
    BaseClass *& cls = FindClass(byt, 3, hashcode);
    if(! cls){ cls = new T(a,b); cls->HASHCODE = hashcode;}
    return static_cast<T*>(cls);
}

这使得映射存储一个带有悬空指针的 HashKey 对象。此外，您还返回对 FindClass 中名为 xx 的函数本地对象的成员的引用。使用此引用会调用未定义的行为。

考虑重命名地图的键类型。哈希码本身不应该是密钥。正如 HashKey 的运算符 == 所示，您不希望实际的键是哈希码，而是可变长度的整数序列。另外，请考虑将序列存储在键类型内部而不是指针，例如作为向量。此外，避免返回对函数本地对象的引用。

Here are a couple of design problems:

struct HashKey
{
    ULL * hasharray;
    ...

Your key type stores a pointer to some array. But this pointer is initialized with the address of a local object:

BaseClass * & FindClass(ULL* byt, int Num, size_t & HCode)
{
    HashKey hk(byt,Num); // <-- !!!
    HashPair hp(hk,0);
    std::pair<iter,bool> xx = UMAP.insert(hp);
    if (!xx.first->second) HCode = UMAP.hash_function()(hk);
    return xx.first->second;
}

template <typename T, class A,class B> 
T* GetClass(size_t& hashcode ,A a, B b)
{   
    ULL byt[3] = {a,b,hashcode}; // <-- !!!
    BaseClass *& cls = FindClass(byt, 3, hashcode);
    if(! cls){ cls = new T(a,b); cls->HASHCODE = hashcode;}
    return static_cast<T*>(cls);
}

This makes the map store a HashKey object with a dangling pointer. Also you are returning a reference to a member of a function local object called xx in FindClass. The use of this reference invokes undefined behaviour.

Consider renaming the map's key type. The hash code itself shouldn't be a key. And as your operator== for HashKey suggests, you don't want the actual key to be the hash code but the sequence of integers of variable length. Also, consider storing the sequence inside of the key type instead of a pointer, for example, as a vector. In addition, avoid returning references to function local objects.

回复收藏 0 原文