之前在网上看到暴雪Hash算法,觉得该算法非常不错,其查找的时间复杂度是O(1)(每个对象有唯一的哈希编码, 查找时只要根据哈希码作为数组下表即可以找到该对象)。最近我也参与了公司的一个项目,项目的数据多的有百万到千万级的数据量,刚好结合暴雪Hash算法,虽然没有真正的应用到实际当中,但确实是一个很好的算法,希望在以后的实际应用中用到,哈希表用来存储对象其哈希code用内存地址表示。这个列子对碰撞问题还有待处理。
////////////////////////////////////////////////////实列///////////////////////////////////////////
作者:July、wuliming、pkuoliver
出处:http://blog.csdn.net/v_JULY_v/article/details/6256463
//函数prepareCryptTable以下的函数生成一个长度为0x500(合10进制数:1280)的cryptTable[0x500] (0x500*5)
typedef struct
{
unsigned int nHashA;
unsigned int nHashB;
char bExists;
} SOMESTRUCTRUE;
typedef struct
{
char* pkey;
unsigned int weight;
}KEYNODE, *key_list;
//Hash表,用于生产字符串的哈希编码,可根据实际需要生产不同大小的CryptTable.
void prepareCryptTable()
{
unsigned long seed = 0x00100001, index1 = 0, index2 = 0, i;
//0x100*5=0x500
for( index1 = 0; index1 <0x100; index1++ )//0x100
{
for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100)//5
{
unsigned long temp1, temp2;
seed = (seed * 125 + 3) % 0x2AAAAB;
temp1 = (seed & 0xFFFF)<<0x10;
seed = (seed * 125 + 3) % 0x2AAAAB;
temp2 = (seed & 0xFFFF);
cryptTable[index2] = ( temp1 | temp2 );
}
}
}
//函数HashString以下函数计算lpszFileName 字符串的hash值,其中dwHashType 为hash的类型,
unsigned long HashString(const char *lpszkeyName, unsigned long dwHashType )
{
unsigned char *key = (unsigned char *)lpszkeyName;
unsigned long seed1 = 0x7FED7FED;
unsigned long seed2 = 0xEEEEEEEE;
int ch;
while( *key != 0 )
{
ch = *key++;
seed1 = cryptTable[(dwHashType<<8) + ch] ^ (seed1 + seed2);
seed2 = ch + seed1 + seed2 + (seed2<<5) + 3;
}
return seed1;
}
/////////////////////////////////////////////////////////////////////
//function: 哈希词典 编码
//parameter:
/////////////////////////////////////////////////////////////////////
MPQHASHTABLE TestHashTable[nTableSize];
unsigned int TestHashCTable[nTableSize];
unsigned int TestHashDTable[nTableSize];
key_list test_data[nTableSize];
//直接调用上面的hashstring,nHashPos就是对应的HASH值。
int insert_string(const char *string_in)
{
const int HASH_OFFSET = 0, HASH_C = 1, HASH_D = 2;
unsigned int nHash = HashString(string_in, HASH_OFFSET);
unsigned int nHashC = HashString(string_in, HASH_C);
unsigned int nHashD = HashString(string_in, HASH_D);
unsigned int nHashStart = nHash % nTableSize;
unsigned int nHashPos = nHashStart;
int ln, ires = 0;
while (TestHashTable[nHashPos].bExists)
{
// if (TestHashCTable[nHashPos] == (int) nHashC && TestHashDTable[nHashPos] == (int) nHashD)
// break;
// //...
// else
//如之前所提示读者的那般,暴雪的Hash算法对于查询那样处理可以,但对插入就不能那么解决
nHashPos = (nHashPos + 1) % nTableSize;
if (nHashPos == nHashStart)
break;
}
ln = strlen(string_in);
if (!TestHashTable[nHashPos].bExists && (ln < nMaxStrLen))
{
TestHashCTable[nHashPos] = nHashC;
TestHashDTable[nHashPos] = nHashD;
test_data[nHashPos] = (KEYNODE *) malloc (sizeof(KEYNODE));
if(test_data[nHashPos] == NULL)
{
printf("10000 EMS ERROR !!!!\n");
return 0;
}
test_data[nHashPos]->pkey = (char *)malloc(ln+1);
if(test_data[nHashPos]->pkey == NULL)
{
printf("10000 EMS ERROR !!!!\n");
return 0;
}
memset(test_data[nHashPos]->pkey, 0, ln+1);
strncpy(test_data[nHashPos]->pkey, string_in, ln);
*((test_data[nHashPos]->pkey)+ln) = 0;
test_data[nHashPos]->weight = nHashPos;
TestHashTable[nHashPos].bExists = 1;
}
else
{
if(TestHashTable[nHashPos].bExists)
printf("30000 in the hash table %s !!!\n", string_in);
else
printf("90000 strkey error !!!\n");
}
return nHashPos;
}