原文地址:cityhash–对字符串的哈希算法 – 无敌牛
欢迎参观我的个人博客:无敌牛 – 技术/著作/典籍/分享等
分享一个给字符串计算hash的开源库,谷歌出品。
源代码在:https://github.com/google/cityhash
可以自己下载:git clone https://github.com/google/cityhash.git
但是代码是C++的,编译出来的动态库不能在C语言中调用,需要稍做修改后,才能编译出来C语言的动态链接库。
可以按照下边 1.1节 的方式修改,也可以直接下载修改完的代码包。
cityhash.tar.gz 下载
下载后用 tar -zxvf cityhash.tar.gz 解压,然后从 1.2节 开始操作就可以。
编译安装
1.1 修改,使支持C语言调用
只需要修改 src/city.h 文件即可。找到需要在C语言调用的函数,在其前后增加 extern "C" { ... }(用 #ifdef __cplusplus 包起来)。修改后的文件如下:
// city.h, patched so that the exported hash functions get C linkage
// (unmangled symbol names) and can therefore be resolved from C code.
//
// NOTE(review): this header itself is still C++-only (it includes <utility>
// and uses std::pair), so the __cplusplus guards around extern "C" are
// always taken here; C callers need a separate C-compatible header.
// NOTE(review): the 128-bit functions pass/return std::pair across a C
// linkage boundary; some compilers warn about this. Whether a C struct of
// two uint64_t is ABI-compatible with it is platform-dependent -- verify.
#ifndef CITY_HASH_H_
#define CITY_HASH_H_

#include <stdlib.h>  // for size_t.
#include <stdint.h>
#include <utility>

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;
typedef std::pair<uint64, uint64> uint128;

// Accessors for the two halves of a uint128: `first` is the low 64 bits,
// `second` is the high 64 bits.
inline uint64 Uint128Low64(const uint128& x) { return x.first; }
inline uint64 Uint128High64(const uint128& x) { return x.second; }

#ifdef __cplusplus
extern "C" {
#endif

// Hash function for a byte array.
uint64 CityHash64(const char *buf, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

// Hash function for a byte array.
uint128 CityHash128(const char *s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array.  Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);

#ifdef __cplusplus
}  // extern "C"
#endif

// Hash 128 input bits down to 64 bits of output.
// This is intended to be a reasonably good hash function.
inline uint64 Hash128to64(const uint128& x) {
  // Murmur-inspired hashing.
  const uint64 kMul = 0x9ddfea08eb382d69ULL;
  uint64 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
  a ^= (a >> 47);
  uint64 b = (Uint128High64(x) ^ a) * kMul;
  b ^= (b >> 47);
  b *= kMul;
  return b;
}

#endif  // CITY_HASH_H_
1.2 编译和安装
查看CPU是否支持 sse4_2 指令,例如在 Linux 上执行:grep -o sse4_2 /proc/cpuinfo ,有输出则表示支持。
生成 Makefile 文件,命令:./configure --enable-sse4.2 。如果cpu不支持 sse4_2 指令,可以不加参数 --enable-sse4.2 。
编译,命令:make all check CXXFLAGS="-g -O3 -msse4.2" 。如果cpu不支持 sse4_2 指令,可以不加参数 -msse4.2 。
安装,命令:make install
动态库会安装在 /usr/local/lib/ 目录,头文件在 /usr/local/include/ 目录。
1.3 增加C语言可引入的头文件
在 /usr/local/include/ 目录下创建 cityhash.h 文件。内容如下:
// C-compatible declarations for the CityHash functions exported with C
// linkage by the patched library. Include this header from C code instead
// of the library's own C++ header.
#ifndef CITY_HASH_C_H_
#define CITY_HASH_C_H_

#include <stddef.h>  // for size_t.
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef uint8_t uint8;
typedef uint32_t uint32;
typedef uint64_t uint64;

// 128-bit value as two 64-bit words.
// NOTE(review): presumably val[0]/val[1] correspond to the first/second
// (low/high) members of the std::pair<uint64, uint64> the C++ library uses;
// passing this struct by value relies on both types sharing the same calling
// convention on the target platform -- confirm before porting.
typedef struct { uint64_t val[2] ; } uint128;

// Hash function for a byte array.
uint64 CityHash64(const char *buf, size_t len);

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
uint64 CityHash64WithSeed(const char *buf, size_t len, uint64 seed);

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
uint64 CityHash64WithSeeds(const char *buf, size_t len,
                           uint64 seed0, uint64 seed1);

// Hash function for a byte array.
uint128 CityHash128(const char *s, size_t len);

// Hash function for a byte array.  For convenience, a 128-bit seed is also
// hashed into the result.
uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed);

// Hash function for a byte array.  Most useful in 32-bit binaries.
uint32 CityHash32(const char *buf, size_t len);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // CITY_HASH_C_H_
测试
2.1 创建测试文件 testch.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>#include "cityhash.h"int test(char * data) ;int main(int argc, char * argv[]){char * data = "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh" ;test(data) ;char * data2 = "aaaaaaaaaaaaaaabbbbbbbbbbbbccccccccccccdddddddddddddeeeeeeeeeeeffffffffffgggggggghhhhhhh3333" ;test(data2) ;return 0 ;
}int test(char * data) {uint32 key32 = CityHash32(data, strlen(data) ) ;printf("key32 [%u]\n", key32) ;uint64 key64 = CityHash64(data, strlen(data) ) ;printf("key64 [%lu]\n", key64) ;key64 = CityHash64WithSeed(data, strlen(data), 123123123 ) ;printf("key64 [%lu]\n", key64) ;key64 = CityHash64WithSeeds(data, strlen(data), 123123123, 321321321212121) ;printf("key64 [%lu]\n", key64) ;uint128 key128 = CityHash128(data, strlen(data) ) ;printf("key128 [%lu] [%lu]\n", key128.val[0], key128.val[1]) ;uint128 seed ;seed.val[0] = 123123123123 ;seed.val[1] = 33344455556666 ;key128 = CityHash128WithSeed(data, strlen(data), seed) ;printf("key128 [%lu] [%lu]\n", key128.val[0], key128.val[1]) ;return 0 ;
}