• 引入哈希桶的概念来实现一个哈希表

    学习一下国外大牛的思路
    服务器君一共花费 22.440 ms 进行了 2 次数据库查询,努力地为您提供了这个页面。
    广告很萌的

    前面的讲述了如何用链地址法实现一个哈希表,那么今天来分析一下另一种解决哈希冲突的做法,即为每个Hash值,建立一个Hash桶(Bucket),桶的容量是固定的,也就是只能处理固定次数的冲突,如1048576个Hash桶,每个桶中有4个表项(Entry),总计4M个表项。其实这两种的实现思路雷同,就是对Hash表中每个Hash值建立一个冲突表,即将冲突的几个记录以表的形式存储在其中。

    大致的思路是这样的:

    首先哈希桶的个数是固定的,有用户构建的时候输入,一旦构建,个数就已经固定;查找的时候首先将key值通过哈希函数获取哈希值,根据哈希值获取到对应的哈希桶,然后遍历哈希桶内的pairs数组获取。

    主要的数据结构:

    struct Pair {
        char *key;
        char *value;
    };
    
    struct Bucket {
        unsigned int count;
        Pair *pairs;
    };
    
    struct StrMap {
        unsigned int count;
        Bucket *buckets;
    };
    
    • 本小节主要是学习一下国外大牛是如何实现哈希表的。完整的代码,请看:这里,一位圣安德鲁斯大学的讲师:KRISTENSSON博客

    strmap.h

    #ifndef _STRMAP_H_
    #define _STRMAP_H_
    
    #ifdef __cplusplus
    extern "C"
    {
    #endif
    
    #include <stdlib.h>
    #include <string.h>
    
    typedef struct StrMap StrMap;
    
    /*
     * This callback function is called once per key-value when iterating over
     * all keys associated to values.
     *
     * Parameters:
     *
     * key: A pointer to a null-terminated C string. The string must not
     * be modified by the client.
     *
     * value: A pointer to a null-terminated C string. The string must
     * not be modified by the client.
     *
     * obj: A pointer to a client-specific object. This parameter may be
     * null.
     *
     * Return value: None.
     */
    typedef void(*sm_enum_func)(const char *key, const char *value, const void *obj);
    
    /*
     * Creates a string map.
     *
     * Parameters:
     *
     * capacity: The number of top-level slots this string map
     * should allocate. This parameter must be > 0.
     *
     * Return value: A pointer to a string map object, 
     * or null if a new string map could not be allocated.
     */
    StrMap * sm_new(unsigned int capacity);
    
    /*
     * Releases all memory held by a string map object.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     * If the supplied string map has been previously released, the
     * behaviour of this function is undefined.
     *
     * Return value: None.
     */
    void sm_delete(StrMap *map);
    
    /*
     * Returns the value associated with the supplied key.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     *
     * key: A pointer to a null-terminated C string. This parameter cannot
     * be null.
     *
     * out_buf: A pointer to an output buffer which will contain the value,
     * if it exists and fits into the buffer.
     *
     * n_out_buf: The size of the output buffer in bytes.
     *
     * Return value: If out_buf is set to null and n_out_buf is set to 0 the return
     * value will be the number of bytes required to store the value (if it exists)
     * and its null-terminator. For all other parameter configurations the return value
     * is 1 if an associated value was found and completely copied into the output buffer,
     * 0 otherwise.
     */
    int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf);
    
    /*
     * Queries the existence of a key.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     *
     * key: A pointer to a null-terminated C string. This parameter cannot
     * be null.
     *
     * Return value: 1 if the key exists, 0 otherwise.
     */
    int sm_exists(const StrMap *map, const char *key);
    
    /*
     * Associates a value with the supplied key. If the key is already
     * associated with a value, the previous value is replaced.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     *
     * key: A pointer to a null-terminated C string. This parameter
     * cannot be null. The string must have a string length > 0. The
     * string will be copied.
     *
     * value: A pointer to a null-terminated C string. This parameter
     * cannot be null. The string must have a string length > 0. The
     * string will be copied.
     *
     * Return value: 1 if the association succeeded, 0 otherwise.
     */
    int sm_put(StrMap *map, const char *key, const char *value);
    
    /*
     * Returns the number of associations between keys and values.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     *
     * Return value: The number of associations between keys and values.
     */
    int sm_get_count(const StrMap *map);
    
    /*
     * An enumerator over all associations between keys and values.
     *
     * Parameters:
     *
     * map: A pointer to a string map. This parameter cannot be null.
     *
     * enum_func: A pointer to a callback function that will be
     * called by this procedure once for every key associated
     * with a value. This parameter cannot be null.
     *
     * obj: A pointer to a client-specific object. This parameter will be
     * passed back to the client's callback function. This parameter can
     * be null.
     *
     * Return value: 1 if enumeration completed, 0 otherwise.
     */
    int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj);
    
    #ifdef __cplusplus
    }
    #endif
    
    #endif
    

    strmap.c

    #include "strmap.h"
    
    typedef struct Pair Pair;
    
    typedef struct Bucket Bucket;
    
    struct Pair {
    	char *key;
    	char *value;
    };
    
    struct Bucket {
    	unsigned int count;
    	Pair *pairs;
    };
    
    struct StrMap {
    	unsigned int count;
    	Bucket *buckets;
    };
    
    static Pair * get_pair(Bucket *bucket, const char *key);
    static unsigned long hash(const char *str);
    
    StrMap * sm_new(unsigned int capacity)
    {
    	StrMap *map;
    	
    	map = malloc(sizeof(StrMap));
    	if (map == NULL) {
    		return NULL;
    	}
    	map->count = capacity;
    	map->buckets = malloc(map->count * sizeof(Bucket));
    	if (map->buckets == NULL) {
    		free(map);
    		return NULL;
    	}
    	memset(map->buckets, 0, map->count * sizeof(Bucket));
    	return map;
    }
    
    void sm_delete(StrMap *map)
    {
    	unsigned int i, j, n, m;
    	Bucket *bucket;
    	Pair *pair;
    
    	if (map == NULL) {
    		return;
    	}
    	n = map->count;
    	bucket = map->buckets;
    	i = 0;
    	while (i < n) {
    		m = bucket->count;
    		pair = bucket->pairs;
    		j = 0;
    		while(j < m) {
    			free(pair->key);
    			free(pair->value);
    			pair++;
    			j++;
    		}
    		free(bucket->pairs);
    		bucket++;
    		i++;
    	}
    	free(map->buckets);
    	free(map);
    }
    
    int sm_get(const StrMap *map, const char *key, char *out_buf, unsigned int n_out_buf)
    {
    	unsigned int index;
    	Bucket *bucket;
    	Pair *pair;
    
    	if (map == NULL) {
    		return 0;
    	}
    	if (key == NULL) {
    		return 0;
    	}
    	index = hash(key) % map->count;
    	bucket = &(map->buckets[index]);
    	pair = get_pair(bucket, key);
    	if (pair == NULL) {
    		return 0;
    	}
    	if (out_buf == NULL && n_out_buf == 0) {
    		return strlen(pair->value) + 1;
    	}
    	if (out_buf == NULL) {
    		return 0;
    	}
    	if (strlen(pair->value) >= n_out_buf) {
    		return 0;
    	}
    	strcpy(out_buf, pair->value);
    	return 1;
    }
    
    int sm_exists(const StrMap *map, const char *key)
    {
    	unsigned int index;
    	Bucket *bucket;
    	Pair *pair;
    
    	if (map == NULL) {
    		return 0;
    	}
    	if (key == NULL) {
    		return 0;
    	}
    	index = hash(key) % map->count;
    	bucket = &(map->buckets[index]);
    	pair = get_pair(bucket, key);
    	if (pair == NULL) {
    		return 0;
    	}
    	return 1;
    }
    
    int sm_put(StrMap *map, const char *key, const char *value)
    {
    	unsigned int key_len, value_len, index;
    	Bucket *bucket;
    	Pair *tmp_pairs, *pair;
    	char *tmp_value;
    	char *new_key, *new_value;
    
    	if (map == NULL) {
    		return 0;
    	}
    	if (key == NULL || value == NULL) {
    		return 0;
    	}
    	key_len = strlen(key);
    	value_len = strlen(value);
    	/* Get a pointer to the bucket the key string hashes to */
    	index = hash(key) % map->count;
    	bucket = &(map->buckets[index]);
    	/* Check if we can handle insertion by simply replacing
    	 * an existing value in a key-value pair in the bucket.
    	 */
    	if ((pair = get_pair(bucket, key)) != NULL) {
    		/* The bucket contains a pair that matches the provided key,
    		 * change the value for that pair to the new value.
    		 */
    		if (strlen(pair->value) < value_len) {
    			/* If the new value is larger than the old value, re-allocate
    			 * space for the new larger value.
    			 */
    			tmp_value = realloc(pair->value, (value_len + 1) * sizeof(char));
    			if (tmp_value == NULL) {
    				return 0;
    			}
    			pair->value = tmp_value;
    		}
    		/* Copy the new value into the pair that matches the key */
    		strcpy(pair->value, value);
    		return 1;
    	}
    	/* Allocate space for a new key and value */
    	new_key = malloc((key_len + 1) * sizeof(char));
    	if (new_key == NULL) {
    		return 0;
    	}
    	new_value = malloc((value_len + 1) * sizeof(char));
    	if (new_value == NULL) {
    		free(new_key);
    		return 0;
    	}
    	/* Create a key-value pair */
    	if (bucket->count == 0) {
    		/* The bucket is empty, lazily allocate space for a single
    		 * key-value pair.
    		 */
    		bucket->pairs = malloc(sizeof(Pair));
    		if (bucket->pairs == NULL) {
    			free(new_key);
    			free(new_value);
    			return 0;
    		}
    		bucket->count = 1;
    	}
    	else {
    		/* The bucket wasn't empty but no pair existed that matches the provided
    		 * key, so create a new key-value pair.
    		 */
    		tmp_pairs = realloc(bucket->pairs, (bucket->count + 1) * sizeof(Pair));
    		if (tmp_pairs == NULL) {
    			free(new_key);
    			free(new_value);
    			return 0;
    		}
    		bucket->pairs = tmp_pairs;
    		bucket->count++;
    	}
    	/* Get the last pair in the chain for the bucket */
    	pair = &(bucket->pairs[bucket->count - 1]);
    	pair->key = new_key;
    	pair->value = new_value;
    	/* Copy the key and its value into the key-value pair */
    	strcpy(pair->key, key);
    	strcpy(pair->value, value);
    	return 1;
    }
    
    int sm_get_count(const StrMap *map)
    {
    	unsigned int i, j, n, m;
    	unsigned int count;
    	Bucket *bucket;
    	Pair *pair;
    
    	if (map == NULL) {
    		return 0;
    	}
    	bucket = map->buckets;
    	n = map->count;
    	i = 0;
    	count = 0;
    	while (i < n) {
    		pair = bucket->pairs;
    		m = bucket->count;
    		j = 0;
    		while (j < m) {
    			count++;
    			pair++;
    			j++;
    		}
    		bucket++;
    		i++;
    	}
    	return count;
    }
    
    int sm_enum(const StrMap *map, sm_enum_func enum_func, const void *obj)
    {
    	unsigned int i, j, n, m;
    	Bucket *bucket;
    	Pair *pair;
    
    	if (map == NULL) {
    		return 0;
    	}
    	if (enum_func == NULL) {
    		return 0;
    	}
    	bucket = map->buckets;
    	n = map->count;
    	i = 0;
    	while (i < n) {
    		pair = bucket->pairs;
    		m = bucket->count;
    		j = 0;
    		while (j < m) {
    			enum_func(pair->key, pair->value, obj);
    			pair++;
    			j++;
    		}
    		bucket++;
    		i++;
    	}
    	return 1;
    }
    
    /*
     * Returns a pair from the bucket that matches the provided key,
     * or null if no such pair exist.
     */
    static Pair * get_pair(Bucket *bucket, const char *key)
    {
    	unsigned int i, n;
    	Pair *pair;
    
    	n = bucket->count;
    	if (n == 0) {
    		return NULL;
    	}
    	pair = bucket->pairs;
    	i = 0;
    	while (i < n) {
    		if (pair->key != NULL && pair->value != NULL) {
    			if (strcmp(pair->key, key) == 0) {
    				return pair;
    			}
    		}
    		pair++;
    		i++;
    	}
    	return NULL;
    }
    
    /*
     * Returns a hash code for the provided string.
     */
    static unsigned long hash(const char *str)
    {
    	unsigned long hash = 5381;
    	int c;
    
    	while (c = *str++) {
    		hash = ((hash << 5) + hash) + c;
    	}
    	return hash;
    }
    

    前一节与这节这两种实现方法看似比较类似,但也有差异:

    基于哈希桶的情况下,由于Hash桶容量的限制,所以,有可能发生Hash表填不满的情况,也就是,虽然Hash表里面还有空位,但是新建的表项由于冲突过多,而不能装入Hash表中。不过,这样的实现也有其好处,就是查表的最大开销是可以确定的,因为最多处理的冲突数是确定的,所以算法的时间复杂度为O(1)+O(m),其中m为Hash桶容量。

    而另一种通过链表的实现,由于Hash桶的容量是无限的,因此,只要没有超出Hash表的最大容量,就能够容纳新建的表项。但是,一旦发生了Hash冲突严重的情况,就会造成Hash桶的链表过长,大大降低查找效率。在最坏的情况下,时间复杂度退化为O(n),其中n为Hash表的总容量。当然,这种情况的概率小之又小,几乎是可以忽略的。

更多 推荐条目

Welcome to NowaMagic Academy!

现代魔法 推荐于 2013-02-27 10:23   

本章最新发布
随机专题
  1. [数据结构] 散列表(哈希表) 13 个条目
  2. [PHP程序设计] htaccess 设置技巧 6 个条目
  3. [Python程序设计] Tornado背景知识介绍 4 个条目
  4. [Python程序设计] Python HTTP服务器 7 个条目
  5. [PHP程序设计] CodeIgniter与PHP框架设计 5 个条目
  6. [运维管理] 防火墙原理与应用 5 个条目
  7. [移动开发] Android加载器Loaders 5 个条目
  8. [移动开发] Android布局基本知识 3 个条目
  9. [Python程序设计] 写几个简单的Tornado程序吧 5 个条目
  10. [数据库技术] 无限级分类数据表设计 4 个条目
  11. [C语言程序设计] C语言里的全局变量 2 个条目
  12. [移动开发] 刷机与root相关 2 个条目
窗口 -- [八点]