字典,又称关联数组或者映射,是用于保存键值对的数据结构,字典中的每一个key都是唯一的。通常用作hash类型和string类型的数据存储结构。
数据结构
redis的字典是由两张hash表组成的,以用户字典的扩容,hash表的基本数据类型是hash表节点。具体定义如下。
1、字典
typedef struct dict { dictType *type; // 保存了type中需要用的私有数据 void *privdata; // ht[0]用来正常访问,ht[1]用来rehashing dictht ht[2]; // 指的是ht[0]的第几个元素,而第几个节点(hash表会通过连地址法将多个hash值相同的节点以单链表的形式挂接在一起) long rehashidx; /* rehashing not in progress if rehashidx == -1 */ int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */ } dict;
2、hash表
typedef struct dictht { dictEntry **table; // hash表的大小 unsigned long size; // 一般为 size - 1 unsigned long sizemask; // 表示hash表中已经存在的节点数,used可能大于size,因为相同hash值得节点会存储在同一个hash表的元素中 unsigned long used; } dictht;
3、hash表节点
typedef struct dictEntry { void *key; union { void *val; uint64_t u64; int64_t s64; double d; } v; // 保存相同hash值的下个节点数据 struct dictEntry *next; } dictEntry;
4、字典类型
typedef struct dictType { uint64_t (*hashFunction)(const void *key); void *(*keyDup)(void *privdata, const void *key); void *(*valDup)(void *privdata, const void *obj); int (*keyCompare)(void *privdata, const void *key1, const void *key2); void (*keyDestructor)(void *privdata, void *key); void (*valDestructor)(void *privdata, void *obj); int (*expandAllowed)(size_t moreMem, double usedRatio); } dictType;
特点
1、链地址法解决hash冲突。相同的hash值得节点会存储在hash表的相同位置,新插入的在已有数据前,以提高新数据的查询效率。
2、自动扩容。当ht.used >= ht.size > 1 或者 ht.used / ht.size > 5时,会自动对hash表进行扩容操作。
3、渐进式rehash。rehash时,redis不是一次性的将ht[0]中的所有节点移至ht[1]中,而是分多次、渐进式地完成的。这个动作会发生对字典的增、删、改、查等过程中渐进式的完成。并且在rehash过程中,新增的key会被插入到ht[1]中。
源码
1、初始化
/* Initialize the hash table */ int _dictInit(dict *d, dictType *type, void *privDataPtr) { _dictReset(&d->ht[0]); _dictReset(&d->ht[1]); d->type = type; d->privdata = privDataPtr; d->rehashidx = -1; d->pauserehash = 0; return DICT_OK; }
2、获取hash表的idx
/* Returns the index of a free slot that can be populated with * a hash entry for the given 'key'. * If the key already exists, -1 is returned * and the optional output parameter may be filled. * * Note that if we are in the process of rehashing the hash table, the * index is always returned in the context of the second (new) hash table. * * 查找key所在的ht下标(idx) * */ static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing) { unsigned long idx, table; dictEntry *he; if (existing) *existing = NULL; /* Expand the hash table if needed */ if (_dictExpandIfNeeded(d) == DICT_ERR) return -1; for (table = 0; table <= 1; table++) { idx = hash & d->ht[table].sizemask; /* Search if this slot does not already contain the given key */ // 查看key是否已存在 he = d->ht[table].table[idx]; while(he) { if (key==he->key || dictCompareKeys(d, key, he->key)) { if (existing) *existing = he; return -1; } he = he->next; } if (!dictIsRehashing(d)) break; } return idx; }
3、插入一个key
/* Low level add or find: * This function adds the entry but instead of setting a value returns the * dictEntry structure to the user, that will make sure to fill the value * field as they wish. * * This function is also directly exposed to the user API to be called * mainly in order to store non-pointers inside the hash value, example: * * entry = dictAddRaw(dict,mykey,NULL); * if (entry != NULL) dictSetSignedIntegerVal(entry,1000); * * Return values: * * If key already exists NULL is returned, and "*existing" is populated * with the existing entry if existing is not NULL. * * If key was added, the hash entry is returned to be manipulated by the caller. * * 向字典中添加一个key,如果字典中已经存在,则将存在的节点用existing返回。如果正在进行hash将key添加到ht[1]中。 */ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { long index; dictEntry *entry; dictht *ht; // 若果正在rehash 则进行一步渐进式hash if (dictIsRehashing(d)) _dictRehashStep(d); /* Get the index of the new element, or -1 if * the element already exists. */ if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1) return NULL; /* Allocate the memory and store the new entry. * Insert the element in top, with the assumption that in a database * system it is more likely that recently added entries are accessed * more frequently. */ // 分配内存,并将新节点插入到相同index节点的前面,以便更快的访问 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0]; entry = zmalloc(sizeof(*entry)); entry->next = ht->table[index]; ht->table[index] = entry; ht->used++; /* Set the hash entry fields. */ dictSetKey(d, entry, key); return entry; }
4、rehash
/* Performs N steps of incremental rehashing. Returns 1 if there are still * keys to move from the old to the new hash table, otherwise 0 is returned. * * Note that a rehashing step consists in moving a bucket (that may have more * than one key as we use chaining) from the old to the new hash table, however * since part of the hash table may be composed of empty spaces, it is not * guaranteed that this function will rehash even a single bucket, since it * will visit at max N*10 empty buckets in total, otherwise the amount of * work it does would be unbound and the function may block for a long time. * * n 表示处理几个ht[0]中元素 * 返回0表示已经rehash完,返回1表示已经正在hash中 * */ int dictRehash(dict *d, int n) { int empty_visits = n*10; /* Max number of empty buckets to visit. */ if (!dictIsRehashing(d)) return 0; while(n-- && d->ht[0].used != 0) { dictEntry *de, *nextde; /* Note that rehashidx can't overflow as we are sure there are more * elements because ht[0].used != 0 */ // 如果本次处理够了empty_visits个null成员则返回1 assert(d->ht[0].size > (unsigned long)d->rehashidx); while(d->ht[0].table[d->rehashidx] == NULL) { d->rehashidx++; if (--empty_visits == 0) return 1; } de = d->ht[0].table[d->rehashidx]; /* Move all the keys in this bucket from the old to the new hash HT */ // 将ht[0].table[d->rehashidx]指向的相同hash值的单链表,移动到ht[1]中 while(de) { uint64_t h; nextde = de->next; /* Get the index in the new hash table */ h = dictHashKey(d, de->key) & d->ht[1].sizemask; de->next = d->ht[1].table[h]; d->ht[1].table[h] = de; d->ht[0].used--; d->ht[1].used++; de = nextde; } d->ht[0].table[d->rehashidx] = NULL; d->rehashidx++; } /* Check if we already rehashed the whole table... */ if (d->ht[0].used == 0) { zfree(d->ht[0].table); d->ht[0] = d->ht[1]; _dictReset(&d->ht[1]); d->rehashidx = -1; return 0; } /* More to rehash... */ return 1; }