整数集合是集合键的底层实现之一,当一个集合只包含整数时,redis会使用整数集合作为集合的底层实现,整数集合类型会尽可能的节约内存空间,只有新插入的数据大于encoding时,才会升级集合类型。
数据结构
整数集合的定义包含三个部分,encoding表示集合元素的数据类型,length表示集合包含多少元素,contents存储集合成员的一段连续的地址空间。
1、整数集合
typedef struct intset { // 集合的编码,可以为16 32 64位的整数类型 uint32_t encoding; // 集合长度 uint32_t length; // 集合成员数组,是一段连续的内存空间,具体元素的值与encoding有关 int8_t contents[]; } intset;
特点
1、整数集合内的元素从小到大有序排列。
2、当新的元素占用存储空间大于encoding时,整数集合会进行集合类型升级,然后再进行插入。
3、整数集合不支持数据类型的降级,如果删除一个64的整数时,32位的整数类型就可以满足,这时整数集合是不会进行降级操作的。
4、整数集合根据待插入的元素,来动态升级集合元素类型,可以提高数据插入的灵活性,避免了插入时的类型错误,并尽可能的节约内存。
源码
1、初始化整数集合
/* Create an empty intset. */ intset *intsetNew(void) { intset *is = zmalloc(sizeof(intset)); is->encoding = intrev32ifbe(INTSET_ENC_INT16); is->length = 0; return is; }
2、插入数据
/* Insert an integer in the intset */ intset *intsetAdd(intset *is, int64_t value, uint8_t *success) { uint8_t valenc = _intsetValueEncoding(value); uint32_t pos; if (success) *success = 1; /* Upgrade encoding if necessary. If we need to upgrade, we know that * this value should be either appended (if > 0) or prepended (if < 0), * because it lies outside the range of existing values. */ if (valenc > intrev32ifbe(is->encoding)) { // 如果添加的元素大于已有的encoding,则进行整体升级并插入 /* This always succeeds, so we don't need to curry *success. */ return intsetUpgradeAndAdd(is,value); } else { /* Abort if the value is already present in the set. * This call will populate "pos" with the right position to insert * the value when it cannot be found. */ if (intsetSearch(is,value,&pos)) { if (success) *success = 0; return is; } // 扩展数组长度,并且移动已有的元素 is = intsetResize(is,intrev32ifbe(is->length)+1); if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1); } _intsetSet(is,pos,value); is->length = intrev32ifbe(intrev32ifbe(is->length)+1); return is; }
3、升级数据类型
/* Upgrades the intset to a larger encoding and inserts the given integer. */ static intset *intsetUpgradeAndAdd(intset *is, int64_t value) { uint8_t curenc = intrev32ifbe(is->encoding); uint8_t newenc = _intsetValueEncoding(value); int length = intrev32ifbe(is->length); // 引起类型升级的只有新增的数字长度大于当前的encode, 大于0表示插入在集合末尾,小于0表示添加在集合头部 int prepend = value < 0 ? 1 : 0; /* First set new encoding and resize */ is->encoding = intrev32ifbe(newenc); is = intsetResize(is,intrev32ifbe(is->length)+1); /* Upgrade back-to-front so we don't overwrite values. * Note that the "prepend" variable is used to make sure we have an empty * space at either the beginning or the end of the intset. */ // 将原来的元素类型升级为新的数据类型 while(length--) // _intsetGetEncoded(is,length,curenc) 在升级之后的content上按原来的encode获取原来的数组元素的值,如果新元素小于0, 则已有的数组元素整体后移一个位置 _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc)); /* Set the value at the beginning or the end. */ // 添加的元素,如果小于0则插入在集合头部,大于0则放在集合尾部 if (prepend) _intsetSet(is,0,value); else _intsetSet(is,intrev32ifbe(is->length),value); // 集合长度+1 is->length = intrev32ifbe(intrev32ifbe(is->length)+1); return is; }
4、查找元素
/* Search for the position of "value". Return 1 when the value was found and * sets "pos" to the position of the value within the intset. Return 0 when * the value is not present in the intset and sets "pos" to the position * where "value" can be inserted. */ static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) { int min = 0, max = intrev32ifbe(is->length)-1, mid = -1; int64_t cur = -1; /* The value can never be found when the set is empty */ if (intrev32ifbe(is->length) == 0) { if (pos) *pos = 0; return 0; } else { /* Check for the case where we know we cannot find the value, * but do know the insert position. */ // 如果value比集合最大值还大,那pos就为length if (value > _intsetGet(is,max)) { if (pos) *pos = intrev32ifbe(is->length); return 0; } // 如果value比集合最小值还小,那pos就为0 else if (value < _intsetGet(is,0)) { if (pos) *pos = 0; return 0; } } // 二分查找指定元素 while(max >= min) { mid = ((unsigned int)min + (unsigned int)max) >> 1; cur = _intsetGet(is,mid); if (value > cur) { min = mid+1; } else if (value < cur) { max = mid-1; } else { break; } } // 如果待插入的val和二分查找最后的中间值一样则返回当前的位置,否则返回min的值 if (value == cur) { if (pos) *pos = mid; return 1; } else { if (pos) *pos = min; return 0; } }
5、删除一个元素
/* Delete integer from intset */ intset *intsetRemove(intset *is, int64_t value, int *success) { uint8_t valenc = _intsetValueEncoding(value); uint32_t pos; if (success) *success = 0; if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) { uint32_t len = intrev32ifbe(is->length); /* We know we can delete */ if (success) *success = 1; /* Overwrite value with tail and update length */ // 调整剩下元素位置和数组长度 if (pos < (len-1)) intsetMoveTail(is,pos+1,pos); is = intsetResize(is,len-1); is->length = intrev32ifbe(len-1); } return is; }