整数集合是集合键的底层实现之一,当一个集合只包含整数时,redis会使用整数集合作为集合的底层实现,整数集合类型会尽可能的节约内存空间,只有新插入的数据大于encoding时,才会升级集合类型。
数据结构
整数集合的定义包含三个部分,encoding表示集合元素的数据类型,length表示集合包含多少元素,contents存储集合成员的一段连续的地址空间。
1、整数集合
typedef struct intset {
// 集合的编码,可以为16 32 64位的整数类型
uint32_t encoding;
// 集合长度
uint32_t length;
// 集合成员数组,是一段连续的内存空间,具体元素的值与encoding有关
int8_t contents[];
} intset;特点
1、整数集合内的元素从小到大有序排列。
2、当新的元素占用存储空间大于encoding时,整数集合会进行集合类型升级,然后再进行插入。
3、整数集合不支持数据类型的降级,如果删除一个64的整数时,32位的整数类型就可以满足,这时整数集合是不会进行降级操作的。
4、整数集合根据待插入的元素,来动态升级集合元素类型,可以提高数据插入的灵活性,避免了插入时的类型错误,并尽可能的节约内存。
源码
1、初始化整数集合
/* Create an empty intset. */
intset *intsetNew(void) {
intset *is = zmalloc(sizeof(intset));
is->encoding = intrev32ifbe(INTSET_ENC_INT16);
is->length = 0;
return is;
}2、插入数据
/* Insert an integer in the intset */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
uint8_t valenc = _intsetValueEncoding(value);
uint32_t pos;
if (success) *success = 1;
/* Upgrade encoding if necessary. If we need to upgrade, we know that
* this value should be either appended (if > 0) or prepended (if < 0),
* because it lies outside the range of existing values. */
if (valenc > intrev32ifbe(is->encoding)) {
// 如果添加的元素大于已有的encoding,则进行整体升级并插入
/* This always succeeds, so we don't need to curry *success. */
return intsetUpgradeAndAdd(is,value);
} else {
/* Abort if the value is already present in the set.
* This call will populate "pos" with the right position to insert
* the value when it cannot be found. */
if (intsetSearch(is,value,&pos)) {
if (success) *success = 0;
return is;
}
// 扩展数组长度,并且移动已有的元素
is = intsetResize(is,intrev32ifbe(is->length)+1);
if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
}
_intsetSet(is,pos,value);
is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
return is;
}3、升级数据类型
/* Upgrades the intset to a larger encoding and inserts the given integer. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
uint8_t curenc = intrev32ifbe(is->encoding);
uint8_t newenc = _intsetValueEncoding(value);
int length = intrev32ifbe(is->length);
// 引起类型升级的只有新增的数字长度大于当前的encode, 大于0表示插入在集合末尾,小于0表示添加在集合头部
int prepend = value < 0 ? 1 : 0;
/* First set new encoding and resize */
is->encoding = intrev32ifbe(newenc);
is = intsetResize(is,intrev32ifbe(is->length)+1);
/* Upgrade back-to-front so we don't overwrite values.
* Note that the "prepend" variable is used to make sure we have an empty
* space at either the beginning or the end of the intset. */
// 将原来的元素类型升级为新的数据类型
while(length--)
// _intsetGetEncoded(is,length,curenc) 在升级之后的content上按原来的encode获取原来的数组元素的值,如果新元素小于0, 则已有的数组元素整体后移一个位置
_intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
/* Set the value at the beginning or the end. */
// 添加的元素,如果小于0则插入在集合头部,大于0则放在集合尾部
if (prepend)
_intsetSet(is,0,value);
else
_intsetSet(is,intrev32ifbe(is->length),value);
// 集合长度+1
is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
return is;
}4、查找元素
/* Search for the position of "value". Return 1 when the value was found and
* sets "pos" to the position of the value within the intset. Return 0 when
* the value is not present in the intset and sets "pos" to the position
* where "value" can be inserted. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
int64_t cur = -1;
/* The value can never be found when the set is empty */
if (intrev32ifbe(is->length) == 0) {
if (pos) *pos = 0;
return 0;
} else {
/* Check for the case where we know we cannot find the value,
* but do know the insert position. */
// 如果value比集合最大值还大,那pos就为length
if (value > _intsetGet(is,max)) {
if (pos) *pos = intrev32ifbe(is->length);
return 0;
}
// 如果value比集合最小值还小,那pos就为0
else if (value < _intsetGet(is,0)) {
if (pos) *pos = 0;
return 0;
}
}
// 二分查找指定元素
while(max >= min) {
mid = ((unsigned int)min + (unsigned int)max) >> 1;
cur = _intsetGet(is,mid);
if (value > cur) {
min = mid+1;
} else if (value < cur) {
max = mid-1;
} else {
break;
}
}
// 如果待插入的val和二分查找最后的中间值一样则返回当前的位置,否则返回min的值
if (value == cur) {
if (pos) *pos = mid;
return 1;
} else {
if (pos) *pos = min;
return 0;
}
}5、删除一个元素
/* Delete integer from intset */
intset *intsetRemove(intset *is, int64_t value, int *success) {
uint8_t valenc = _intsetValueEncoding(value);
uint32_t pos;
if (success) *success = 0;
if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
uint32_t len = intrev32ifbe(is->length);
/* We know we can delete */
if (success) *success = 1;
/* Overwrite value with tail and update length */
// 调整剩下元素位置和数组长度
if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
is = intsetResize(is,len-1);
is->length = intrev32ifbe(len-1);
}
return is;
}