SETBIT key offset value
对 key 所储存的字符串值,设置或清除指定偏移量上的位(bit)。
位的设置或清除取决于 value 参数,可以是 0 也可以是 1 。
当 key 不存在时,自动生成一个新的字符串值。
字符串会进行伸展(grown)以确保它可以将 value 保存在指定的偏移量上。当字符串值进行伸展时,空白位置以 0 填充。
offset 参数必须大于或等于 0 ,小于 2^32 (bit 映射被限制在 512 MB 之内)。
- 可用版本:>= 2.2.0
- 时间复杂度: O(1)
- 返回值:指定偏移量原来储存的位。
[code]redis> SETBIT bit 10086 1(integer) 0redis> GETBIT bit 10086(integer) 1redis> GETBIT bit 100 # bit 默认被初始化为 0(integer) 0
[/code][code]// BKDR Hash Functionunsigned int BKDRHash(char *str){ unsigned int seed = 131; // 31 131 1313 13131 131313 etc..[/code]
unsigned int hash = 0; while (*str)
{
hash = hash * seed + (*str++);
} return (hash & 0x7FFFFFFF);
}
[code]function getBKDRHashSeed($n) { if ($n === 0) return 31;[/code]
$j = $n + 2;
$r = 0; for ($i = 0; $i < $j; $i ++) { if ($i % 2) {// 奇数
$r = $r * 10 + 3;
} else {
$r = $r * 10 + 1;
}
}
return $r;
}
function BKDRHash($str, $seed) {
$hash = 0;
$len = strlen($str);
$i = 0; while ($i < $len) {
$hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
$i ++;
}
return ($hash & 0x7FFFFFFF);
}
[code]class Bf{ public $redis; public $key; public $m; public $k; public function __construct($key, $m, $k) { if ($m > 4294967296) {[/code]
error_log('ERROR: m over 4294967296'); return false;
} $this->key = $key; $this->m = $m; $this->k = $k; $this->redis = new Redis(); $this->redis->connect('127.0.0.1', 6379);
} public function add($e) {
$e = (string)$e;
$c = 0; for ($i = 0; $i < $this->k; $i ++) {
$seed = self::getBKDRHashSeed($i);
$hash = self::BKDRHash($e, $seed);
$offset = $hash % $this->m;
$t1 = microtime(true);
$c += $this->redis->setbit($this->key, $offset, 1);
$t2 = microtime(true);
$cost = round(($t2-$t1)*1000, 3).'ms';
error_log('[' . date('Y-m-d H:i:s', time()) . '] DEBUG: redis-time-spent=' . $cost . ' entry=' . $e . ' c=' . $c);
} return $c === $this->k;
} public function flushall() { return $this->redis->delete($this->key);
} static public function getBKDRHashSeed($n) { if ($n === 0) return 31;
$j = $n + 2;
$r = 0; for ($i = 0; $i < $j; $i ++) { if ($i % 2) {// 奇数
$r = $r * 10 + 3;
} else {
$r = $r * 10 + 1;
}
} return $r;
} static public function BKDRHash($str, $seed) {
$hash = 0;
$len = strlen($str);
$i = 0; while ($i < $len) {
$hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
$i ++;
} return ($hash & 0x7FFFFFFF);
}
}
[code]#!/usr/bin/env php <?php$n = empty($_SERVER['argv'][1]) ? pow(2, 30) : intval($_SERVER['argv'][2]);for ($i = 0; $i < $n; $i ++) {[/code]
$word = genRandWord(); echo $word . "\n";
}function genRandWord() {
$max = rand(4, 12);
$chars = [];
for ($i = 0; $i < $max; $i ++) {
$chars[] = chr(rand(97, 122));
}
$word = join('', $chars); return $word;
}
[code][tf@jp002 bf4redis]$ cat sample.txt |wc -l[/code]
10000000
[tf@jp002 bf4redis]$ cat sample.txt |sort |uniq |wc -l
9254122
[code]$fp = fopen('./sample.txt', 'r');while ($word = fgets($fp)) {[/code]
$word = trim($word); if (empty($word)) { continue;
}
$rt = $bf->add($word); if ($rt) {
error_log('WARNING: ' . $word . ' EXIST!');
}
}fclose($fp);
[code][tf@jp002 bf4redis]$ cat v1.log |grep 'EXIST!' |wc -l745878[/code]
[code]class Bf{ public $redis; public $key; public $m; public $k; public function __construct($key, $m, $k) { if ($m > 4294967296) {[/code]优化后的测试结果
error_log('ERROR: m over 4294967296'); return false;
} $this->key = $key; $this->m = $m; $this->k = $k; $this->redis = new Redis(); $this->redis->connect('127.0.0.1', 6379);
} public function add($e) {
$e = (string)$e; $this->redis->multi(Redis::PIPELINE); for ($i = 0; $i < $this->k; $i ++) {
$seed = self::getBKDRHashSeed($i);
$hash = self::BKDRHash($e, $seed);
$offset = $hash % $this->m; $this->redis->setbit($this->key, $offset, 1);
}
$t1 = microtime(true);
$rt = $this->redis->exec();
$t2 = microtime(true);
$cost = round(($t2-$t1)*1000, 3).'ms';
$c = array_sum($rt);
error_log('[' . date('Y-m-d H:i:s', time()) . '] DEBUG: redis-time-spent=' . $cost . ' entry=' . $e . ' c=' . $c); return $c === $this->k;
} public function flushall() { return $this->redis->delete($this->key);
} static public function getBKDRHashSeed($n) { if ($n === 0) return 31;
$j = $n + 2;
$r = 0; for ($i = 0; $i < $j; $i ++) { if ($i % 2) {// 濂囨暟
$r = $r * 10 + 3;
} else {
$r = $r * 10 + 1;
}
} return $r;
} static public function BKDRHash($str, $seed) {
$hash = 0;
$len = strlen($str);
$i = 0; while ($i < $len) {
$hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
$i ++;
} return ($hash & 0x7FFFFFFF);
}
}
[code][tf@jp002 bf4redis]$ cat v2.log |grep 'EXIST!' |wc -l745878[/code]
bit 映射被限制在 512 MB 之内
False-Positive-Ratio表(含内存空间占用)
[code]class Bf{ public $key; public $m; public $k; public $nPartitions; public $redisCfg; public $nRedis; public $maxOffs = []; const MAX_PARTITION_SIZE = 4294967296; //redis string's max len is pow(2, 32) bits = 512MB[/code]
//const MAX_PARTITION_SIZE = 65536;
public function __construct($redisCfg, $key, $m, $k) { $this->nRedis = count($redisCfg); if ($m > self::MAX_PARTITION_SIZE) { $this->nPartitions = ceil(ceil($m / $this->nRedis) / self::MAX_PARTITION_SIZE);
} else { $this->nPartitions = 1;
} $this->key = $key; $this->m = $m; $this->k = $k; $this->redisCfg = $redisCfg;
} private function getPosition($e) {
$nRedis = count($this->redisCfg);
$hash = crc32($e);
$i = $hash % $nRedis;
$redis = SRedis::getSingeton($this->redisCfg[$i]);
$key = $this->key . '.' . $hash % $this->nPartitions; return [$i, $redis, $key];
} public function add($e) {
$e = (string)$e; list($n, $redis, $key) = $this->getPosition($e); //var_dump($this->key, $this->m, $this->k, $this->nRedis, $this->nPartitions, $redis, $key);
$redis->multi(Redis::PIPELINE); for ($i = 0; $i < $this->k; $i ++) {
$seed = self::getBKDRHashSeed($i);
$hash = self::BKDRHash($e, $seed);
$offset = $hash % $this->m; if ($offset > @$this->maxOffs[$n.'|'.$key]) $this->maxOffs[$n.'|'.$key] = $offset; //only 4 log
$redis->setbit($key, $offset, 1);
}
$t1 = microtime(true);
$rt = $redis->exec();
$t2 = microtime(true);
$cost = round(($t2-$t1)*1000, 3).'ms';
$c = array_sum($rt);
error_log('[' . date('Y-m-d H:i:s', time()) . '] DEBUG: redis[' . $n . ']-time-spent=' . $cost . ' maxOffset-' . $n.'|'.$key . '=' . $this->maxOffs[$n.'|'.$key] . ' entry=' . $e . ' c=' . $c); return $c === $this->k;
} public function flushall() { foreach ($this->redisCfg as $cfg) {
$redis = SRedis::getSingeton($cfg); for ($i = 0; $i < $this->nPartitions; $i ++) {
$redis->delete($this->key . '.' . $i);
}
}
} static public function getBKDRHashSeed($n) { if ($n === 0) return 31;
$j = $n + 2;
$r = 0; for ($i = 0; $i < $j; $i ++) { if ($i % 2) {// 濂囨暟
$r = $r * 10 + 3;
} else {
$r = $r * 10 + 1;
}
} return $r;
} static public function BKDRHash($str, $seed) {
$hash = 0;
$len = strlen($str);
$i = 0; while ($i < $len) {
$hash = ((floatval($hash * $seed) & 0x7FFFFFFF) + ord($str[$i])) & 0x7FFFFFFF;
$i ++;
} return ($hash & 0x7FFFFFFF);
}
}class SRedis{ public function getSingeton($cfg) { static $pool; if (empty($cfg) || !is_array($cfg)) { return false;
}
$k = serialize($cfg); if (empty($pool[$k])) {
$redis = new Redis();
call_user_func_array([$redis, 'connect'], array_values($cfg));
$pool[$k] = $redis;
} return $pool[$k];
}
}if ($_SERVER['argc'] < 4) { die("Usage: ./" . $_SERVER['argv'][0] . " <bloom-filter's name> <m> <k>\n");
}
$key = trim($_SERVER['argv'][3]);
$m = intval($_SERVER['argv'][4]);
$k = intval($_SERVER['argv'][5]);
$sampleFile = __DIR__ . '/sample.txt';
$redisCfg = [
[ 'host' => '127.0.0.1', 'port' => 6379, /*
'timeout' => 5,
'reserved' => null,
'retry_interval' => 1000,
'read_timeout' => 1,
*/
],
];
$bf = new Bf($redisCfg, $key, $m, $k);
$bf->flushall();
$fp = fopen($sampleFile, 'r');while ($word = fgets($fp)) {
$word = trim($word); if (empty($word)) { continue;
}
$rt = $bf->add($word); if ($rt) {
error_log('WARNING: ' . $word . ' EXIST!');
}
}
fclose($fp);
作者:田峰(田老师原360百科技术经理,现创业中)
本文为 @ 21CTO 创作并授权 21CTO 发布,未经许可,请勿转载。
内容授权事宜请您联系 webmaster@21cto.com或关注 21CTO 公众号。
该文观点仅代表作者本人,21CTO 平台仅提供信息存储空间服务。