Redis 实现布隆过滤器
昨天听马士兵教育张福刚讲公开课,里面讲解了布隆过滤器,今天无聊没事干,整理了一下笔记。关于布隆过滤器是什么东西,有什么应用场景就不做讨论了,网上有很多,大家可以自行了解,只记录实现:
1. pom 依赖
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>3.3.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>18.0</version>
</dependency>
2. 具体实现
package cn.bridgeli.demo;
import com.google.common.hash.Funnels;
import com.google.common.hash.Hashing;
import org.junit.Before;
import org.junit.Test;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.Pipeline;
import java.nio.charset.StandardCharsets;
/**
* @author BridgeLi
* @date 2020/6/6 16:38
*/
public class BloomFilter {
private Jedis jedis = null;
/**
* 预估的数据量
*/
private static long n = 10000;
/**
* 容忍的错误率
*/
private static double fpp = 0.01;
private static long numBits = optimalNumOfBits(n, fpp);
private static int numHashFunctions = optimalNumOfHashFunctions(n, numBits);
/**
* 根据预估数据量 n 和允许的错误率 fpp 计算需要的 bit 数组的长度
*
* @param n
* @param fpp
* @return
*/
private static long optimalNumOfBits(long n, double fpp) {
if (0 == fpp) {
fpp = Double.MIN_VALUE;
}
return (long) (-n * Math.log(fpp) / (Math.log(2) * Math.log(2)));
}
/**
* 根据预估的数据量和计算出来的需要的 bit 数组的长度,计算所需要的 hash 函数的个数
*
* @param n
* @param numBits
* @return
*/
private static int optimalNumOfHashFunctions(long n, long numBits) {
return Math.max(1, (int) Math.round((double) numBits / n * Math.log(2)));
}
/**
* 预热数据
*/
@Before
public void testBloomFilterBefore() {
BloomFilter bloomFilter = new BloomFilter();
bloomFilter.init();
for (int i = 0; i < n; i++) {
bloomFilter.put("bf", String.valueOf(i + 100));
}
}
/**
* 过滤数据
*/
@Test
public void testBloomFilter() {
BloomFilter bloomFilter = new BloomFilter();
bloomFilter.init();
int ex_count = 0;
int ne_count = 0;
for (int i = 0; i < 2 * n; i++) {
boolean exist = bloomFilter.isExist("bf", String.valueOf(i + 100));
if (exist) {
ex_count++;
} else {
ne_count++;
}
}
System.out.println("ex_count: " + ex_count + ", ne_count: " + ne_count);
}
private void init() {
JedisPool jedisPool = new JedisPool("127.0.0.1", 6379);
jedis = jedisPool.getResource();
}
public boolean isExist(String where, String key) {
long[] indexs = getIndexs(key);
boolean result = false;
try (Pipeline pipeline = jedis.pipelined()) {
for (long index : indexs) {
pipeline.getbit(where, index);
}
// 只要有一个位置为 false,即代表该数据不存在
result = !pipeline.syncAndReturnAll().contains(false);
} catch (Exception e) {
}
return result;
}
public void put(String where, String key) {
long[] indexs = getIndexs(key);
try (Pipeline pipeline = jedis.pipelined()) {
for (long index : indexs) {
pipeline.setbit(where, index, true);
}
pipeline.sync();
} catch (Exception e) {
}
}
private long[] getIndexs(String key) {
long hash1 = Hashing.murmur3_128().hashObject(key, Funnels.stringFunnel(StandardCharsets.UTF_8)).asLong();
long hash2 = hash1 >>> 16;
long[] result = new long[numHashFunctions];
for (int i = 0; i < numHashFunctions; i++) {
long combinedHash = hash1 + i * hash2;
if (combinedHash < 0) {
combinedHash = ~combinedHash;
}
result[i] = combinedHash % numBits;
}
return result;
}
}
全文完,如果本文对您有所帮助,请花 1 秒钟帮忙点击一下广告,谢谢。
作 者: BridgeLi,https://www.bridgeli.cn
原文链接:http://www.bridgeli.cn/archives/676
版权声明:非特殊声明均为本站原创作品,转载时请注明作者和原文链接。
作 者: BridgeLi,https://www.bridgeli.cn
原文链接:http://www.bridgeli.cn/archives/676
版权声明:非特殊声明均为本站原创作品,转载时请注明作者和原文链接。
近期评论