Adding radix support to constraints
This allows us to look up an index -> IP address mapping much faster (16 seconds for ~3.7 billion addresses with a sparse blacklist, vs. 65 seconds before the radix array). Setting RADIX_LENGTH to 20 (i.e. /20 prefixes), as this appears to give a good tradeoff between radix population time (<1 ms), runtime (16 s), and memory footprint (4 MB).
This commit is contained in:
parent
bc3ec6456a
commit
8ecbd038b5
@ -25,6 +25,10 @@
|
||||
|
||||
static constraint_t *constraint = NULL;
|
||||
|
||||
uint32_t blacklist_lookup_index(uint64_t index) {
|
||||
return constraint_lookup_index(constraint, index, ADDR_ALLOWED);
|
||||
}
|
||||
|
||||
// check whether a single IP address is allowed to be scanned.
|
||||
// 1 => is allowed
|
||||
// 0 => is not allowed
|
||||
@ -121,7 +125,7 @@ int blacklist_init_from_files(char *whitelist_filename, char *blacklist_filename
|
||||
if (blacklist_filename) {
|
||||
init(blacklist_filename, "blacklist", ADDR_DISALLOWED);
|
||||
}
|
||||
constraint_optimize(constraint);
|
||||
constraint_paint_value(constraint, ADDR_ALLOWED);
|
||||
uint64_t allowed = blacklist_count_allowed();
|
||||
log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)",
|
||||
allowed, allowed*100./((long long int)1 << 32));
|
||||
|
220
lib/constraint.c
220
lib/constraint.c
@ -55,12 +55,12 @@ typedef struct node {
|
||||
|
||||
// As an optimization, we precompute lookups for every prefix of this
|
||||
// length:
|
||||
#define RADIX_LENGTH 16
|
||||
#define RADIX_LENGTH 20
|
||||
|
||||
struct _constraint {
|
||||
node_t *root; // root node of the tree
|
||||
node_t **radix; // array of nodes for every RADIX_LENGTH prefix
|
||||
int optimized; // is radix populated and up-to-date?
|
||||
uint32_t *radix; // array of prefixes (/RADIX_LENGTH) that are painted paint_value
|
||||
size_t radix_len; // number of prefixes in radix array
|
||||
int painted; // have we precomputed counts for each node?
|
||||
value_t paint_value; // value for which we precomputed counts
|
||||
};
|
||||
@ -153,7 +153,7 @@ void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value)
|
||||
{
|
||||
assert(con);
|
||||
_set_recurse(con->root, prefix, len, value);
|
||||
con->optimized = 0;
|
||||
con->painted = 0;
|
||||
}
|
||||
|
||||
// Return the value pertaining to an address, according to the tree
|
||||
@ -182,18 +182,7 @@ static int _lookup_ip(node_t *root, uint32_t address)
|
||||
value_t constraint_lookup_ip(constraint_t *con, uint32_t address)
|
||||
{
|
||||
assert(con);
|
||||
if (con->optimized) {
|
||||
// Use radix optimization
|
||||
node_t *node = con->radix[address >> (32 - RADIX_LENGTH)];
|
||||
if (IS_LEAF(node)) {
|
||||
return node->value;
|
||||
}
|
||||
return _lookup_ip(node, address << RADIX_LENGTH);
|
||||
} else {
|
||||
// Do a full lookup using the tree
|
||||
log_trace("constraint", "Unoptimized lookup");
|
||||
return _lookup_ip(con->root, address);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the nth painted IP address.
|
||||
@ -228,32 +217,45 @@ uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t valu
|
||||
if (!con->painted || con->paint_value != value) {
|
||||
constraint_paint_value(con, value);
|
||||
}
|
||||
if (con->optimized) {
|
||||
// TK TK TK
|
||||
|
||||
uint64_t radix_idx = index / (1 << (32 - RADIX_LENGTH));
|
||||
if (radix_idx < con->radix_len) {
|
||||
// Radix lookup
|
||||
uint32_t radix_offset = index % (1 << (32 - RADIX_LENGTH)); // TODO: bitwise maths
|
||||
return con->radix[radix_idx] | radix_offset;
|
||||
}
|
||||
|
||||
// Otherwise, do the "slow" lookup in tree.
|
||||
// Note that tree counts do NOT include things in the radix,
|
||||
// so we subtract these off here.
|
||||
index -= con->radix_len * (1 << (32 - RADIX_LENGTH));
|
||||
assert(index < con->root->count);
|
||||
return _lookup_index(con->root, index);
|
||||
}
|
||||
|
||||
|
||||
// Implement count_ips by recursing on halves of the tree. Size represents
|
||||
// the number of addresses in a prefix at the current level of the tree.
|
||||
// If paint is specified, each node will have its count set to the number of
|
||||
// leaves under it set to value.
|
||||
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint)
|
||||
// If exclude_radix is specified, the number of addresses will exclude prefixes
|
||||
// that are a /RADIX_LENGTH or larger
|
||||
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint, int exclude_radix)
|
||||
{
|
||||
assert(node);
|
||||
uint64_t n;
|
||||
if (IS_LEAF(node)) {
|
||||
if (node->value == value) {
|
||||
n = size;
|
||||
// Exclude prefixes already included in the radix
|
||||
if (exclude_radix && size >= (1 << (32 -RADIX_LENGTH))) {
|
||||
n = 0;
|
||||
}
|
||||
} else {
|
||||
n = 0;
|
||||
}
|
||||
} else {
|
||||
n = _count_ips_recurse(node->l, value, size >> 1, paint) +
|
||||
_count_ips_recurse(node->r, value, size >> 1, paint);
|
||||
n = _count_ips_recurse(node->l, value, size >> 1, paint, exclude_radix) +
|
||||
_count_ips_recurse(node->r, value, size >> 1, paint, exclude_radix);
|
||||
}
|
||||
if (paint) {
|
||||
node->count = n;
|
||||
@ -261,51 +263,6 @@ static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, i
|
||||
return n;
|
||||
}
|
||||
|
||||
// For each node, precompute the count of leaves beneath it set to value.
|
||||
// Note that the tree can be painted for only one value at a time.
|
||||
void constraint_paint_value(constraint_t *con, value_t value)
|
||||
{
|
||||
assert(con);
|
||||
log_info("constraint", "Painting value %lu", value);
|
||||
_count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1);
|
||||
con->painted = 1;
|
||||
con->paint_value = value;
|
||||
}
|
||||
|
||||
// Return the number of addresses that have a given value.
|
||||
uint64_t constraint_count_ips(constraint_t *con, value_t value)
|
||||
{
|
||||
assert(con);
|
||||
if (con->painted && con->paint_value == value) {
|
||||
return con->root->count;
|
||||
} else {
|
||||
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the tree.
|
||||
// All addresses will initially have the given value.
|
||||
constraint_t* constraint_init(value_t value)
|
||||
{
|
||||
log_trace("constraint", "Initializing");
|
||||
constraint_t* con = malloc(sizeof(constraint_t));
|
||||
con->root = _create_leaf(value);
|
||||
con->radix = calloc(sizeof(node_t *), 1 << RADIX_LENGTH);
|
||||
assert(con->radix);
|
||||
con->optimized = 0;
|
||||
return con;
|
||||
}
|
||||
|
||||
// Deinitialize and free the tree.
|
||||
void constraint_free(constraint_t *con)
|
||||
{
|
||||
assert(con);
|
||||
log_trace("constraint", "Cleaning up");
|
||||
_destroy_subtree(con->root);
|
||||
free(con->radix);
|
||||
free(con);
|
||||
}
|
||||
|
||||
// Return a node that determines the values for the addresses with
|
||||
// the given prefix. This is either the internal node that
|
||||
// corresponds to the end of the prefix or a leaf node that
|
||||
@ -317,8 +274,9 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
|
||||
|
||||
node_t *node = root;
|
||||
uint32_t mask = 0x80000000;
|
||||
int i;
|
||||
|
||||
for (int i=0; i < len; i++) {
|
||||
for (i=0; i < len; i++) {
|
||||
if (IS_LEAF(node)) {
|
||||
return node;
|
||||
}
|
||||
@ -332,21 +290,66 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
|
||||
return node;
|
||||
}
|
||||
|
||||
// After values have been set, precompute prefix lookups.
|
||||
void constraint_optimize(constraint_t *con)
|
||||
// For each node, precompute the count of leaves beneath it set to value.
|
||||
// Note that the tree can be painted for only one value at a time.
|
||||
void constraint_paint_value(constraint_t *con, value_t value)
|
||||
{
|
||||
assert(con);
|
||||
if (con->optimized) {
|
||||
return;
|
||||
}
|
||||
log_trace("constraint", "Optimizing constraints");
|
||||
for (uint32_t i=0; i < (1 << RADIX_LENGTH); i++) {
|
||||
log_info("constraint", "Painting value %lu", value);
|
||||
|
||||
// Paint everything except what we will put in radix
|
||||
_count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1, 1);
|
||||
|
||||
// Fill in the radix array with a list of addresses
|
||||
uint32_t i;
|
||||
con->radix_len = 0;
|
||||
for (i=0; i < (1 << RADIX_LENGTH); i++) {
|
||||
uint32_t prefix = i << (32 - RADIX_LENGTH);
|
||||
con->radix[i] = _lookup_node(con->root, prefix, RADIX_LENGTH);
|
||||
node_t *node = _lookup_node(con->root, prefix, RADIX_LENGTH);
|
||||
if (IS_LEAF(node) && node->value == value) {
|
||||
// Add this prefix to the radix
|
||||
con->radix[con->radix_len++] = prefix;
|
||||
}
|
||||
con->optimized = 1;
|
||||
}
|
||||
log_info("constraint", "%lu IPs in radix array, %lu IPs in tree",
|
||||
con->radix_len * (1 << (32 - RADIX_LENGTH)), con->root->count);
|
||||
con->painted = 1;
|
||||
con->paint_value = value;
|
||||
}
|
||||
|
||||
// Return the number of addresses that have a given value.
|
||||
uint64_t constraint_count_ips(constraint_t *con, value_t value)
|
||||
{
|
||||
assert(con);
|
||||
if (con->painted && con->paint_value == value) {
|
||||
return con->root->count + con->radix_len * (1 << (32 - RADIX_LENGTH));
|
||||
} else {
|
||||
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the tree.
|
||||
// All addresses will initially have the given value.
|
||||
constraint_t* constraint_init(value_t value)
|
||||
{
|
||||
log_trace("constraint", "Initializing");
|
||||
constraint_t* con = malloc(sizeof(constraint_t));
|
||||
con->root = _create_leaf(value);
|
||||
con->radix = calloc(sizeof(uint32_t), 1 << RADIX_LENGTH);
|
||||
assert(con->radix);
|
||||
con->painted = 0;
|
||||
return con;
|
||||
}
|
||||
|
||||
// Deinitialize and free the tree.
|
||||
void constraint_free(constraint_t *con)
|
||||
{
|
||||
assert(con);
|
||||
log_trace("constraint", "Cleaning up");
|
||||
_destroy_subtree(con->root);
|
||||
free(con->radix);
|
||||
free(con);
|
||||
}
|
||||
|
||||
/*
|
||||
int main(void)
|
||||
@ -384,68 +387,3 @@ int main(void)
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
static int init(constraint_t *con, char *file, const char *name, value_t value)
|
||||
{
|
||||
FILE *fp;
|
||||
char line[1000];
|
||||
int blocked = 0;
|
||||
|
||||
fp = fopen(file, "r");
|
||||
if (fp == NULL) {
|
||||
log_fatal(name, "Unable to open %s file: %s: %s",
|
||||
name, file, strerror(errno));
|
||||
}
|
||||
|
||||
while (fgets(line, sizeof(line), fp) != NULL) {
|
||||
char *comment = strchr(line, '#');
|
||||
if (comment) {
|
||||
*comment = '\0';
|
||||
}
|
||||
char ip[33];
|
||||
if ((sscanf(line, "%32s", ip)) == EOF) {
|
||||
continue;
|
||||
}
|
||||
int prefix_len;
|
||||
char *slash = strchr(ip, '/');
|
||||
if (slash == NULL) {
|
||||
log_fatal(name,
|
||||
"Unable to parse %s file: %s",
|
||||
name, file);
|
||||
}
|
||||
// split apart network and prefix length
|
||||
*slash = '\0';
|
||||
prefix_len = atoi(&slash[1]);
|
||||
constraint_set(con, ntohl(inet_addr(ip)), prefix_len, value);
|
||||
|
||||
blocked++;
|
||||
}
|
||||
fclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void main()
|
||||
{
|
||||
log_init(stderr, LOG_TRACE);
|
||||
|
||||
constraint_t *con = constraint_init(1);
|
||||
init(con, "blacklist.prefixes", "blacklist", 0);
|
||||
//constraint_optimize(con);
|
||||
|
||||
printf("count(0)=%lu\n", constraint_count_ips(con, 0));
|
||||
printf("count(1)=%lu\n", constraint_count_ips(con, 1));
|
||||
|
||||
uint32_t i=0, count=0;
|
||||
do {
|
||||
if (constraint_lookup_ip(con, i))
|
||||
count++;
|
||||
} while (++i != 0);
|
||||
printf("derived count(1)=%u\n", count);
|
||||
|
||||
constraint_free(con);
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
|
@ -1,13 +1,14 @@
|
||||
#ifndef _CONSTRAINT_H
|
||||
#define _CONSTRAINT_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef struct _constraint constraint_t;
|
||||
typedef unsigned int value_t;
|
||||
|
||||
constraint_t* constraint_init(value_t value);
|
||||
void constraint_free(constraint_t *con);
|
||||
void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value);
|
||||
void constraint_optimize(constraint_t *con);
|
||||
value_t constraint_lookup_ip(constraint_t *con, uint32_t address);
|
||||
uint64_t constraint_count_ips(constraint_t *con, value_t value);
|
||||
uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value);
|
||||
|
Loading…
Reference in New Issue
Block a user