Adding radix support to constraints

This allows us to look up the IP address for a given index much faster
(16 seconds for ~3.7 billion addresses with a sparse blacklist,
vs. 65 seconds before the radix array).

Setting RADIX_LENGTH to 20 (i.e., a /20 prefix), as this appears to give a good tradeoff
between radix population time (<1 ms), runtime (16 s), and memory footprint (4 MB).
This commit is contained in:
Eric 2013-10-03 02:28:19 -04:00
parent bc3ec6456a
commit 8ecbd038b5
3 changed files with 89 additions and 146 deletions

View File

@ -25,6 +25,10 @@
static constraint_t *constraint = NULL; static constraint_t *constraint = NULL;
uint32_t blacklist_lookup_index(uint64_t index) {
return constraint_lookup_index(constraint, index, ADDR_ALLOWED);
}
// check whether a single IP address is allowed to be scanned. // check whether a single IP address is allowed to be scanned.
// 1 => is allowed // 1 => is allowed
// 0 => is not allowed // 0 => is not allowed
@ -121,7 +125,7 @@ int blacklist_init_from_files(char *whitelist_filename, char *blacklist_filename
if (blacklist_filename) { if (blacklist_filename) {
init(blacklist_filename, "blacklist", ADDR_DISALLOWED); init(blacklist_filename, "blacklist", ADDR_DISALLOWED);
} }
constraint_optimize(constraint); constraint_paint_value(constraint, ADDR_ALLOWED);
uint64_t allowed = blacklist_count_allowed(); uint64_t allowed = blacklist_count_allowed();
log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)", log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)",
allowed, allowed*100./((long long int)1 << 32)); allowed, allowed*100./((long long int)1 << 32));

View File

@ -55,12 +55,12 @@ typedef struct node {
// As an optimization, we precompute lookups for every prefix of this // As an optimization, we precompute lookups for every prefix of this
// length: // length:
#define RADIX_LENGTH 16 #define RADIX_LENGTH 20
struct _constraint { struct _constraint {
node_t *root; // root node of the tree node_t *root; // root node of the tree
node_t **radix; // array of nodes for every RADIX_LENGTH prefix uint32_t *radix; // array of prefixes (/RADIX_LENGTH) that are painted paint_value
int optimized; // is radix populated and up-to-date? size_t radix_len; // number of prefixes in radix array
int painted; // have we precomputed counts for each node? int painted; // have we precomputed counts for each node?
value_t paint_value; // value for which we precomputed counts value_t paint_value; // value for which we precomputed counts
}; };
@ -153,7 +153,7 @@ void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value)
{ {
assert(con); assert(con);
_set_recurse(con->root, prefix, len, value); _set_recurse(con->root, prefix, len, value);
con->optimized = 0; con->painted = 0;
} }
// Return the value pertaining to an address, according to the tree // Return the value pertaining to an address, according to the tree
@ -182,18 +182,7 @@ static int _lookup_ip(node_t *root, uint32_t address)
value_t constraint_lookup_ip(constraint_t *con, uint32_t address) value_t constraint_lookup_ip(constraint_t *con, uint32_t address)
{ {
assert(con); assert(con);
if (con->optimized) {
// Use radix optimization
node_t *node = con->radix[address >> (32 - RADIX_LENGTH)];
if (IS_LEAF(node)) {
return node->value;
}
return _lookup_ip(node, address << RADIX_LENGTH);
} else {
// Do a full lookup using the tree
log_trace("constraint", "Unoptimized lookup");
return _lookup_ip(con->root, address); return _lookup_ip(con->root, address);
}
} }
// Return the nth painted IP address. // Return the nth painted IP address.
@ -228,32 +217,45 @@ uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t valu
if (!con->painted || con->paint_value != value) { if (!con->painted || con->paint_value != value) {
constraint_paint_value(con, value); constraint_paint_value(con, value);
} }
if (con->optimized) {
// TK TK TK uint64_t radix_idx = index / (1 << (32 - RADIX_LENGTH));
if (radix_idx < con->radix_len) {
// Radix lookup
uint32_t radix_offset = index % (1 << (32 - RADIX_LENGTH)); // TODO: bitwise maths
return con->radix[radix_idx] | radix_offset;
} }
// Otherwise, do the "slow" lookup in tree.
// Note that tree counts do NOT include things in the radix,
// so we subtract these off here.
index -= con->radix_len * (1 << (32 - RADIX_LENGTH));
assert(index < con->root->count); assert(index < con->root->count);
return _lookup_index(con->root, index); return _lookup_index(con->root, index);
} }
// Implement count_ips by recursing on halves of the tree. Size represents // Implement count_ips by recursing on halves of the tree. Size represents
// the number of addresses in a prefix at the current level of the tree. // the number of addresses in a prefix at the current level of the tree.
// If paint is specified, each node will have its count set to the number of // If paint is specified, each node will have its count set to the number of
// leaves under it set to value. // leaves under it set to value.
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint) // If exclude_radix is specified, the number of addresses will exclude prefixes
// that are a /RADIX_LENGTH or larger
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint, int exclude_radix)
{ {
assert(node); assert(node);
uint64_t n; uint64_t n;
if (IS_LEAF(node)) { if (IS_LEAF(node)) {
if (node->value == value) { if (node->value == value) {
n = size; n = size;
// Exclude prefixes already included in the radix
if (exclude_radix && size >= (1 << (32 -RADIX_LENGTH))) {
n = 0;
}
} else { } else {
n = 0; n = 0;
} }
} else { } else {
n = _count_ips_recurse(node->l, value, size >> 1, paint) + n = _count_ips_recurse(node->l, value, size >> 1, paint, exclude_radix) +
_count_ips_recurse(node->r, value, size >> 1, paint); _count_ips_recurse(node->r, value, size >> 1, paint, exclude_radix);
} }
if (paint) { if (paint) {
node->count = n; node->count = n;
@ -261,51 +263,6 @@ static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, i
return n; return n;
} }
// For each node, precompute the count of leaves beneath it set to value.
// Note that the tree can be painted for only one value at a time.
void constraint_paint_value(constraint_t *con, value_t value)
{
assert(con);
log_info("constraint", "Painting value %lu", value);
_count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1);
con->painted = 1;
con->paint_value = value;
}
// Return the number of addresses that have a given value.
uint64_t constraint_count_ips(constraint_t *con, value_t value)
{
assert(con);
if (con->painted && con->paint_value == value) {
return con->root->count;
} else {
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0);
}
}
// Initialize the tree.
// All addresses will initially have the given value.
constraint_t* constraint_init(value_t value)
{
log_trace("constraint", "Initializing");
constraint_t* con = malloc(sizeof(constraint_t));
con->root = _create_leaf(value);
con->radix = calloc(sizeof(node_t *), 1 << RADIX_LENGTH);
assert(con->radix);
con->optimized = 0;
return con;
}
// Deinitialize and free the tree.
void constraint_free(constraint_t *con)
{
assert(con);
log_trace("constraint", "Cleaning up");
_destroy_subtree(con->root);
free(con->radix);
free(con);
}
// Return a node that determines the values for the addresses with // Return a node that determines the values for the addresses with
// the given prefix. This is either the internal node that // the given prefix. This is either the internal node that
// corresponds to the end of the prefix or a leaf node that // corresponds to the end of the prefix or a leaf node that
@ -317,8 +274,9 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
node_t *node = root; node_t *node = root;
uint32_t mask = 0x80000000; uint32_t mask = 0x80000000;
int i;
for (int i=0; i < len; i++) { for (i=0; i < len; i++) {
if (IS_LEAF(node)) { if (IS_LEAF(node)) {
return node; return node;
} }
@ -332,21 +290,66 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
return node; return node;
} }
// After values have been set, precompute prefix lookups. // For each node, precompute the count of leaves beneath it set to value.
void constraint_optimize(constraint_t *con) // Note that the tree can be painted for only one value at a time.
void constraint_paint_value(constraint_t *con, value_t value)
{ {
assert(con); assert(con);
if (con->optimized) { log_info("constraint", "Painting value %lu", value);
return;
} // Paint everything except what we will put in radix
log_trace("constraint", "Optimizing constraints"); _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1, 1);
for (uint32_t i=0; i < (1 << RADIX_LENGTH); i++) {
// Fill in the radix array with a list of addresses
uint32_t i;
con->radix_len = 0;
for (i=0; i < (1 << RADIX_LENGTH); i++) {
uint32_t prefix = i << (32 - RADIX_LENGTH); uint32_t prefix = i << (32 - RADIX_LENGTH);
con->radix[i] = _lookup_node(con->root, prefix, RADIX_LENGTH); node_t *node = _lookup_node(con->root, prefix, RADIX_LENGTH);
if (IS_LEAF(node) && node->value == value) {
// Add this prefix to the radix
con->radix[con->radix_len++] = prefix;
} }
con->optimized = 1; }
log_info("constraint", "%lu IPs in radix array, %lu IPs in tree",
con->radix_len * (1 << (32 - RADIX_LENGTH)), con->root->count);
con->painted = 1;
con->paint_value = value;
} }
// Return the number of addresses that have a given value.
uint64_t constraint_count_ips(constraint_t *con, value_t value)
{
assert(con);
if (con->painted && con->paint_value == value) {
return con->root->count + con->radix_len * (1 << (32 - RADIX_LENGTH));
} else {
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0, 0);
}
}
// Initialize the tree.
// All addresses will initially have the given value.
constraint_t* constraint_init(value_t value)
{
log_trace("constraint", "Initializing");
constraint_t* con = malloc(sizeof(constraint_t));
con->root = _create_leaf(value);
con->radix = calloc(sizeof(uint32_t), 1 << RADIX_LENGTH);
assert(con->radix);
con->painted = 0;
return con;
}
// Deinitialize and free the tree.
void constraint_free(constraint_t *con)
{
assert(con);
log_trace("constraint", "Cleaning up");
_destroy_subtree(con->root);
free(con->radix);
free(con);
}
/* /*
int main(void) int main(void)
@ -384,68 +387,3 @@ int main(void)
} }
*/ */
/*
static int init(constraint_t *con, char *file, const char *name, value_t value)
{
FILE *fp;
char line[1000];
int blocked = 0;
fp = fopen(file, "r");
if (fp == NULL) {
log_fatal(name, "Unable to open %s file: %s: %s",
name, file, strerror(errno));
}
while (fgets(line, sizeof(line), fp) != NULL) {
char *comment = strchr(line, '#');
if (comment) {
*comment = '\0';
}
char ip[33];
if ((sscanf(line, "%32s", ip)) == EOF) {
continue;
}
int prefix_len;
char *slash = strchr(ip, '/');
if (slash == NULL) {
log_fatal(name,
"Unable to parse %s file: %s",
name, file);
}
// split apart network and prefix length
*slash = '\0';
prefix_len = atoi(&slash[1]);
constraint_set(con, ntohl(inet_addr(ip)), prefix_len, value);
blocked++;
}
fclose(fp);
return 0;
}
void main()
{
log_init(stderr, LOG_TRACE);
constraint_t *con = constraint_init(1);
init(con, "blacklist.prefixes", "blacklist", 0);
//constraint_optimize(con);
printf("count(0)=%lu\n", constraint_count_ips(con, 0));
printf("count(1)=%lu\n", constraint_count_ips(con, 1));
uint32_t i=0, count=0;
do {
if (constraint_lookup_ip(con, i))
count++;
} while (++i != 0);
printf("derived count(1)=%u\n", count);
constraint_free(con);
}
*/

View File

@ -1,13 +1,14 @@
#ifndef _CONSTRAINT_H #ifndef _CONSTRAINT_H
#define _CONSTRAINT_H #define _CONSTRAINT_H
#include <stdint.h>
typedef struct _constraint constraint_t; typedef struct _constraint constraint_t;
typedef unsigned int value_t; typedef unsigned int value_t;
constraint_t* constraint_init(value_t value); constraint_t* constraint_init(value_t value);
void constraint_free(constraint_t *con); void constraint_free(constraint_t *con);
void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value); void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value);
void constraint_optimize(constraint_t *con);
value_t constraint_lookup_ip(constraint_t *con, uint32_t address); value_t constraint_lookup_ip(constraint_t *con, uint32_t address);
uint64_t constraint_count_ips(constraint_t *con, value_t value); uint64_t constraint_count_ips(constraint_t *con, value_t value);
uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value); uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value);