Merge branch 'fast-whitelist' of github.com:zmap/zmap

This commit is contained in:
Zakir Durumeric 2013-10-03 10:59:21 -04:00
commit 4a37dba56f
5 changed files with 233 additions and 156 deletions

View File

@ -25,6 +25,10 @@
static constraint_t *constraint = NULL; static constraint_t *constraint = NULL;
// Translate a zero-based index among the ADDR_ALLOWED addresses of the
// file-level constraint into the corresponding address. The looked-up value
// is passed through ntohl() before being returned (presumably converting a
// host-order address to network order -- confirm against callers).
uint32_t blacklist_lookup_index(uint64_t index)
{
	const uint32_t looked_up =
		constraint_lookup_index(constraint, index, ADDR_ALLOWED);
	return ntohl(looked_up);
}
// check whether a single IP address is allowed to be scanned. // check whether a single IP address is allowed to be scanned.
// 1 => is allowed // 1 => is allowed
// 0 => is not allowed // 0 => is not allowed
@ -121,9 +125,24 @@ int blacklist_init_from_files(char *whitelist_filename, char *blacklist_filename
if (blacklist_filename) { if (blacklist_filename) {
init(blacklist_filename, "blacklist", ADDR_DISALLOWED); init(blacklist_filename, "blacklist", ADDR_DISALLOWED);
} }
constraint_optimize(constraint); constraint_paint_value(constraint, ADDR_ALLOWED);
uint64_t allowed = blacklist_count_allowed(); uint64_t allowed = blacklist_count_allowed();
log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)", log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)",
allowed, allowed*100./((long long int)1 << 32)); allowed, allowed*100./((long long int)1 << 32));
/*
// test
log_debug("blacklist", "testing started");
uint64_t count = constraint_count_ips(constraint, ADDR_ALLOWED);
for (unsigned int i=0; i < count; i++) {
int ip = constraint_lookup_index(constraint, i, ADDR_ALLOWED);
if ((i & 0xFFFFFF) == 0)
log_info("blacklist", "%x", i & 0xFF000000);
if (constraint_lookup_ip(constraint, ip) != ADDR_ALLOWED) {
log_error("blacklist", "test failed for index %d", i);
}
}
log_debug("blacklist", "testing complete");
*/
return 0; return 0;
} }

View File

@ -3,6 +3,7 @@
#ifndef BLACKLIST_H #ifndef BLACKLIST_H
#define BLACKLIST_H #define BLACKLIST_H
uint32_t blacklist_lookup_index(uint64_t index);
int blacklist_is_allowed(uint32_t s_addr); int blacklist_is_allowed(uint32_t s_addr);
void blacklist_prefix(char *ip, int prefix_len); void blacklist_prefix(char *ip, int prefix_len);
void whitelist_prefix(char *ip, int prefix_len); void whitelist_prefix(char *ip, int prefix_len);

View File

@ -50,16 +50,19 @@ typedef struct node {
struct node *l; struct node *l;
struct node *r; struct node *r;
value_t value; value_t value;
uint64_t count;
} node_t; } node_t;
// As an optimization, we precompute lookups for every prefix of this // As an optimization, we precompute lookups for every prefix of this
// length: // length:
#define RADIX_LENGTH 16 #define RADIX_LENGTH 20
struct _constraint { struct _constraint {
node_t *root; // root node of the tree node_t *root; // root node of the tree
node_t **radix; // array of nodes for every RADIX_LENGTH prefix uint32_t *radix; // array of prefixes (/RADIX_LENGTH) that are painted paint_value
int optimized; // is radix populated and up-to-date? size_t radix_len; // number of prefixes in radix array
int painted; // have we precomputed counts for each node?
value_t paint_value; // value for which we precomputed counts
}; };
// Tree operations respect the invariant that every node that isn't a // Tree operations respect the invariant that every node that isn't a
@ -150,7 +153,7 @@ void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value)
{ {
assert(con); assert(con);
_set_recurse(con->root, prefix, len, value); _set_recurse(con->root, prefix, len, value);
con->optimized = 0; con->painted = 0;
} }
// Return the value pertaining to an address, according to the tree // Return the value pertaining to an address, according to the tree
@ -176,67 +179,88 @@ static int _lookup_ip(node_t *root, uint32_t address)
// Return the value pertaining to an address. // Return the value pertaining to an address.
// (Note: address must be in host byte order.) // (Note: address must be in host byte order.)
int constraint_lookup_ip(constraint_t *con, uint32_t address) value_t constraint_lookup_ip(constraint_t *con, uint32_t address)
{ {
assert(con); assert(con);
if (con->optimized) {
// Use radix optimization
node_t *node = con->radix[address >> (32 - RADIX_LENGTH)];
if (IS_LEAF(node)) {
return node->value;
}
return _lookup_ip(node, address << RADIX_LENGTH);
} else {
// Do a full lookup using the tree
log_trace("constraint", "Unoptimized lookup");
return _lookup_ip(con->root, address); return _lookup_ip(con->root, address);
} }
// Return the nth (zero-based) painted IP address by walking the tree.
//
// At each interior node the count stored on the left child decides the
// direction: if n is below that count the walk stays in the left subtree
// (next address bit 0); otherwise the left count is subtracted from n, the
// current bit is set in the address, and the walk continues to the right.
// When a leaf is reached, the remaining n is the offset inside that leaf's
// prefix and is OR'ed into the address.
//
// The per-node counts are read as-is, so the tree must already have been
// painted, and the caller is expected to pass n below root->count.
static uint32_t _lookup_index(node_t *root, uint64_t n)
{
	assert(root);
	node_t *node = root;
	uint32_t ip = 0;
	uint32_t mask = 0x80000000;
	for (;;) {
		if (IS_LEAF(node)) {
			// The result is an unsigned 32-bit address; returning it
			// through a signed int would make every address at or
			// above 0x80000000 an implementation-defined conversion.
			return ip | (uint32_t) n;
		}
		if (n < node->l->count) {
			node = node->l;
		} else {
			n -= node->l->count;
			node = node->r;
			ip |= mask;
		}
		mask >>= 1;
	}
}
// For a given value, return the IP address with zero-based index n.
// (i.e., if there are three addresses with value 0xFF, looking up index 1
// will return the second one).
// Note that the tree must have been previously painted with this value.
uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value)
{
assert(con);
if (!con->painted || con->paint_value != value) {
constraint_paint_value(con, value);
}
uint64_t radix_idx = index / (1 << (32 - RADIX_LENGTH));
if (radix_idx < con->radix_len) {
// Radix lookup
uint32_t radix_offset = index % (1 << (32 - RADIX_LENGTH)); // TODO: bitwise maths
return con->radix[radix_idx] | radix_offset;
}
// Otherwise, do the "slow" lookup in tree.
// Note that tree counts do NOT include things in the radix,
// so we subtract these off here.
index -= con->radix_len * (1 << (32 - RADIX_LENGTH));
assert(index < con->root->count);
return _lookup_index(con->root, index);
} }
// Implement count_ips by recursing on halves of the tree. Size represents // Implement count_ips by recursing on halves of the tree. Size represents
// the number of addresses in a prefix at the current level of the tree. // the number of addresses in a prefix at the current level of the tree.
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size) // If paint is specified, each node will have its count set to the number of
// leaves under it set to value.
// If exclude_radix is specified, the number of addresses will exclude prefixes
// that are a /RADIX_LENGTH or larger
static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint, int exclude_radix)
{ {
assert(node); assert(node);
uint64_t n;
if (IS_LEAF(node)) { if (IS_LEAF(node)) {
if (node->value == value) { if (node->value == value) {
return size; n = size;
// Exclude prefixes already included in the radix
if (exclude_radix && size >= (1 << (32 -RADIX_LENGTH))) {
n = 0;
}
} else { } else {
return 0; n = 0;
} }
} else {
n = _count_ips_recurse(node->l, value, size >> 1, paint, exclude_radix) +
_count_ips_recurse(node->r, value, size >> 1, paint, exclude_radix);
} }
return _count_ips_recurse(node->l, value, size >> 1) + if (paint) {
_count_ips_recurse(node->r, value, size >> 1); node->count = n;
} }
return n;
// Return the number of addresses that have a given value.
uint64_t constraint_count_ips(constraint_t *con, value_t value)
{
assert(con);
return _count_ips_recurse(con->root, value, (uint64_t)1 << 32);
}
// Initialize the tree.
// All addresses will initially have the given value.
constraint_t* constraint_init(value_t value)
{
log_trace("constraint", "Initializing");
constraint_t* con = malloc(sizeof(constraint_t));
con->root = _create_leaf(value);
con->radix = calloc(sizeof(node_t *), 1 << RADIX_LENGTH);
assert(con->radix);
con->optimized = 0;
return con;
}
// Deinitialize and free the tree.
void constraint_free(constraint_t *con)
{
assert(con);
log_trace("constraint", "Cleaning up");
_destroy_subtree(con->root);
free(con->radix);
free(con);
} }
// Return a node that determines the values for the addresses with // Return a node that determines the values for the addresses with
@ -250,8 +274,9 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
node_t *node = root; node_t *node = root;
uint32_t mask = 0x80000000; uint32_t mask = 0x80000000;
int i;
for (int i=0; i < len; i++) { for (i=0; i < len; i++) {
if (IS_LEAF(node)) { if (IS_LEAF(node)) {
return node; return node;
} }
@ -265,21 +290,66 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len)
return node; return node;
} }
// After values have been set, precompute prefix lookups. // For each node, precompute the count of leaves beneath it set to value.
void constraint_optimize(constraint_t *con) // Note that the tree can be painted for only one value at a time.
void constraint_paint_value(constraint_t *con, value_t value)
{ {
assert(con); assert(con);
if (con->optimized) { log_trace("constraint", "Painting value %lu", value);
return;
} // Paint everything except what we will put in radix
log_trace("constraint", "Optimizing constraints"); _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1, 1);
for (uint32_t i=0; i < (1 << RADIX_LENGTH); i++) {
// Fill in the radix array with a list of addresses
uint32_t i;
con->radix_len = 0;
for (i=0; i < (1 << RADIX_LENGTH); i++) {
uint32_t prefix = i << (32 - RADIX_LENGTH); uint32_t prefix = i << (32 - RADIX_LENGTH);
con->radix[i] = _lookup_node(con->root, prefix, RADIX_LENGTH); node_t *node = _lookup_node(con->root, prefix, RADIX_LENGTH);
if (IS_LEAF(node) && node->value == value) {
// Add this prefix to the radix
con->radix[con->radix_len++] = prefix;
} }
con->optimized = 1; }
log_debug("constraint", "%lu IPs in radix array, %lu IPs in tree",
con->radix_len * (1 << (32 - RADIX_LENGTH)), con->root->count);
con->painted = 1;
con->paint_value = value;
} }
// Return the number of addresses that have a given value.
//
// If the tree is currently painted with this value, the answer is assembled
// from the cached figures: root->count plus one full /RADIX_LENGTH prefix
// for each of the radix_len entries in the radix array. Otherwise the tree
// is walked without painting and without excluding radix-sized prefixes.
uint64_t constraint_count_ips(constraint_t *con, value_t value)
{
	assert(con);
	if (con->painted && con->paint_value == value) {
		// Widen before scaling: when every /RADIX_LENGTH prefix is in
		// the radix the product is 2^32, which would wrap to 0 if it
		// were computed in a 32-bit size_t.
		uint64_t radix_ips =
			(uint64_t) con->radix_len << (32 - RADIX_LENGTH);
		return con->root->count + radix_ips;
	}
	return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0, 0);
}
// Initialize the tree.
// All addresses will initially have the given value.
// Returns a newly allocated constraint; release it with constraint_free().
constraint_t* constraint_init(value_t value)
{
	log_trace("constraint", "Initializing");
	constraint_t *con = malloc(sizeof(*con));
	// Check the allocation before the first dereference, the same way
	// the radix array is checked below.
	assert(con);
	con->root = _create_leaf(value);
	// One slot for every possible /RADIX_LENGTH prefix.
	con->radix = calloc((size_t)1 << RADIX_LENGTH, sizeof(*con->radix));
	assert(con->radix);
	// Nothing has been painted yet; give the paint bookkeeping defined
	// values rather than leaving it as whatever malloc returned.
	con->radix_len = 0;
	con->painted = 0;
	con->paint_value = 0;
	return con;
}
// Tear down a constraint: hand the tree rooted at con->root to
// _destroy_subtree(), then free the radix array and the constraint object
// itself. con must not be NULL.
void constraint_free(constraint_t *con)
{
	assert(con);
	log_trace("constraint", "Cleaning up");

	node_t *tree = con->root;
	uint32_t *prefixes = con->radix;

	_destroy_subtree(tree);
	free(prefixes);
	free(con);
}
/* /*
int main(void) int main(void)
@ -317,68 +387,3 @@ int main(void)
} }
*/ */
/*
static int init(constraint_t *con, char *file, const char *name, value_t value)
{
FILE *fp;
char line[1000];
int blocked = 0;
fp = fopen(file, "r");
if (fp == NULL) {
log_fatal(name, "Unable to open %s file: %s: %s",
name, file, strerror(errno));
}
while (fgets(line, sizeof(line), fp) != NULL) {
char *comment = strchr(line, '#');
if (comment) {
*comment = '\0';
}
char ip[33];
if ((sscanf(line, "%32s", ip)) == EOF) {
continue;
}
int prefix_len;
char *slash = strchr(ip, '/');
if (slash == NULL) {
log_fatal(name,
"Unable to parse %s file: %s",
name, file);
}
// split apart network and prefix length
*slash = '\0';
prefix_len = atoi(&slash[1]);
constraint_set(con, ntohl(inet_addr(ip)), prefix_len, value);
blocked++;
}
fclose(fp);
return 0;
}
void main()
{
log_init(stderr, LOG_TRACE);
constraint_t *con = constraint_init(1);
init(con, "blacklist.prefixes", "blacklist", 0);
//constraint_optimize(con);
printf("count(0)=%lu\n", constraint_count_ips(con, 0));
printf("count(1)=%lu\n", constraint_count_ips(con, 1));
uint32_t i=0, count=0;
do {
if (constraint_lookup_ip(con, i))
count++;
} while (++i != 0);
printf("derived count(1)=%u\n", count);
constraint_free(con);
}
*/

View File

@ -1,14 +1,17 @@
#ifndef CONSTRAINT_H #ifndef CONSTRAINT_H
#define CONSTRAINT_H #define CONSTRAINT_H
#include <stdint.h>
typedef struct _constraint constraint_t; typedef struct _constraint constraint_t;
typedef int value_t; typedef unsigned int value_t;
constraint_t* constraint_init(value_t value); constraint_t* constraint_init(value_t value);
void constraint_free(constraint_t *con); void constraint_free(constraint_t *con);
void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value); void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value);
void constraint_optimize(constraint_t *con); value_t constraint_lookup_ip(constraint_t *con, uint32_t address);
int constraint_lookup_ip(constraint_t *con, uint32_t address);
uint64_t constraint_count_ips(constraint_t *con, value_t value); uint64_t constraint_count_ips(constraint_t *con, value_t value);
uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value);
void constraint_paint_value(constraint_t *con, value_t value);
#endif //_CONSTRAINT_H #endif //_CONSTRAINT_H

View File

@ -57,28 +57,65 @@
#include "aesrand.h" #include "aesrand.h"
#define LSRC "cyclic" #define LSRC "cyclic"
#define PRIME 4294967311 // 2^32 + 15
#define KNOWN_PRIMROOT 3
// distinct prime factors of 2^32 + 15 typedef struct cyclic_group {
static const uint64_t psub1_f[] = { 2, 3, 5, 131, 364289 }; uint64_t prime;
uint64_t known_primroot;
size_t num_prime_factors; // number of unique prime factors of (prime-1)
uint64_t prime_factors[10]; // unique prime factors of (prime-1)
} cyclic_group_t;
// selected primitive root that we'll use as the generator // We will pick the first cyclic group from this list that is
// larger than the number of IPs in our whitelist. E.g. for an
// entire Internet scan, this would be cyclic32
// Note: this list should remain ordered by size (primes) ascending.
// Table of candidate cyclic groups, ordered by prime ascending. Each entry
// gives the prime modulus, a known primitive root for it, and the distinct
// prime factors of (prime - 1). The table is read-only, so it is const.
static const cyclic_group_t groups[] = {
	{ // 2^16 + 1
		.prime = 65537ULL,
		.known_primroot = 3,
		.prime_factors = {2},
		.num_prime_factors = 1
	},
	{ // 2^24 + 43
		.prime = 16777259ULL,
		.known_primroot = 2,
		.prime_factors = {2, 23, 103, 3541},
		.num_prime_factors = 4
	},
	{ // 2^28 + 3
		.prime = 268435459ULL,
		.known_primroot = 2,
		.prime_factors = {2, 3, 19, 87211},
		.num_prime_factors = 4
	},
	{ // 2^32 + 15 (does not fit in 32 bits, hence the explicit suffix)
		.prime = 4294967311ULL,
		.known_primroot = 3,
		.prime_factors = {2, 3, 5, 131, 364289},
		.num_prime_factors = 5
	}
};
// selected prime/primitive root that we'll use as the generator
static uint64_t prime = 0;
static uint64_t primroot = 0; static uint64_t primroot = 0;
static uint64_t current = 0; static uint64_t current = 0;
static uint64_t num_addrs = 0;
#define COPRIME 1 #define COPRIME 1
#define NOT_COPRIME 0 #define NOT_COPRIME 0
// check whether two integers are coprime // check whether two integers are coprime
static int check_coprime(uint64_t check) static int check_coprime(uint64_t check, const cyclic_group_t *group)
{ {
for (unsigned i=0; i < sizeof(psub1_f)/sizeof(psub1_f[0]); i++) { for (unsigned i=0; i < group->num_prime_factors; i++) {
if (psub1_f[i] > check && !(psub1_f[i] % check)) { if (group->prime_factors[i] > check && !(group->prime_factors[i] % check)) {
return NOT_COPRIME; return NOT_COPRIME;
} else if (psub1_f[i] < check && !(check % psub1_f[i])) { } else if (group->prime_factors[i] < check && !(check % group->prime_factors[i])) {
return NOT_COPRIME; return NOT_COPRIME;
} else if (psub1_f[i] == check) { } else if (group->prime_factors[i] == check) {
return NOT_COPRIME; return NOT_COPRIME;
} }
} }
@ -86,18 +123,18 @@ static int check_coprime(uint64_t check)
} }
// find gen of cyclic group Z modulo PRIME // find gen of cyclic group Z modulo PRIME
static uint64_t find_primroot(void) static uint64_t find_primroot(const cyclic_group_t *group)
{ {
// what luck, rand() returns a uint32_t! // what luck, rand() returns a uint32_t!
uint32_t candidate = (uint32_t) aesrand_getword() & 0xFFFF; uint32_t candidate = (uint32_t) aesrand_getword() & 0xFFFF;
while(check_coprime(candidate) != COPRIME) { while(check_coprime(candidate, group) != COPRIME) {
++candidate; ++candidate;
} }
// pre-modded result is gigantic so use GMP // pre-modded result is gigantic so use GMP
mpz_t base, power, prime, primroot; mpz_t base, power, prime, primroot;
mpz_init_set_d(base, (double) KNOWN_PRIMROOT); mpz_init_set_d(base, (double) group->known_primroot);
mpz_init_set_d(power, (double) candidate); mpz_init_set_d(power, (double) candidate);
mpz_init_set_d(prime, (double) PRIME); mpz_init_set_d(prime, (double) group->prime);
mpz_init(primroot); mpz_init(primroot);
mpz_powm(primroot, base, power, prime); mpz_powm(primroot, base, power, prime);
uint64_t retv = (uint64_t) mpz_get_ui(primroot); uint64_t retv = (uint64_t) mpz_get_ui(primroot);
@ -112,6 +149,24 @@ int cyclic_init(uint32_t primroot_, uint32_t current_)
{ {
assert(!(!primroot_ && current_)); assert(!(!primroot_ && current_));
// Initialize blacklist
if (blacklist_init_from_files(zconf.whitelist_filename,
zconf.blacklist_filename)) {
return -1;
}
num_addrs = blacklist_count_allowed();
uint32_t i;
const cyclic_group_t *cur_group = NULL;
for (i=0; i<sizeof(groups)/sizeof(groups[0]); i++) {
if (groups[i].prime > num_addrs) {
cur_group = &groups[i];
log_debug("cyclic", "using prime %lu, known_primroot %lu", cur_group->prime, cur_group->known_primroot);
prime = groups[i].prime;
break;
}
}
if (zconf.use_seed) { if (zconf.use_seed) {
aesrand_init(zconf.seed+1); aesrand_init(zconf.seed+1);
} else { } else {
@ -119,7 +174,7 @@ int cyclic_init(uint32_t primroot_, uint32_t current_)
} }
if (!primroot_) { if (!primroot_) {
do { do {
primroot = find_primroot(); primroot = find_primroot(cur_group);
} while (primroot >= (1LL << 32)); } while (primroot >= (1LL << 32));
log_debug(LSRC, "primitive root: %lld", primroot); log_debug(LSRC, "primitive root: %lld", primroot);
current = (uint32_t) aesrand_getword() & 0xFFFF; current = (uint32_t) aesrand_getword() & 0xFFFF;
@ -140,11 +195,6 @@ int cyclic_init(uint32_t primroot_, uint32_t current_)
} }
} }
zconf.generator = primroot; zconf.generator = primroot;
if (blacklist_init_from_files(zconf.whitelist_filename,
zconf.blacklist_filename)) {
return -1;
}
// make sure current is an allowed ip // make sure current is an allowed ip
cyclic_get_next_ip(); cyclic_get_next_ip();
@ -153,7 +203,7 @@ int cyclic_init(uint32_t primroot_, uint32_t current_)
uint32_t cyclic_get_curr_ip(void) uint32_t cyclic_get_curr_ip(void)
{ {
return (uint32_t) current; return (uint32_t) blacklist_lookup_index(current-1);
} }
uint32_t cyclic_get_primroot(void) uint32_t cyclic_get_primroot(void)
@ -165,7 +215,7 @@ static inline uint32_t cyclic_get_next_elem(void)
{ {
do { do {
current *= primroot; current *= primroot;
current %= PRIME; current %= prime;
} while (current >= (1LL << 32)); } while (current >= (1LL << 32));
return (uint32_t) current; return (uint32_t) current;
} }
@ -174,11 +224,10 @@ uint32_t cyclic_get_next_ip(void)
{ {
while (1) { while (1) {
uint32_t candidate = cyclic_get_next_elem(); uint32_t candidate = cyclic_get_next_elem();
if (!blacklist_is_allowed(candidate)) { if (candidate-1 < num_addrs) {
zsend.blacklisted++; return blacklist_lookup_index(candidate-1);
} else {
return candidate;
} }
zsend.blacklisted++;
} }
} }