diff --git a/lib/blacklist.c b/lib/blacklist.c index 49eaa32..a33860a 100644 --- a/lib/blacklist.c +++ b/lib/blacklist.c @@ -25,6 +25,10 @@ static constraint_t *constraint = NULL; +uint32_t blacklist_lookup_index(uint64_t index) { + return ntohl(constraint_lookup_index(constraint, index, ADDR_ALLOWED)); +} + // check whether a single IP address is allowed to be scanned. // 1 => is allowed // 0 => is not allowed @@ -121,9 +125,24 @@ int blacklist_init_from_files(char *whitelist_filename, char *blacklist_filename if (blacklist_filename) { init(blacklist_filename, "blacklist", ADDR_DISALLOWED); } - constraint_optimize(constraint); + constraint_paint_value(constraint, ADDR_ALLOWED); uint64_t allowed = blacklist_count_allowed(); log_debug("blacklist", "%lu addresses allowed to be scanned (%0.0f%% of address space)", allowed, allowed*100./((long long int)1 << 32)); + + /* + // test + log_debug("blacklist", "testing started"); + uint64_t count = constraint_count_ips(constraint, ADDR_ALLOWED); + for (unsigned int i=0; i < count; i++) { + int ip = constraint_lookup_index(constraint, i, ADDR_ALLOWED); + if ((i & 0xFFFFFF) == 0) + log_info("blacklist", "%x", i & 0xFF000000); + if (constraint_lookup_ip(constraint, ip) != ADDR_ALLOWED) { + log_error("blacklist", "test failed for index %d", i); + } + } + log_debug("blacklist", "testing complete"); + */ return 0; } diff --git a/lib/blacklist.h b/lib/blacklist.h index 80a44fc..98d77b4 100644 --- a/lib/blacklist.h +++ b/lib/blacklist.h @@ -3,6 +3,7 @@ #ifndef BLACKLIST_H #define BLACKLIST_H +uint32_t blacklist_lookup_index(uint64_t index); int blacklist_is_allowed(uint32_t s_addr); void blacklist_prefix(char *ip, int prefix_len); void whitelist_prefix(char *ip, int prefix_len); diff --git a/lib/constraint.c b/lib/constraint.c index aa8d0b9..2f70dd2 100644 --- a/lib/constraint.c +++ b/lib/constraint.c @@ -50,16 +50,19 @@ typedef struct node { struct node *l; struct node *r; value_t value; + uint64_t count; } node_t; // As an 
optimization, we precompute lookups for every prefix of this // length: -#define RADIX_LENGTH 16 +#define RADIX_LENGTH 20 struct _constraint { - node_t *root; // root node of the tree - node_t **radix; // array of nodes for every RADIX_LENGTH prefix - int optimized; // is radix populated and up-to-date? + node_t *root; // root node of the tree + uint32_t *radix; // array of prefixes (/RADIX_LENGTH) that are painted paint_value + size_t radix_len; // number of prefixes in radix array + int painted; // have we precomputed counts for each node? + value_t paint_value; // value for which we precomputed counts }; // Tree operations respect the invariant that every node that isn't a @@ -150,7 +153,7 @@ void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value) { assert(con); _set_recurse(con->root, prefix, len, value); - con->optimized = 0; + con->painted = 0; } // Return the value pertaining to an address, according to the tree @@ -176,67 +179,88 @@ static int _lookup_ip(node_t *root, uint32_t address) // Return the value pertaining to an address. // (Note: address must be in host byte order.) -int constraint_lookup_ip(constraint_t *con, uint32_t address) +value_t constraint_lookup_ip(constraint_t *con, uint32_t address) { assert(con); - if (con->optimized) { - // Use radix optimization - node_t *node = con->radix[address >> (32 - RADIX_LENGTH)]; + return _lookup_ip(con->root, address); +} + +// Return the nth painted IP address. 
+static int _lookup_index(node_t *root, uint64_t n) +{ + assert(root); + node_t *node = root; + uint32_t ip = 0; + uint32_t mask = 0x80000000; + for (;;) { if (IS_LEAF(node)) { - return node->value; + return ip | n; } - return _lookup_ip(node, address << RADIX_LENGTH); - } else { - // Do a full lookup using the tree - log_trace("constraint", "Unoptimized lookup"); - return _lookup_ip(con->root, address); + if (n < node->l->count) { + node = node->l; + } else { + n -= node->l->count; + node = node->r; + ip |= mask; + } + mask >>= 1; } } +// For a given value, return the IP address with zero-based index n. +// (i.e., if there are three addresses with value 0xFF, looking up index 1 +// will return the second one). +// Note that the tree must have been previously painted with this value. +uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value) +{ + assert(con); + if (!con->painted || con->paint_value != value) { + constraint_paint_value(con, value); + } + + uint64_t radix_idx = index / (1 << (32 - RADIX_LENGTH)); + if (radix_idx < con->radix_len) { + // Radix lookup + uint32_t radix_offset = index % (1 << (32 - RADIX_LENGTH)); // TODO: bitwise maths + return con->radix[radix_idx] | radix_offset; + } + + // Otherwise, do the "slow" lookup in tree. + // Note that tree counts do NOT include things in the radix, + // so we subtract these off here. + index -= con->radix_len * (1 << (32 - RADIX_LENGTH)); + assert(index < con->root->count); + return _lookup_index(con->root, index); +} + // Implement count_ips by recursing on halves of the tree. Size represents // the number of addresses in a prefix at the current level of the tree. -static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size) +// If paint is specified, each node will have its count set to the number of +// leaves under it set to value. 
+// If exclude_radix is specified, the number of addresses will exclude prefixes +// that are a /RADIX_LENGTH or larger +static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size, int paint, int exclude_radix) { assert(node); + uint64_t n; if (IS_LEAF(node)) { if (node->value == value) { - return size; + n = size; + // Exclude prefixes already included in the radix + if (exclude_radix && size >= (1 << (32 -RADIX_LENGTH))) { + n = 0; + } } else { - return 0; + n = 0; } + } else { + n = _count_ips_recurse(node->l, value, size >> 1, paint, exclude_radix) + + _count_ips_recurse(node->r, value, size >> 1, paint, exclude_radix); } - return _count_ips_recurse(node->l, value, size >> 1) + - _count_ips_recurse(node->r, value, size >> 1); -} - -// Return the number of addresses that have a given value. -uint64_t constraint_count_ips(constraint_t *con, value_t value) -{ - assert(con); - return _count_ips_recurse(con->root, value, (uint64_t)1 << 32); -} - -// Initialize the tree. -// All addresses will initally have the given value. -constraint_t* constraint_init(value_t value) -{ - log_trace("constraint", "Initializing"); - constraint_t* con = malloc(sizeof(constraint_t)); - con->root = _create_leaf(value); - con->radix = calloc(sizeof(node_t *), 1 << RADIX_LENGTH); - assert(con->radix); - con->optimized = 0; - return con; -} - -// Deinitialize and free the tree. 
-void constraint_free(constraint_t *con) -{ - assert(con); - log_trace("constraint", "Cleaning up"); - _destroy_subtree(con->root); - free(con->radix); - free(con); + if (paint) { + node->count = n; + } + return n; } // Return a node that determines the values for the addresses with @@ -250,8 +274,9 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len) node_t *node = root; uint32_t mask = 0x80000000; + int i; - for (int i=0; i < len; i++) { + for (i=0; i < len; i++) { if (IS_LEAF(node)) { return node; } @@ -265,21 +290,66 @@ static node_t* _lookup_node(node_t *root, uint32_t prefix, int len) return node; } -// After values have been set, precompute prefix lookups. -void constraint_optimize(constraint_t *con) +// For each node, precompute the count of leaves beneath it set to value. +// Note that the tree can be painted for only one value at a time. +void constraint_paint_value(constraint_t *con, value_t value) { assert(con); - if (con->optimized) { - return; - } - log_trace("constraint", "Optimizing constraints"); - for (uint32_t i=0; i < (1 << RADIX_LENGTH); i++) { + log_trace("constraint", "Painting value %lu", value); + + // Paint everything except what we will put in radix + _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 1, 1); + + // Fill in the radix array with a list of addresses + uint32_t i; + con->radix_len = 0; + for (i=0; i < (1 << RADIX_LENGTH); i++) { uint32_t prefix = i << (32 - RADIX_LENGTH); - con->radix[i] = _lookup_node(con->root, prefix, RADIX_LENGTH); + node_t *node = _lookup_node(con->root, prefix, RADIX_LENGTH); + if (IS_LEAF(node) && node->value == value) { + // Add this prefix to the radix + con->radix[con->radix_len++] = prefix; + } } - con->optimized = 1; + log_debug("constraint", "%lu IPs in radix array, %lu IPs in tree", + con->radix_len * (1 << (32 - RADIX_LENGTH)), con->root->count); + con->painted = 1; + con->paint_value = value; } +// Return the number of addresses that have a given value. 
+uint64_t constraint_count_ips(constraint_t *con, value_t value) +{ + assert(con); + if (con->painted && con->paint_value == value) { + return con->root->count + con->radix_len * (1 << (32 - RADIX_LENGTH)); + } else { + return _count_ips_recurse(con->root, value, (uint64_t)1 << 32, 0, 0); + } +} + +// Initialize the tree. +// All addresses will initially have the given value. +constraint_t* constraint_init(value_t value) +{ + log_trace("constraint", "Initializing"); + constraint_t* con = malloc(sizeof(constraint_t)); + con->root = _create_leaf(value); + con->radix = calloc(sizeof(uint32_t), 1 << RADIX_LENGTH); + assert(con->radix); + con->painted = 0; + return con; +} + +// Deinitialize and free the tree. +void constraint_free(constraint_t *con) +{ + assert(con); + log_trace("constraint", "Cleaning up"); + _destroy_subtree(con->root); + free(con->radix); + free(con); +} /* int main(void) @@ -317,68 +387,3 @@ int main(void) } */ -/* -static int init(constraint_t *con, char *file, const char *name, value_t value) -{ - FILE *fp; - char line[1000]; - int blocked = 0; - - fp = fopen(file, "r"); - if (fp == NULL) { - log_fatal(name, "Unable to open %s file: %s: %s", - name, file, strerror(errno)); - } - - while (fgets(line, sizeof(line), fp) != NULL) { - char *comment = strchr(line, '#'); - if (comment) { - *comment = '\0'; - } - char ip[33]; - if ((sscanf(line, "%32s", ip)) == EOF) { - continue; - } - int prefix_len; - char *slash = strchr(ip, '/'); - if (slash == NULL) { - log_fatal(name, - "Unable to parse %s file: %s", - name, file); - } - // split apart network and prefix length - *slash = '\0'; - prefix_len = atoi(&slash[1]); - constraint_set(con, ntohl(inet_addr(ip)), prefix_len, value); - - blocked++; - } - fclose(fp); - return 0; -} - - - -void main() -{ - log_init(stderr, LOG_TRACE); - - constraint_t *con = constraint_init(1); - init(con, "blacklist.prefixes", "blacklist", 0); - //constraint_optimize(con); - - printf("count(0)=%lu\n", constraint_count_ips(con, 
0)); - printf("count(1)=%lu\n", constraint_count_ips(con, 1)); - - uint32_t i=0, count=0; - do { - if (constraint_lookup_ip(con, i)) - count++; - } while (++i != 0); - printf("derived count(1)=%u\n", count); - - constraint_free(con); - -} - - */ diff --git a/lib/constraint.h b/lib/constraint.h index 2170f8f..228d755 100644 --- a/lib/constraint.h +++ b/lib/constraint.h @@ -1,14 +1,17 @@ #ifndef CONSTRAINT_H #define CONSTRAINT_H +#include <stdint.h> + typedef struct _constraint constraint_t; -typedef int value_t; +typedef unsigned int value_t; constraint_t* constraint_init(value_t value); void constraint_free(constraint_t *con); void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value); -void constraint_optimize(constraint_t *con); -int constraint_lookup_ip(constraint_t *con, uint32_t address); +value_t constraint_lookup_ip(constraint_t *con, uint32_t address); uint64_t constraint_count_ips(constraint_t *con, value_t value); +uint32_t constraint_lookup_index(constraint_t *con, uint64_t index, value_t value); +void constraint_paint_value(constraint_t *con, value_t value); #endif //_CONSTRAINT_H diff --git a/src/cyclic.c b/src/cyclic.c index a42c7f4..7d2eeee 100644 --- a/src/cyclic.c +++ b/src/cyclic.c @@ -57,28 +57,65 @@ #include "aesrand.h" #define LSRC "cyclic" -#define PRIME 4294967311 // 2^32 + 15 -#define KNOWN_PRIMROOT 3 -// distinct prime factors of 2^32 + 15 -static const uint64_t psub1_f[] = { 2, 3, 5, 131, 364289 }; +typedef struct cyclic_group { + uint64_t prime; + uint64_t known_primroot; + size_t num_prime_factors; // number of unique prime factors of (prime-1) + uint64_t prime_factors[10]; // unique prime factors of (prime-1) +} cyclic_group_t; -// selected primitive root that we'll use as the generator +// We will pick the first cyclic group from this list that is +// larger than the number of IPs in our whitelist. E.g. 
for an +// entire Internet scan, this would be cyclic32 +// Note: this list should remain ordered by size (primes) ascending. +static cyclic_group_t groups[] = { +{ // 2^16 + 1 + .prime = 65537, + .known_primroot = 3, + .prime_factors = {2}, + .num_prime_factors = 1 +}, +{ // 2^24 + 43 + .prime = 16777259, + .known_primroot = 2, + .prime_factors = {2, 23, 103, 3541}, + .num_prime_factors = 4 +}, +{ // 2^28 + 3 + .prime = 268435459, + .known_primroot = 2, + .prime_factors = {2, 3, 19, 87211}, + .num_prime_factors = 4 +}, +{ // 2^32 + 15 + .prime = 4294967311, + .known_primroot = 3, + .prime_factors = {2, 3, 5, 131, 364289}, + .num_prime_factors = 5 +} +}; + + +// selected prime/primitive root that we'll use as the generator +static uint64_t prime = 0; static uint64_t primroot = 0; static uint64_t current = 0; +static uint64_t num_addrs = 0; + #define COPRIME 1 -#define NOT_COPRIME 0 +#define NOT_COPRIME 0 // check whether two integers are coprime -static int check_coprime(uint64_t check) +static int check_coprime(uint64_t check, const cyclic_group_t *group) { - for (unsigned i=0; i < sizeof(psub1_f)/sizeof(psub1_f[0]); i++) { - if (psub1_f[i] > check && !(psub1_f[i] % check)) { + for (unsigned i=0; i < group->num_prime_factors; i++) { + if (group->prime_factors[i] > check && !(group->prime_factors[i] % check)) { return NOT_COPRIME; - } else if (psub1_f[i] < check && !(check % psub1_f[i])) { + } else if (group->prime_factors[i] < check && !(check % group->prime_factors[i])) { return NOT_COPRIME; - } else if (psub1_f[i] == check) { + } else if (group->prime_factors[i] == check) { return NOT_COPRIME; } } @@ -86,18 +123,18 @@ static int check_coprime(uint64_t check) } // find gen of cyclic group Z modulo PRIME -static uint64_t find_primroot(void) +static uint64_t find_primroot(const cyclic_group_t *group) { // what luck, rand() returns a uint32_t! 
uint32_t candidate = (uint32_t) aesrand_getword() & 0xFFFF; - while(check_coprime(candidate) != COPRIME) { + while(check_coprime(candidate, group) != COPRIME) { ++candidate; } // pre-modded result is gigantic so use GMP mpz_t base, power, prime, primroot; - mpz_init_set_d(base, (double) KNOWN_PRIMROOT); + mpz_init_set_d(base, (double) group->known_primroot); mpz_init_set_d(power, (double) candidate); - mpz_init_set_d(prime, (double) PRIME); + mpz_init_set_d(prime, (double) group->prime); mpz_init(primroot); mpz_powm(primroot, base, power, prime); uint64_t retv = (uint64_t) mpz_get_ui(primroot); @@ -112,6 +149,24 @@ int cyclic_init(uint32_t primroot_, uint32_t current_) { assert(!(!primroot_ && current_)); + // Initialize blacklist + if (blacklist_init_from_files(zconf.whitelist_filename, + zconf.blacklist_filename)) { + return -1; + } + num_addrs = blacklist_count_allowed(); + + uint32_t i; + const cyclic_group_t *cur_group = NULL; + for (i=0; i<sizeof(groups)/sizeof(groups[0]); i++) { + if (groups[i].prime > num_addrs) { + cur_group = &groups[i]; + log_debug("cyclic", "using prime %lu, known_primroot %lu", cur_group->prime, cur_group->known_primroot); + prime = groups[i].prime; + break; + } + } + if (zconf.use_seed) { aesrand_init(zconf.seed+1); } else { @@ -119,7 +174,7 @@ int cyclic_init(uint32_t primroot_, uint32_t current_) } if (!primroot_) { do { - primroot = find_primroot(); + primroot = find_primroot(cur_group); } while (primroot >= (1LL << 32)); log_debug(LSRC, "primitive root: %lld", primroot); current = (uint32_t) aesrand_getword() & 0xFFFF; @@ -140,11 +195,6 @@ int cyclic_init(uint32_t primroot_, uint32_t current_) } } zconf.generator = primroot; - if (blacklist_init_from_files(zconf.whitelist_filename, - zconf.blacklist_filename)) { - return -1; - } - // make sure current is an allowed ip cyclic_get_next_ip(); @@ -153,7 +203,7 @@ int cyclic_init(uint32_t primroot_, uint32_t current_) uint32_t cyclic_get_curr_ip(void) { - return (uint32_t) current; + return (uint32_t) blacklist_lookup_index(current-1); } uint32_t 
cyclic_get_primroot(void) @@ -165,7 +215,7 @@ static inline uint32_t cyclic_get_next_elem(void) { do { current *= primroot; - current %= PRIME; + current %= prime; } while (current >= (1LL << 32)); return (uint32_t) current; } @@ -174,11 +224,10 @@ uint32_t cyclic_get_next_ip(void) { while (1) { uint32_t candidate = cyclic_get_next_elem(); - if (!blacklist_is_allowed(candidate)) { - zsend.blacklisted++; - } else { - return candidate; + if (candidate-1 < num_addrs) { + return blacklist_lookup_index(candidate-1); } + zsend.blacklisted++; } }