#include #include #include #include #include #include #include #include #include #include "../lib/constraint.h" #include "../lib/logger.h" // // Efficient address-space constraints (AH 7/2013) // // This module uses a tree-based representation to efficiently // manipulate and query constraints on the address space to be // scanned. It provides a value for every IP address, and these // values are applied by setting them for network prefixes. Order // matters: setting a value replaces any existing value for that // prefix or subsets of it. We use this to implement network // whitelisting and blacklisting. // // Think of setting values in this structure like painting // subnets with different colors. We can paint subnets black to // exclude them and white to allow them. Only the top color shows. // This makes for potentially very powerful constraint specifications. // // Internally, this is implemented using a binary tree, where each // node corresponds to a network prefix. (E.g., the root is // 0.0.0.0/0, and its children, if present, are 0.0.0.0/1 and // 128.0.0.0/1.) Each leaf of the tree stores the value that applies // to every address within the leaf's portion of the prefix space. // // As an optimization, after all values are set, we look up the // value or subtree for every /16 prefix and cache them as an array. // This lets subsequent lookups bypass the bottom half of the tree. // /* * Constraint Copyright 2013 Regents of the University of Michigan * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy * of the License at http://www.apache.org/licenses/LICENSE-2.0 */ typedef struct node { struct node *l; struct node *r; value_t value; } node_t; // As an optimization, we precompute lookups for every prefix of this // length: #define RADIX_LENGTH 16 struct _constraint { node_t *root; // root node of the tree node_t **radix; // array of nodes for every RADIX_LENGTH prefix int optimized; // is radix populated and up-to-date? }; // Tree operations respect the invariant that every node that isn't a // leaf has exactly two children. #define IS_LEAF(node) ((node)->l == NULL) // Allocate a new leaf with the given value static node_t* _create_leaf(value_t value) { node_t *node = malloc(sizeof(node_t)); assert(node); node->l = NULL; node->r = NULL; node->value = value; return node; } // Free the subtree rooted at node. static void _destroy_subtree(node_t *node) { if (node == NULL) return; _destroy_subtree(node->l); _destroy_subtree(node->r); free(node); } // Convert from an internal node to a leaf. static void _convert_to_leaf(node_t *node) { assert(node); assert(!IS_LEAF(node)); _destroy_subtree(node->l); _destroy_subtree(node->r); node->l = NULL; node->r = NULL; } // Recursive function to set value for a given network prefix within // the tree. (Note: prefix must be in host byte order.) static void _set_recurse(node_t *node, uint32_t prefix, int len, value_t value) { assert(node); assert(0 <= len && len <= 32); if (len == 0) { // We're at the end of the prefix; make this a leaf and set the value. if (!IS_LEAF(node)) { _convert_to_leaf(node); } node->value = value; return; } if (IS_LEAF(node)) { // We're not at the end of the prefix, but we hit a leaf. if (node->value == value) { // A larger prefix has the same value, so we're done. return; } // The larger prefix has a different value, so we need to convert it // into an internal node and continue processing on one of the leaves. node->l = _create_leaf(node->value); node->r = _create_leaf(node->value); } // We're not at the end of the prefix, and we're at an internal // node. Recurse on the left or right subtree. if (prefix & 0x80000000) { _set_recurse(node->r, prefix << 1, len - 1, value); } else { _set_recurse(node->l, prefix << 1, len - 1, value); } // At this point, we're an internal node, and the value is set // by one of our children or its descendent. If both children are // leaves with the same value, we can discard them and become a left. if (IS_LEAF(node->r) && IS_LEAF(node->l) && node->r->value == node->l->value) { node->value = node->l->value; _convert_to_leaf(node); } } // Set the value for a given network prefix, overwriting any existing // values on that prefix or subsets of it. // (Note: prefix must be in host byte order.) void constraint_set(constraint_t *con, uint32_t prefix, int len, value_t value) { assert(con); _set_recurse(con->root, prefix, len, value); con->optimized = 0; } // Return the value pertaining to an address, according to the tree // starting at given root. (Note: address must be in host byte order.) static int _lookup_ip(node_t *root, uint32_t address) { assert(root); node_t *node = root; uint32_t mask = 0x80000000; for (;;) { if (IS_LEAF(node)) { return node->value; } if (address & mask) { node = node->r; } else { node = node->l; } mask >>= 1; } } // Return the value pertaining to an address. // (Note: address must be in host byte order.) int constraint_lookup_ip(constraint_t *con, uint32_t address) { assert(con); if (con->optimized) { // Use radix optimization node_t *node = con->radix[address >> (32 - RADIX_LENGTH)]; if (IS_LEAF(node)) { return node->value; } return _lookup_ip(node, address << RADIX_LENGTH); } else { // Do a full lookup using the tree log_trace("constraint", "Unoptimized lookup"); return _lookup_ip(con->root, address); } } // Implement count_ips by recursing on halves of the tree. Size represents // the number of addresses in a prefix at the current level of the tree. static uint64_t _count_ips_recurse(node_t *node, value_t value, uint64_t size) { assert(node); if (IS_LEAF(node)) { if (node->value == value) { return size; } else { return 0; } } return _count_ips_recurse(node->l, value, size >> 1) + _count_ips_recurse(node->r, value, size >> 1); } // Return the number of addresses that have a given value. uint64_t constraint_count_ips(constraint_t *con, value_t value) { assert(con); return _count_ips_recurse(con->root, value, (uint64_t)1 << 32); } // Initialize the tree. // All addresses will initally have the given value. constraint_t* constraint_init(value_t value) { log_trace("constraint", "Initializing"); constraint_t* con = malloc(sizeof(constraint_t)); con->root = _create_leaf(value); con->radix = calloc(sizeof(node_t *), 1 << RADIX_LENGTH); assert(con->radix); con->optimized = 0; return con; } // Deinitialize and free the tree. void constraint_free(constraint_t *con) { assert(con); log_trace("constraint", "Cleaning up"); _destroy_subtree(con->root); free(con->radix); free(con); } // Return a node that determines the values for the addresses with // the given prefix. This is either the internal node that // corresponds to the end of the prefix or a leaf node that // encompasses the prefix. (Note: prefix must be in host byte order.) static node_t* _lookup_node(node_t *root, uint32_t prefix, int len) { assert(root); assert(0 <= len && len <= 32); node_t *node = root; uint32_t mask = 0x80000000; for (int i=0; i < len; i++) { if (IS_LEAF(node)) { return node; } if (prefix & mask) { node = node->r; } else { node = node->l; } mask >>= 1; } return node; } // After values have been set, precompute prefix lookups. void constraint_optimize(constraint_t *con) { assert(con); if (con->optimized) { return; } log_trace("constraint", "Optimizing constraints"); for (uint32_t i=0; i < (1 << RADIX_LENGTH); i++) { uint32_t prefix = i << (32 - RADIX_LENGTH); con->radix[i] = _lookup_node(con->root, prefix, RADIX_LENGTH); } con->optimized = 1; } /* int main(void) { log_init(stderr, LOG_DEBUG); constraint_t *con = constraint_init(0); constraint_set(con, ntohl(inet_addr("128.128.0.0")), 1, 22); constraint_set(con, ntohl(inet_addr("128.128.0.0")), 1, 1); constraint_set(con, ntohl(inet_addr("128.0.0.0")), 1, 1); constraint_set(con, ntohl(inet_addr("10.0.0.0")), 24, 1); constraint_set(con, ntohl(inet_addr("10.0.0.0")), 24, 0); constraint_set(con, ntohl(inet_addr("10.11.12.0")), 24, 1); constraint_set(con, ntohl(inet_addr("141.212.0.0")), 16, 0); for (int x=1; x < 2; x++) { if (x == 1) { constraint_optimize(con); } printf("count(0)=%ld\n", constraint_count_ips(con, 0)); printf("count(1)=%ld\n", constraint_count_ips(con, 1)); printf("%d\n", constraint_lookup_ip(con,ntohl(inet_addr("10.11.12.0")))); assert(constraint_count_ips(con, 0) + constraint_count_ips(con, 1) == (uint64_t)1 << 32); uint32_t i=0, count=0; do { if (constraint_lookup_ip(con, i)) count++; } while (++i != 0); printf("derived count(1)=%u\n", count); } constraint_free(con); } */ /* static int init(constraint_t *con, char *file, const char *name, value_t value) { FILE *fp; char line[1000]; int blocked = 0; fp = fopen(file, "r"); if (fp == NULL) { log_fatal(name, "Unable to open %s file: %s: %s", name, file, strerror(errno)); } while (fgets(line, sizeof(line), fp) != NULL) { char *comment = strchr(line, '#'); if (comment) { *comment = '\0'; } char ip[33]; if ((sscanf(line, "%32s", ip)) == EOF) { continue; } int prefix_len; char *slash = strchr(ip, '/'); if (slash == NULL) { log_fatal(name, "Unable to parse %s file: %s", name, file); } // split apart network and prefix length *slash = '\0'; prefix_len = atoi(&slash[1]); constraint_set(con, ntohl(inet_addr(ip)), prefix_len, value); blocked++; } fclose(fp); return 0; } void main() { log_init(stderr, LOG_TRACE); constraint_t *con = constraint_init(1); init(con, "blacklist.prefixes", "blacklist", 0); //constraint_optimize(con); printf("count(0)=%lu\n", constraint_count_ips(con, 0)); printf("count(1)=%lu\n", constraint_count_ips(con, 1)); uint32_t i=0, count=0; do { if (constraint_lookup_ip(con, i)) count++; } while (++i != 0); printf("derived count(1)=%u\n", count); constraint_free(con); } */