diff --git a/.gitignore b/.gitignore index 889d9e6..2c2e6ad 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ Makefile CMakeCache.txt src/zopt.h src/zopt.c +lexer.c +lexer.h +parser.c +parser.h diff --git a/lib/stack.c b/lib/stack.c new file mode 100644 index 0000000..8dfe0db --- /dev/null +++ b/lib/stack.c @@ -0,0 +1,40 @@ +#include "stack.h" +#include "../lib/xalloc.h" + +#include + +struct stack { + size_t max_size; + size_t cur_size; + void** arr; +}; + +stack_t* alloc_stack(size_t size) +{ + stack_t* stack = xmalloc(sizeof(stack_t)); + stack->arr = xcalloc(size, sizeof(void*)); + stack->max_size = size; + stack->cur_size = 0; + return stack; +} + +void free_stack(stack_t* stack) +{ + xfree(stack->arr); + xfree(stack); +} + +void push(stack_t* stack, void* elt) +{ + if (stack->cur_size == stack->max_size) { + stack->max_size *= 2; + xrealloc(stack->arr, stack->max_size); + } + stack->arr[stack->cur_size++] = elt; +} + +void* pop(stack_t* stack) +{ + void* res = stack->arr[--stack->cur_size]; + return res; +} diff --git a/lib/stack.h b/lib/stack.h new file mode 100644 index 0000000..9417238 --- /dev/null +++ b/lib/stack.h @@ -0,0 +1,15 @@ +#ifndef ZMAP_STACK_H +#define ZMAP_STACK_H + +#include + +struct stack; +typedef struct stack stack_t; + +stack_t* alloc_stack(size_t size); +void free_stack(stack_t* stack); + +void push(stack_t* stack, void* elt); +void* pop(stack_t* stack); + +#endif /* ZMAP_STACK_H */ \ No newline at end of file diff --git a/lib/xalloc.c b/lib/xalloc.c new file mode 100644 index 0000000..60e73e7 --- /dev/null +++ b/lib/xalloc.c @@ -0,0 +1,44 @@ +#include "xalloc.h" +#include "../lib/logger.h" + +#include + +void die() __attribute__((noreturn)); + +void* xcalloc(size_t count, size_t size) +{ + void* res = calloc(count, size); + if (res == NULL) { + die(); + } + return res; +} + +void xfree(void *ptr) +{ + free(ptr); +} + +void* xmalloc(size_t size) +{ + void* res = malloc(size); + if (res == NULL) { + die(); + } + return res; +} + +void* xrealloc(void *ptr, size_t size) +{ + void* res = realloc(ptr, size); + if (res == NULL) { + die(); + } + return res; +} + +void die() +{ + log_fatal("zmap", "Out of memory"); +} + diff --git a/lib/xalloc.h b/lib/xalloc.h new file mode 100644 index 0000000..78cd278 --- /dev/null +++ b/lib/xalloc.h @@ -0,0 +1,14 @@ +#ifndef ZMAP_ALLOC_H +#define ZMAP_ALLOC_H + +#include + +void* xcalloc(size_t count, size_t size); + +void xfree(void *ptr); + +void* xmalloc(size_t size); + +void* xrealloc(void *ptr, size_t size); + +#endif \ No newline at end of file diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..87de78c --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,4 @@ +lexer.c +lexer.h +parser.c +parser.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a5c6346..95bd9ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,6 +12,8 @@ SET(LIB_SOURCES ${PROJECT_SOURCE_DIR}/lib/pbm.c ${PROJECT_SOURCE_DIR}/lib/random.c ${PROJECT_SOURCE_DIR}/lib/rijndael-alg-fst.c + ${PROJECT_SOURCE_DIR}/lib/stack.c + ${PROJECT_SOURCE_DIR}/lib/xalloc.c ) # ADD YOUR PROBE MODULE HERE @@ -40,7 +42,9 @@ SET(PROBE_MODULE_SOURCES SET(SOURCES aesrand.c cyclic.c + expression.c fieldset.c + filter.c get_gateway.c monitor.c recv.c @@ -50,6 +54,8 @@ SET(SOURCES zmap.c zopt_compat.c "${CMAKE_CURRENT_BINARY_DIR}/zopt.h" + "${CMAKE_CURRENT_BINARY_DIR}/lexer.c" + "${CMAKE_CURRENT_BINARY_DIR}/parser.c" ${EXTRA_PROBE_MODULES} ${EXTRA_OUTPUT_MODULES} ${PROBE_MODULE_SOURCES} @@ -69,6 +75,14 @@ add_custom_command(OUTPUT zopt.h COMMAND gengetopt -C --no-help --no-version --unamed-opts=SUBNETS -i "${CMAKE_CURRENT_SOURCE_DIR}/zopt.ggo" -F "${CMAKE_CURRENT_BINARY_DIR}/zopt" ) +add_custom_command(OUTPUT lexer.c + COMMAND flex -o "${CMAKE_CURRENT_BINARY_DIR}/lexer.c" --header-file="${CMAKE_CURRENT_BINARY_DIR}/lexer.h" lexer.l + ) + +add_custom_command(OUTPUT parser.c + COMMAND byacc -d -o parser.c parser.y + ) + add_executable(zmap ${SOURCES}) target_link_libraries( diff --git a/src/expression.c b/src/expression.c new file mode 100644 index 0000000..537be78 --- /dev/null +++ b/src/expression.c @@ -0,0 +1,162 @@ +#include "expression.h" +#include "fieldset.h" + +#include "../lib/xalloc.h" + +/* Static helper functions */ + +static node_t* alloc_node(); +static int eval_gt_node(node_t *node, fieldset_t *fields); +static int eval_lt_node(node_t *node, fieldset_t *fields); +static int eval_eq_node(node_t *node, fieldset_t *fields); +static int eval_lt_eq_node(node_t *node, fieldset_t *fields); +static int eval_gt_eq_node(node_t *node, fieldset_t *fields); + + +static node_t* alloc_node() +{ + node_t *node = xmalloc(sizeof(node_t)); + memset(node, 0, sizeof(node_t)); + return node; +} + +static int eval_gt_node(node_t *node, fieldset_t *fields) +{ + int index = node->left_child->value.field.index; + uint64_t expected = node->right_child->value.int_literal; + uint64_t actual = fs_get_uint64_by_index(fields, index); + return (actual > expected); +} + +static int eval_lt_node(node_t *node, fieldset_t *fields) +{ + int index = node->left_child->value.field.index; + uint64_t expected = node->right_child->value.int_literal; + uint64_t actual = fs_get_uint64_by_index(fields, index); + return (actual < expected); +} + +static int eval_eq_node(node_t *node, fieldset_t *fields) +{ + node_t *literal = node->right_child; + int index = node->left_child->value.field.index; + char *expected, *actual; + switch (literal->type) { + case STRING: + expected = literal->value.string_literal; + actual = fs_get_string_by_index(fields, index); + return (strcmp(expected, actual) == 0); + break; + case INT: + return (fs_get_uint64_by_index(fields, index) == literal->value.int_literal); + break; + default: + printf("wat\n"); + break; + } + return 0; +} + +static int eval_lt_eq_node(node_t *node, fieldset_t *fields) +{ + return !(eval_gt_node(node, fields)); +} + +static int eval_gt_eq_node(node_t *node, fieldset_t *fields) +{ + return !(eval_lt_node(node, fields)); +} + + +/* Exposed functions */ + +node_t* make_op_node(enum operation op) +{ + node_t* node = alloc_node(); + node->type = OP; + node->value.op = op; + return node; +} + +node_t* make_field_node(char *fieldname) +{ + node_t *node = alloc_node(); + node->type = FIELD; + node->value.field.fieldname = fieldname; + return node; +} + +node_t* make_string_node(char *literal) +{ + node_t *node = alloc_node(); + node->type = STRING; + node->value.string_literal = literal; + return node; +} + +node_t* make_int_node(int literal) +{ + node_t *node = alloc_node(); + node->type = INT; + node->value.int_literal = literal; + return node; +} + +int evaluate_expression(node_t *root, fieldset_t *fields) +{ + if (!root) return 1; + switch (root->type) { /* XXX Not sure if runs */ + case FIELD: + case STRING: + case INT: + return 1; + case OP: + break; + } + switch (root->value.op) { + case GT: + return eval_gt_node(root, fields); + case LT: + return eval_lt_node(root, fields); + case EQ: + return eval_eq_node(root, fields); + case NEQ: + return (!eval_eq_node(root, fields)); + case LT_EQ: + return eval_lt_eq_node(root, fields); + case GT_EQ: + return eval_gt_eq_node(root, fields); + case AND: + return (evaluate_expression(root->left_child, fields) + && evaluate_expression(root->right_child, fields)); + case OR: + return (evaluate_expression(root->left_child, fields) + || evaluate_expression(root->right_child, fields)); + } + return 0; +} + +void print_expression(node_t *root) +{ + if (!root) return; + printf("%s", "( "); + print_expression(root->left_child); + switch (root->type) { + case OP: + printf(" %i ", root->value.op); + break; + case FIELD: + printf(" (%s", root->value.field.fieldname); + break; + case STRING: + printf("%s) ", root->value.string_literal); + break; + case INT: + printf(" %llu) ", (long long unsigned) root->value.int_literal); + break; + default: + break; + } + print_expression(root->right_child); + printf("%s", " )"); +} diff --git a/src/expression.h b/src/expression.h new file mode 100644 index 0000000..d65668d --- /dev/null +++ b/src/expression.h @@ -0,0 +1,50 @@ +#ifndef ZMAP_TREE_H +#define ZMAP_TREE_H + +#include "fieldset.h" + +#include +#include +#include +#include + +enum operation { + GT, LT, EQ, NEQ, AND, OR, LT_EQ, GT_EQ +}; + +enum node_type { + OP, FIELD, STRING, INT +}; + +struct field_id { + int index; + char *fieldname; +}; + +union node_value { + struct field_id field; + char *string_literal; + uint64_t int_literal; + enum operation op; +}; + +typedef struct node { + struct node *left_child; + struct node *right_child; + enum node_type type; + union node_value value; +} node_t; + +node_t* make_op_node(enum operation op); + +node_t* make_field_node(char *fieldname); + +node_t* make_string_node(char *literal); + +node_t* make_int_node(int literal); + +int evaluate_expression(node_t *root, fieldset_t *fields); + +void print_expression(node_t *root); + +#endif /* ZMAP_TREE_H */ \ No newline at end of file diff --git a/src/filter.c b/src/filter.c new file mode 100644 index 0000000..83bee43 --- /dev/null +++ b/src/filter.c @@ -0,0 +1,99 @@ +#include "filter.h" +#include "state.h" +#include "lexer.h" +#include "parser.h" +#include "expression.h" +#include "../lib/logger.h" + +#include + +extern int yyparse(); + +node_t *zfilter; + +static int validate_node(node_t *node, fielddefset_t *fields) +{ + int index, found = 0; + if (node->type == OP) { + // These end up getting validated later + if (node->value.op == AND || node->value.op == OR) { + return 1; + } + // Comparison node (=, >, <, etc.) + // Validate that the field (left child) exists in the fieldset + for (index = 0; index < fields->len; index++) { + if (fields->fielddefs[index].name) { + if (strcmp(fields->fielddefs[index].name, + node->left_child->value.field.fieldname) == 0) { + node->left_child->value.field.index = index; + found = 1; + break; + } + } + } + if (!found) { + fprintf(stderr, "Field '%s' does not exist\n", + node->left_child->value.field.fieldname); + return 0; + } + // Fieldname is fine, match the type. + switch (node->right_child->type) { + case STRING: + if (strcmp(fields->fielddefs[index].type, "string") == 0) { + return 1; + } else { + fprintf(stderr, "Field '%s' is not of type 'string'\n", + fields->fielddefs[index].name); + return 0; + } + case INT: + if (strcmp(fields->fielddefs[index].type, "int") == 0) { + return 1; + } else { + fprintf(stderr, "Field '%s' is not of type 'int'\n", + fields->fielddefs[index].name); + return 0; + } + default: + return 0; + } + } else { + // All non-op nodes are valid + return 1; + } + // Didn't validate + return 0; + +} + +int parse_filter_string(char *filter) +{ + YY_BUFFER_STATE buffer_state = yy_scan_string(filter); + int status = yyparse(); + yy_delete_buffer(buffer_state); + if (status) { + // Error + log_error("zmap", "Unable to parse filter string: '%s'", filter); + return 0; + } + zconf.filter.expression = zfilter; + return 1; +} + +/* + * 0 Valid + * -1 Invalid Field Name + * -2 Type Mismatch + */ +int validate_filter(node_t *root, fielddefset_t *fields) +{ + int valid; + if (!root) { + return 1; + } + valid = validate_node(root, fields); + if (!valid) { + return 0; + } + return (validate_filter(root->left_child, fields) && validate_filter(root->right_child, fields)); +} diff --git a/src/filter.h b/src/filter.h new file mode 100644 index 0000000..f42af6d --- /dev/null +++ b/src/filter.h @@ -0,0 +1,15 @@ +#ifndef ZMAP_FILTER_H +#define ZMAP_FILTER_H + +#include "expression.h" +#include "fieldset.h" + +struct output_filter { + node_t *expression; +}; + +int parse_filter_string(char *filter); + +int validate_filter(node_t *root, fielddefset_t *fields); + +#endif /* ZMAP_FILTER_H */ \ No newline at end of file diff --git a/src/lexer.l b/src/lexer.l new file mode 100644 index 0000000..cd884fa --- /dev/null +++ b/src/lexer.l @@ -0,0 +1,26 @@ +%{ +#pragma GCC diagnostic ignored "-Wredundant-decls" +#include +#include "parser.h" + +%} + +%option noinput +%option nounput +%% +[0-9]+ yylval.int_literal = (uint64_t) atoll(yytext); return T_NUMBER; +\n /* Ignore end of line */ +[ \t]+ /* Ignore whitespace */ +!= return T_NOT_EQ; +>= return T_GT_EQ; +"<=" return T_LT_EQ; +&& return T_AND; +"||" return T_OR; += return '='; +">" return '>'; +"<" return '<'; +"(" return '('; +")" return ')'; +[a-zA-Z][a-zA-Z0-9]+ yylval.string_literal = strdup(yytext); return T_FIELD; + +%% \ No newline at end of file diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..6c44e17 --- /dev/null +++ b/src/parser.y @@ -0,0 +1,144 @@ +%{ +#include +#include +#include "expression.h" +#include "lexer.h" +#include "filter.h" + +void yyerror(const char *str) +{ + fprintf(stderr,"Parse error: %s\n",str); +} + +int yywrap() +{ + return 1; +} + +extern node_t *zfilter; + +%} + +%union { + int int_literal; + char *string_literal; + struct node *expr; +} + +%token '(' ')' T_AND T_OR +%token T_NUMBER +%token T_FIELD +%token T_NOT_EQ T_GT_EQ '>' '<' '=' T_LT_EQ + +%left T_OR +%left T_AND + +%type filter +%type number_filter +%type string_filter +%type filter_expr + + +%% + +expression: filter_expr + { + zfilter = $1; + } + + +filter_expr: + filter_expr T_OR filter_expr + { + $$ = make_op_node(OR); + $$->left_child = $1; + $$->right_child = $3; + } + | filter_expr T_AND filter_expr + { + $$ = make_op_node(AND); + $$->left_child = $1; + $$->right_child = $3; + } + | '(' filter_expr ')' + { + $$ = $2; + } + | filter + { + $$ = $1; + } + ; + +filter: number_filter + { + $$ = $1; + } + | string_filter + { + $$ = $1; + } + ; + +number_filter: T_FIELD '=' T_NUMBER + { + $$ = make_op_node(EQ); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + | + T_FIELD '>' T_NUMBER + { + $$ = make_op_node(GT); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + | + T_FIELD '<' T_NUMBER + { + $$ = make_op_node(LT); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + | + T_FIELD T_NOT_EQ T_NUMBER + { + $$ = make_op_node(NEQ); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + | + T_FIELD T_GT_EQ T_NUMBER + { + $$ = make_op_node(GT_EQ); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + | + T_FIELD T_LT_EQ T_NUMBER + { + $$ = make_op_node(LT_EQ); + $$->left_child = make_field_node($1); + $$->right_child = make_int_node($3); + } + ; + +string_filter: + T_FIELD '=' T_FIELD + { + $$ = make_op_node(EQ); + $$->left_child = make_field_node($1); + $$->right_child = make_string_node($3); + } + | + T_FIELD T_NOT_EQ T_FIELD + { + $$ = make_op_node(NEQ); + $$->left_child = make_field_node($1); + $$->right_child = make_string_node($3); + } + ; + +%% + + diff --git a/src/recv.c b/src/recv.c index bb52fcd..6dff8c6 100644 --- a/src/recv.c +++ b/src/recv.c @@ -35,6 +35,7 @@ #include "state.h" #include "validate.h" #include "fieldset.h" +#include "expression.h" #include "probe_modules/probe_modules.h" #include "output_modules/output_modules.h" @@ -129,6 +130,10 @@ void packet_cb(u_char __attribute__((__unused__)) *user, if (is_repeat && zconf.filter_duplicates) { goto cleanup; } + if (!evaluate_expression(zconf.filter.expression, fs)) { + goto cleanup; + } + o = translate_fieldset(fs, &zconf.fsconf.translation); if (zconf.output_module && zconf.output_module->process_ip) { zconf.output_module->process_ip(o); diff --git a/src/state.h b/src/state.h index bb4181a..6ccf4a4 100644 --- a/src/state.h +++ b/src/state.h @@ -15,6 +15,7 @@ #include "types.h" #include "fieldset.h" +#include "filter.h" #ifndef STATE_H #define STATE_H @@ -81,6 +82,7 @@ struct state_conf { int destination_cidrs_len; char *raw_output_fields; char **output_fields; + struct output_filter filter; struct fieldset_conf fsconf; int output_fields_len; int dryrun; diff --git a/src/whitelist b/src/whitelist new file mode 100644 index 0000000..aab8f1d --- /dev/null +++ b/src/whitelist @@ -0,0 +1 @@ +192.168.1.0/24 diff --git a/src/zmap.c b/src/zmap.c index b4f564e..7cf1079 100644 --- a/src/zmap.c +++ b/src/zmap.c @@ -36,6 +36,7 @@ #include "state.h" #include "monitor.h" #include "get_gateway.h" +#include "filter.h" #include "output_modules/output_modules.h" #include "probe_modules/probe_modules.h" @@ -514,6 +515,18 @@ int main(int argc, char *argv[]) &zconf.fsconf.defs, zconf.output_fields, zconf.output_fields_len); + // Parse and validate the output filter, if any + if (args.output_filter_arg) { + // Run it through yyparse to build the expression tree + if (!parse_filter_string(args.output_filter_arg)) { + log_fatal("zmap", "Unable to parse filter expression"); + } + + // Check the fields used against the fieldset in use + if (!validate_filter(zconf.filter.expression, &zconf.fsconf.defs)) { + log_fatal("zmap", "Invalid filter"); + } + } SET_BOOL(zconf.dryrun, dryrun); SET_BOOL(zconf.quiet, quiet); diff --git a/src/zopt.ggo b/src/zopt.ggo index 48844b6..d1852aa 100644 --- a/src/zopt.ggo +++ b/src/zopt.ggo @@ -94,6 +94,9 @@ option "probe-args" - "Arguments to pass to probe module" option "output-args" - "Arguments to pass to output module" typestr="args" optional string +option "output-filter" - "Read a file containing an output filter on the first line" + typestr="filename" + optional string option "list-output-modules" - "List available output modules" optional option "list-probe-modules" - "List available probe modules"