C Version of Password Dictionary Variator

2010-02-03

I wrote a program to expand password dictionaries the other day, but due to its recursive algorithm it choked on words greater than 11 characters long, such as “electroencephalograph’s,” the longest word in my /usr/share/dict/words. So now I’ve tweaked the algorithm and also reimplemented the whole thing in C. It’s not real pretty C, but I tried to aim more for performance. It doesn’t crash on “electroencephalograph’s” anymore, but generating just the combinations for that one word (all 61,917,364,224 of them) took 43,499 seconds of CPU time (42653.61 user, 846.95 system). And that’s not even the time-consuming part! Just imagine running that many combinations through aircrack, all to test a single word.

Anyway, here is the code:

/**
 * pw-vary.c - implements pw-vary, a program to obfuscate passwords by replacing letters with leet.
 *
 * Makes a lot of assumptions about limits. Also assumes ASCII strings. Pretty lazy!
 *
 * Copyright (c) 2009 by Paul A Jungwirth
 */
#include
#include
#include

#define LINE_LEN        1024
#define MAX_VARIANTS    20
#define LONGEST_RESULT  4092

#define is_whitespace(c) (c == ' ' || c == '\t' || c == '\n' || c == '\r')
#define set_or_die(v, s) if (!v) { perror(s); exit(EXIT_FAILURE); }
#define unset_or_die(v, s) if (v) { perror(s); exit(EXIT_FAILURE); }

#ifdef DEBUG
#define DEBUGGING(s) s;
#else
#define DEBUGGING(s)
#endif

typedef struct t_variant {
  char letter;
  char *variants[MAX_VARIANTS];
  int count;
  char line[LINE_LEN];
  int maxlen;
} t_variant;

static t_variant *init_variant(char letter) {
  t_variant* ret;

  ret = (t_variant*)malloc(sizeof(t_variant));
  set_or_die(ret, NULL);

  ret->letter = letter;
  ret->count = 1;
  ret->maxlen = 1;
  ret->variants[0] = ret->line;

  return ret;
}

static char *find_line_start(char *line) {
  int i = 0;
  char c;

  c = line[i];
  while (c) {
    if (!is_whitespace(c)) return &line[i];
    c = line[i];
    i++;
  }

  return NULL;
}

static void read_leet(t_variant **variants, FILE *f) {
  char line[LINE_LEN];
  char *line_start;
  int len;
  int i, j;
  char first_letter, c;
  int in_whitespace;
  t_variant *vs;

  while (fgets(line, LINE_LEN, f)) {
    // strip leading white space and check for contents
    line_start = find_line_start(line);
    if (!line_start) continue;

    // Create a new entry in variants for the letter.
    first_letter = line_start[0];
    variants[first_letter] = init_variant(first_letter);
    vs = variants[first_letter];

    // List all the possible variants.
    strncpy(vs->line, line_start, LINE_LEN);
    line_start = vs->line;
    len = strlen(line_start);
    i = j = 1;
    in_whitespace = 1;
    while (i < len && j variants[j] = &line_start[i];
          vs->count++;
          in_whitespace = 0;
          j++;
        }
      }
      i++;
    }

    // find the longest variant for this letter:
    for (i = 0; vs->variants[i]; i++) {
      len = strlen(vs->variants[i]);
      if (len > vs->maxlen) vs->maxlen = len;
    }
  }
  unset_or_die(ferror(f), "reading leet file");
}

/*@unused@*/
static void print_variants(t_variant **variants) {
  int i, j;

  for (i = 0; i variants[j]) {
        puts(variants[i]->variants[j]);
        j++;
      }
      puts("====");
    }
  }
}

static void do_word(char *buffer, t_variant **variants, char *orig) {
  char *end;
  int i, j;
  int len, newlen, len2;
  int *maxes;
  int *indices;
  t_variant *vs;

  orig = find_line_start(orig);
  if (orig) {
    end = strpbrk(orig, "\n\r");
    if (end) end[0] = '';
    // puts(orig);

    // initialize maxes & indices
    // TODO: pass these as buffers to speed things up
    len = strlen(orig);
    maxes = (int*)malloc(sizeof(int) * len);
    set_or_die(maxes, NULL);
    indices = (int*)calloc(len, sizeof(int));  // start with all zeroes.
    set_or_die(indices, NULL);

    newlen = 1; // start with 1 for the .
    for (i = 0; i count;
        newlen += vs->maxlen;
      } else {
        maxes[i] = 1;
        newlen++;
      }
    }

    // don't proceed if newlen is greater than LONGEST_RESULT (improbable)
    if (newlen >= LONGEST_RESULT) {
      fprintf(stderr, "Result for \"%s\" too long: %d characters\n", orig, newlen);
    } else {
      j = 0;
      while (j >= 0) {
        // Construct and print the string using our current position, represented by indices.
        buffer[0] = '';
        for (i = 0; i variants[indices[i]]);
          } else {
            len2 = strlen(buffer);
            buffer[len2] = orig[i];
            buffer[len2 + 1] = '';
          }
        }
        puts(buffer);

        // Now add one to our position.
        j = len - 1;
        do {
          indices[j] = (indices[j] + 1) % maxes[j];
        } while (indices[j] == 0 && j-- > 0); // short-circuit: j only decremented when we carry

        // when j is -1, we've overflowed; hence we've printed all combinations.
      }
    }

    free(maxes);
    free(indices);
  }
}

static void do_all_words(t_variant **variants, FILE *f) {
  char line[1024];
  char buffer[LONGEST_RESULT];

  while (fgets(line, 1024, f)) {
    do_word(buffer, variants, line);
  }
  unset_or_die(ferror(f), "reading file");
}

int main(int argc, char **argv) {
  t_variant *variants[255];    /* 255 pointers, each to an array of 20 pointers to strings. */
  FILE *f;
  int i;

  bzero(variants, sizeof(variants));

  f = fopen("leet.txt", "r");
  set_or_die(f, "opening leet file");
  read_leet(variants, f);
  unset_or_die(fclose(f), "closing leet file");
  DEBUGGING(print_variants(variants));

  // read words from argv or, if no args, from stdin
  if (argc > 1) {
    for (i = 1; i < argc; i++) {
      f = fopen(argv[i], &quot;r&quot;);
      set_or_die(f, argv[i]);
      do_all_words(variants, f);
      unset_or_die(fclose(f), argv[i]);
    }
  } else {
    do_all_words(variants, stdin);
  }

  return EXIT_SUCCESS;
}
blog comments powered by Disqus Prev: Variation on C Password Dictionary Variator Next: Password Dictionary Variations