Variation on C Password Dictionary Variator

2010-02-03

So I was thinking, since my password app generates too many possibilities, what if we limited its output so that any given letter was translated into the same variant each time it appears in the word? In other words, if your word was “aa” and your leet.txt file had “a A @,” then you’d get just “aa,” “AA,” and “@@,” not “aA,” “a@,” “Aa,” etc. So I made that change, and it seems so reasonable, I made it the default. To get all combinations, use the “-a” option. Here is the code:

/**
 * pw-vary.c - implements pw-vary, a program to obfuscate passwords by replacing letters with leet.
 *
 * Makes a lot of assumptions about limits. Also assumes ASCII strings. Pretty lazy!
 *
 * Copyright (c) 2009 by Paul A Jungwirth
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size limits used throughout the program. */
#define LINE_LEN		1024
#define MAX_VARIANTS	20
#define LONGEST_RESULT	4092

/*
 * True for space, tab, newline and carriage return.
 * The argument is parenthesized so any expression may be passed, but it is
 * evaluated up to four times: do not pass an expression with side effects.
 */
#define is_whitespace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')

/*
 * set_or_die: report via perror(s) and exit if v is false/NULL.
 * unset_or_die: report via perror(s) and exit if v is true/non-zero.
 * Both are wrapped in do { } while (0) so they behave as a single statement
 * and can be used safely inside an unbraced if/else.
 */
#define set_or_die(v, s) do { if (!(v)) { perror(s); exit(EXIT_FAILURE); } } while (0)
#define unset_or_die(v, s) do { if (v) { perror(s); exit(EXIT_FAILURE); } } while (0)

/* DEBUGGING(s) expands to the statement s only when built with -DDEBUG. */
#ifdef DEBUG
#define DEBUGGING(s) s;
#else
#define DEBUGGING(s)
#endif

/* All the replacement strings known for one letter of the leet file. */
typedef struct t_variant {
	/* The letter this entry replaces (first character of its leet line). */
	char letter;
	/* Pointers to the individual variant strings; they point into `line`. */
	char *variants[MAX_VARIANTS];
	/* Number of entries of `variants` that are in use. */
	int count;
	/* Private copy of the leet line that the variant strings live in. */
	char line[LINE_LEN];
	/* Length of the longest variant string. */
	int maxlen;
} t_variant;

/**
 * Allocate a new variant entry for `letter`.
 *
 * The entry is zero-initialized, so every unused slot of `variants` is NULL
 * and `line` is an empty string; code that walks `variants` until it hits
 * NULL therefore never reads uninitialized memory. Slot 0 points at the
 * entry's own `line` buffer and `count`/`maxlen` start at 1.
 *
 * Exits the program (via set_or_die) if allocation fails.
 * The caller owns the returned entry and releases it with free().
 */
static t_variant *init_variant(char letter) {
	t_variant *ret = calloc(1, sizeof *ret);

	set_or_die(ret, NULL);

	ret->letter = letter;
	ret->count = 1;
	ret->maxlen = 1;
	ret->variants[0] = ret->line;

	return ret;
}

/**
 * Skip leading whitespace in `line`.
 *
 * Returns a pointer to the first character that is not a space, tab,
 * newline or carriage return (the same four characters is_whitespace
 * accepts), or NULL when the line is empty or contains only whitespace.
 *
 * The previous hand-written loop advanced its index one step ahead of the
 * character it was testing, so a line with leading whitespace came back
 * pointing one character too far (" abc" yielded "bc").
 */
static char *find_line_start(char *line) {
	line += strspn(line, " \t\n\r");
	return *line ? line : NULL;
}

/**
 * Load the leet table from `f` into `variants`.
 *
 * Each non-blank line is a whitespace-separated list: the first character
 * of the line selects the table slot, and every following token becomes one
 * more variant for that letter (slot 0 of the entry is the first token
 * itself). Tokens beyond MAX_VARIANTS are ignored.
 *
 * NOTE(review): the tokenizing loop was truncated in the published listing;
 * it is reconstructed here from the surviving statements and indentation.
 *
 * variants - table indexed by character code; entries must be NULL or
 *            point to entries made by init_variant. Only ASCII codes
 *            (0-127) are stored, matching the file's "assumes ASCII" note.
 * f        - open stream to read; a read error terminates the program.
 */
static void read_leet(t_variant **variants, FILE *f) {
	char line[LINE_LEN];
	char *line_start;
	int len;
	int i, j;
	unsigned char first_letter;
	int in_whitespace;
	t_variant *vs;

	while (fgets(line, LINE_LEN, f)) {
		// strip leading white space and check for contents
		line_start = find_line_start(line);
		if (!line_start) continue;

		// Index with an unsigned value: a plain char may be negative.
		first_letter = (unsigned char)line_start[0];
		if (first_letter > 127) {
			fprintf(stderr, "Skipping leet entry with non-ASCII first character\n");
			continue;
		}

		// Create a new entry for the letter, replacing (and releasing)
		// any entry an earlier line created for the same letter.
		free(variants[first_letter]);
		vs = init_variant((char)first_letter);
		variants[first_letter] = vs;

		// Keep a private copy of the line; fgets guarantees it is
		// NUL-terminated and shorter than LINE_LEN.
		len = (int)strlen(line_start);
		memcpy(vs->line, line_start, (size_t)len + 1);
		line_start = vs->line;

		// Split the copy in place: every whitespace character becomes a
		// terminator and each token start is recorded as a variant.
		j = 1;
		in_whitespace = 1;
		for (i = 1; i < len; i++) {
			if (is_whitespace(line_start[i])) {
				line_start[i] = '\0';
				in_whitespace = 1;
			} else if (in_whitespace) {
				if (j < MAX_VARIANTS) {
					vs->variants[j] = &line_start[i];
					vs->count++;
					j++;
				}
				in_whitespace = 0;
			}
		}

		// find the longest variant for this letter:
		for (i = 0; i < vs->count; i++) {
			len = (int)strlen(vs->variants[i]);
			if (len > vs->maxlen) vs->maxlen = len;
		}
	}
	unset_or_die(ferror(f), "reading leet file");
}

/**
 * Debugging aid: print every variant of every populated table entry, one
 * per line, followed by a "====" separator line after each entry.
 *
 * NOTE(review): the loop headers were truncated in the published listing
 * and are reconstructed here; the bound of 255 matches the table declared
 * in main.
 */
/*@unused@*/
static void print_variants(t_variant **variants) {
	int i, j;

	for (i = 0; i < 255; i++) {
		if (variants[i]) {
			// Bounded by count so a completely full entry (which has no
			// NULL terminator slot) cannot be overrun.
			for (j = 0; j < variants[i]->count; j++) {
				puts(variants[i]->variants[j]);
			}
			puts("====");
		}
	}
}

/**
 * do_word_all and do_word_selective:
 * Each iterates through all the variant combinations for the given word.
 *
 * do_word_all does this with two arrays, each the same length as the
 * original string:
 *   - maxes holds the number of variants for each letter in the string.
 *   - indices holds the current variant number to be printed. We keep
 *     incrementing this array, "carrying" when necessary, just like
 *     counting.
 * This algorithm tests all possible combinations.
 *
 * do_word_selective causes a slightly more complex variation:
 * instead of testing all combinations, we assume that all instances of a
 * given letter are written with the same variant, cutting down on the
 * number of combinations to print. In this version, we rely on three arrays:
 *   - maxes & indices are only as long as the number of *unique* letters.
 *     We use them for "counting" just as before.
 *   - poses has one entry for each char in the string, and it stores
 *     an int for indexing into the two other arrays.
 */

/**
 * Print every variation of one word in which all occurrences of a given
 * letter are replaced by the same variant.
 *
 * NOTE(review): several spans of this function were lost in the published
 * listing (loop headers and every '\0' literal). They are reconstructed
 * here from the surviving statements and the algorithm description.
 *
 * buffer   - scratch output buffer of at least LONGEST_RESULT bytes
 * variants - table indexed by character code; NULL means "no variants".
 *            Only ASCII codes (0-127) are looked up.
 * orig     - the input line; modified in place (leading whitespace is
 *            skipped and the line is cut at the first newline)
 *
 * Words whose longest possible result would not fit in LONGEST_RESULT are
 * reported on stderr and skipped.
 */
static void do_word_selective(char *buffer, t_variant **variants, char *orig) {
	char c;
	char *end;
	char *out;
	const char *piece;
	size_t piece_len;
	int i, j, k;
	int len, newlen, maxes_len;
	int *maxes;
	int *indices;
	int *poses;
	t_variant *vs;

	orig = find_line_start(orig);
	if (!orig) return;

	end = strpbrk(orig, "\n\r");
	if (end) end[0] = '\0';

	len = (int)strlen(orig);
	if (len == 0) return;	// nothing to vary; also keeps j below from starting at -1

	// maxes & indices need one slot per *unique* letter; len is an upper bound.
	// TODO: pass these as buffers to speed things up
	maxes = malloc(sizeof *maxes * (size_t)len);
	set_or_die(maxes, NULL);
	indices = calloc((size_t)len, sizeof *indices);	// start with all zeroes.
	set_or_die(indices, NULL);
	poses = calloc((size_t)len, sizeof *poses);
	set_or_die(poses, NULL);

	newlen = 1; // start with 1 for the '\0'.
	j = 0;
	for (i = 0; i < len; i++) {
		c = orig[i];
		vs = ((unsigned char)c < 128) ? variants[(unsigned char)c] : NULL;

		// c occurs at position i, so strchr always finds it.
		k = (int)(strchr(orig, c) - orig);
		if (k == i) {
			// first time we see this letter: give it its own counter
			maxes[j] = vs ? vs->count : 1;
			poses[i] = j;
			j++;
		} else {
			// point wherever the first letter points
			poses[i] = poses[k];
		}

		// Worst-case output length: the longest variant, not the number
		// of variants, is what bounds the bytes written for this letter.
		newlen += vs ? vs->maxlen : 1;
	}
	maxes_len = j;

	// don't proceed if newlen is greater than LONGEST_RESULT (improbable)
	if (newlen >= LONGEST_RESULT) {
		fprintf(stderr, "Result for \"%s\" too long: %d characters\n", orig, newlen);
	} else {
		j = 0;
		while (j >= 0) {
			// Construct and print the string using our current position, represented by indices.
			out = buffer;
			for (i = 0; i < len; i++) {
				c = orig[i];
				vs = ((unsigned char)c < 128) ? variants[(unsigned char)c] : NULL;
				if (vs) {
					piece = vs->variants[indices[poses[i]]];
					piece_len = strlen(piece);
					memcpy(out, piece, piece_len);
					out += piece_len;
				} else {
					*out++ = c;
				}
			}
			*out = '\0';
			puts(buffer);

			// Now add one to our position.
			j = maxes_len - 1;
			do {
				indices[j] = (indices[j] + 1) % maxes[j];
			} while (indices[j] == 0 && j-- > 0); // short-circuit: j only decremented when we carry

			// when j is -1, we've overflowed; hence we've printed all combinations.
		}
	}

	free(maxes);
	free(indices);
	free(poses);
}

/**
 * Print every combination of variants for one word, choosing a variant
 * independently for each character position.
 *
 * NOTE(review): several spans of this function were lost in the published
 * listing (loop headers and every '\0' literal). They are reconstructed
 * here from the surviving statements and the algorithm description.
 *
 * buffer   - scratch output buffer of at least LONGEST_RESULT bytes
 * variants - table indexed by character code; NULL means "no variants".
 *            Only ASCII codes (0-127) are looked up.
 * orig     - the input line; modified in place (leading whitespace is
 *            skipped and the line is cut at the first newline)
 *
 * Words whose longest possible result would not fit in LONGEST_RESULT are
 * reported on stderr and skipped.
 */
static void do_word_all(char *buffer, t_variant **variants, char *orig) {
	char c;
	char *end;
	char *out;
	const char *piece;
	size_t piece_len;
	int i, j;
	int len, newlen;
	int *maxes;
	int *indices;
	t_variant *vs;

	orig = find_line_start(orig);
	if (!orig) return;

	end = strpbrk(orig, "\n\r");
	if (end) end[0] = '\0';

	len = (int)strlen(orig);
	if (len == 0) return;	// nothing to vary; also keeps j below from starting at -1

	// initialize maxes & indices
	// TODO: pass these as buffers to speed things up
	maxes = malloc(sizeof *maxes * (size_t)len);
	set_or_die(maxes, NULL);
	indices = calloc((size_t)len, sizeof *indices);	// start with all zeroes.
	set_or_die(indices, NULL);

	newlen = 1; // start with 1 for the '\0'.
	for (i = 0; i < len; i++) {
		c = orig[i];
		vs = ((unsigned char)c < 128) ? variants[(unsigned char)c] : NULL;
		if (vs) {
			maxes[i] = vs->count;
			newlen += vs->maxlen;
		} else {
			maxes[i] = 1;
			newlen++;
		}
	}

	// don't proceed if newlen is greater than LONGEST_RESULT (improbable)
	if (newlen >= LONGEST_RESULT) {
		fprintf(stderr, "Result for \"%s\" too long: %d characters\n", orig, newlen);
	} else {
		j = 0;
		while (j >= 0) {
			// Construct and print the string using our current position, represented by indices.
			out = buffer;
			for (i = 0; i < len; i++) {
				c = orig[i];
				vs = ((unsigned char)c < 128) ? variants[(unsigned char)c] : NULL;
				if (vs) {
					piece = vs->variants[indices[i]];
					piece_len = strlen(piece);
					memcpy(out, piece, piece_len);
					out += piece_len;
				} else {
					*out++ = c;
				}
			}
			*out = '\0';
			puts(buffer);

			// Now add one to our position.
			j = len - 1;
			do {
				indices[j] = (indices[j] + 1) % maxes[j];
			} while (indices[j] == 0 && j-- > 0); // short-circuit: j only decremented when we carry

			// when j is -1, we've overflowed; hence we've printed all combinations.
		}
	}

	free(maxes);
	free(indices);
}

/**
 * Feed every line of `f` to one of the two word expanders.
 *
 * variants - the leet table, passed through unchanged
 * f        - stream of words, one per line
 * do_all   - non-zero selects do_word_all, zero selects do_word_selective
 *
 * A read error on `f` terminates the program.
 */
static void do_all_words(t_variant **variants, FILE *f, int do_all) {
	char word[1024];
	char scratch[LONGEST_RESULT];
	void (*expand)(char *, t_variant **, char *);

	// Pick the expander once instead of testing the flag on every line.
	if (do_all) {
		expand = do_word_all;
	} else {
		expand = do_word_selective;
	}

	for (;;) {
		if (fgets(word, 1024, f) == NULL) {
			break;
		}
		expand(scratch, variants, word);
	}

	unset_or_die(ferror(f), "reading file");
}

/**
 * Entry point.
 *
 * Usage: pw-vary [-a] [wordfile ...]
 *   -a  print all combinations (do_word_all) instead of the default, in
 *       which each letter is replaced consistently (do_word_selective).
 *
 * The leet table is always read from "leet.txt" in the current directory.
 * Words are read from the named files or, when none are given, from stdin.
 * Any failure to open, read or close a file terminates the program.
 */
int main(int argc, char **argv) {
	/* One slot per character code; NULL means "no variants for this letter". */
	t_variant *variants[255] = { NULL };
	FILE *f;
	int i;
	int arg_start, do_all;

	// Argument processing:
	if (argc > 1 && (strcmp(argv[1], "-a") == 0)) {
		do_all = 1;
		arg_start = 2;
	} else {
		do_all = 0;
		arg_start = 1;
	}

	f = fopen("leet.txt", "r");
	set_or_die(f, "opening leet file");
	read_leet(variants, f);
	unset_or_die(fclose(f), "closing leet file");
	DEBUGGING(print_variants(variants));

	// read words from argv or, if no args, from stdin
	if (argc > arg_start) {
		for (i = arg_start; i < argc; i++) {
			f = fopen(argv[i], "r");
			set_or_die(f, argv[i]);
			do_all_words(variants, f, do_all);
			unset_or_die(fclose(f), argv[i]);
		}
	} else {
		do_all_words(variants, stdin, do_all);
	}

	// Release the table entries allocated by read_leet.
	for (i = 0; i < (int)(sizeof variants / sizeof variants[0]); i++) {
		free(variants[i]);
	}

	return EXIT_SUCCESS;
}
blog comments powered by Disqus Prev: Decline in Inverse and Leveraged ETFs Next: C Version of Password Dictionary Variator