#!/usr/bin/perl -w
use strict;

# Like du, but intended to let you filter out files & directories smaller than x bytes.
# Intended defaults: a 10M size filter and a depth limit of 10 (neither is implemented yet).
# Entries are printed sorted by size, from greatest to least.

# One [name, size, contents] tuple per path named on the command line.
my @ts = ();

for my $file (@ARGV) {
	# push must be given the array itself (@ts); a scalar $ts does not exist
	# and is a compile-time error under "use strict".
	push @ts, handle_path($file);
}

# Print the collected tuples, starting at indentation depth 0.
print_paths(\@ts, 0);

=head2 handle_path

Given an input path, returns a tuple (array-ref) C<[name, size, contents]>.

C<name> is the path exactly as it was passed in. C<contents> is an array-ref
of the same sort of tuples, one per entry of the directory (the C<.> and C<..>
entries are skipped); each child is named C<"$path/$entry">. If the path is
not a directory, C<contents> is C<[]>.

C<size> is the byte size reported by C<lstat> for a non-directory, or the sum
of the children's sizes for a directory (the directory's own size is not
counted). Symbolic links are measured as links and are never followed.

A path that cannot be lstat'ed, or a directory that cannot be opened, produces
a warning on STDERR and is reported with size 0 and no contents.

=cut

sub handle_path {
	my $path = shift;
	my $total = 0;
	my @contents = ();

	# lstat (not stat) so that a symlink is sized as the link itself, and so
	# that the "-d _" test below does not treat a symlink-to-directory as a
	# directory; following such links could recurse forever.
	my @info = lstat($path);

	if (!@info) {
		warn "cannot lstat $path: $!\n";
	} elsif (-d _) {
		# "_" reuses the stat buffer filled by the lstat call above. It must be
		# tested before recursing, because the recursion overwrites it.
		if (opendir(my $dh, $path)) {
			# defined() guard: an entry literally named "0" is false but valid.
			while (defined(my $entry = readdir($dh))) {
				# Skip the self/parent links, otherwise the recursion never ends.
				next if $entry eq '.' || $entry eq '..';
				my $t = handle_path("$path/$entry");
				$total += $t->[1];
				push @contents, $t;
			}
			closedir($dh);
		} else {
			warn "cannot open directory $path: $!\n";
		}
	} else {
		# Element 7 of the stat list is the size in bytes.
		$total = $info[7];
	}

	return [ $path, $total, \@contents ];
}

=head2 print_paths

Takes an array-ref of C<[name, size, contents]> tuples and an indentation
depth. Sorts the tuples by size, largest first (ties are ordered by name), and
prints one line per tuple to the currently selected output handle: C<depth>
tab characters, the abbreviated size right-aligned in four columns, a tab,
the name and a newline. Recurses into each tuple's C<contents> with the depth
increased by one. An undefined depth is treated as 0.

=cut

sub print_paths {
	my $ts = shift;
	my $depth = shift;
	$depth = 0 unless defined $depth;

	# Descending by size; the name tie-break keeps the output deterministic,
	# since the incoming order of directory entries is arbitrary.
	my @sorted = sort { $b->[1] <=> $a->[1] or $a->[0] cmp $b->[0] } @$ts;

	for my $t (@sorted) {
		my $ab = sprintf("%4s", abbrev($t->[1]));
		# The outer parentheses matter: "print (LIST1), LIST2" parses as
		# "print(LIST1)" followed by a discarded LIST2, which would output
		# only the indentation.
		print(("\t" x $depth), $ab, "\t", $t->[0], "\n");
		print_paths($t->[2], $depth + 1) if $t->[2] && @{ $t->[2] };
	}
}

# Abbreviates a byte count into a short human-readable string using decimal
# (power-of-1000) units: B, K, M, G, T, P.
#
# Values below 1000 are returned unchanged with a "B" suffix. Larger values
# are scaled to the largest unit that fits; one decimal place is shown only
# when the rounded number is below 10 ("1.5K"), otherwise none ("15K"), so
# the result for a non-negative integer below 1000P is at most four
# characters wide. A value that rounds up to 1000 of a unit is promoted to
# "1.0" of the next larger unit (999999 -> "1.0M"). An undefined value is
# treated as 0.
sub abbrev {
	my $val = shift;
	$val = 0 unless defined $val;

	# Largest unit first, so the first factor that fits is the one to use.
	my @units = (
		[ 'P', 1000000000000000 ],
		[ 'T', 1000000000000 ],
		[ 'G', 1000000000 ],
		[ 'M', 1000000 ],
		[ 'K', 1000 ],
	);

	for my $i (0 .. $#units) {
		my ($suffix, $factor) = @{ $units[$i] };
		next if $val < $factor;

		my $scaled = $val / $factor;

		# Decide on the decimal place from the *rounded* text, so that e.g.
		# 9.96 becomes "10" rather than "10.0".
		my $text = sprintf("%.1f", $scaled);
		$text = sprintf("%.0f", $scaled) if $text >= 10;

		# Rounding can reach 1000 of this unit; promote to the next larger
		# unit when there is one (there is none above P).
		if ($text >= 1000 && $i > 0) {
			$text = '1.0';
			$suffix = $units[$i - 1][0];
		}

		return $text . $suffix;
	}

	return "${val}B";
}

