#!/usr/bin/perl
#
# Generates a percentile chart for a series of numbers.
#
# Tom Moertel
# 6 Dec 1999
#
# Docs at __END__.
#
# $Id: percentile,v 1.7 2005/08/10 17:30:26 thor Exp $

use strict;
use warnings;

use POSIX qw(ceil floor);
use Scalar::Util qw(looks_like_number);

my $pct_increment = 10;  # increment by 10% by default

# Option parsing.  Any leading argument that starts with "-" is treated as
# an option; each recognised option consumes the following argument as its
# value.  Options are matched by prefix, so "-i", "--i" and "--increment"
# are all accepted.
while (@ARGV && $ARGV[0] =~ /^-/) {
    my $option = shift @ARGV;

    if ($option =~ /^-?-i/) {
        # Step between chart rows, in percentage points.
        $pct_increment = positive_number_or_die($option, shift @ARGV);
        next;
    }
    if ($option =~ /^-?-b/) {
        # Number of equal steps to divide the 0..100% range into.
        $pct_increment = 100.0 / positive_number_or_die($option, shift @ARGV);
        next;
    }

    print STDERR "$0: bad option ", $option, "\n";
    exit 1;
}

# Read all remaining files (or STDIN when none are named) in one big gulp.
# $/ is localised so the change does not leak past this statement.
my $input = do { local $/; <> };
$input = '' unless defined $input;    # no input at all

# Split on anything that cannot be part of a number, then keep only the
# tokens that really are numbers.  Fragments such as "e1" (from "file1")
# or "1-2" are dropped instead of being counted as bogus data points.
my @data = sort { $a <=> $b }
           grep { looks_like_number($_) }
           split /[^-0-9.eE]+/, $input;

my $count = @data;
unless ($count) {
    print STDERR "No data provided; end of analysis.\n";
    exit 1;
}

# Scale the bars so that the maximum value reaches column 50.  When the
# maximum is not positive there is nothing sensible to scale against, so
# the bars collapse to a bare "*" rather than dividing by zero.
my $max     = $data[-1];
my $scaling = $max > 0 ? 50.0 / $max : 0;

# Step by integer index instead of repeatedly adding the increment, so
# floating-point drift can neither drop the final row nor push the portion
# above 100%.
my $steps = floor(100 / $pct_increment + 1e-9);
for my $step (0 .. $steps) {
    my $percent = $step * $pct_increment;
    $percent = 100 if $percent > 100;
    graph_cutoff($percent / 100.0);
}

# helpers =====================================================================

# positive_number_or_die($option, $value)
#
# Returns $value when it is a finite number greater than zero.  Otherwise
# prints a diagnostic naming $option to STDERR and exits with status 1.
# A zero or negative increment would make the chart loop never finish.
sub positive_number_or_die {
    my ($option, $value) = @_;

    my $ok = defined $value
          && looks_like_number($value)
          && $value > 0
          && $value < 9**9**9;        # rejects Inf; NaN already fails "> 0"

    unless ($ok) {
        print STDERR "$0: option $option requires a positive number\n";
        exit 1;
    }
    return $value;
}

# graph_cutoff($portion)
#
# Prints one chart row for $portion (a fraction between 0 and 1): the
# percentage label, a bar of dashes ending in "*" whose length is
# proportional to the percentile value, and the value itself.
sub graph_cutoff {
    my ($portion) = @_;

    my $cutoff = percentile($portion);
    my $spaces = $scaling * $cutoff - 1;

    my $graphln = sprintf "%3.0f%% |", 100 * $portion;
    $graphln .= "-" x int($spaces - 1) if $spaces > 1;
    $graphln .= "*";

    printf "%-56s % 10.3f\n", $graphln, $cutoff;
}

# percentile($P)
#
# Given a portion P (0..1), returns X such that the portion of values in
# the data that are <= X is P.  Positions that fall between two data
# points are linearly interpolated.
sub percentile {
    my ($P) = @_;

    my $last = $count - 1;
    my $pos  = $P * $last;

    # Keep the position inside the array even if P is slightly outside
    # the 0..1 range.
    $pos = 0     if $pos < 0;
    $pos = $last if $pos > $last;

    my $pos_low  = floor($pos);
    my $pos_high = ceil($pos);
    my $X_low    = $data[$pos_low];
    my $X_high   = $data[$pos_high];

    return $X_low + ($X_high - $X_low) * ($pos - $pos_low);
}

__END__

=head1 NAME

percentile - computes summary statistics for a set of numbers

=head1 SYNOPSIS

B<percentile> [B<--increment> I<percent> | B<-b> I<count>] [I<file>...]

=head1 OPTIONS

=over 4

=item B<-i> I<percent>, B<--increment> I<percent>

Step between chart rows, in percentage points.  Must be a positive
number.

=item B<-b> I<count>

Divide the range from 0% to 100% into I<count> equal steps.  Must be a
positive number.

=back

=head1 DESCRIPTION

This program will generate a percentile plot for the set of numbers in
I<file>, or on standard input when no file is named.  The program will
also attempt to ignore any non-numeric garbage that may be in the file.

The output follows this format:

      0% |*                                                       0.000
     10% |*                                                       2.464
     20% |*                                                       2.614
     30% |*                                                       2.874
     40% |*                                                       3.079
     50% |*                                                       3.254
     60% |*                                                       3.475
     70% |-*                                                      3.892
     80% |---------------------*                                 28.167
     90% |---------------------------------*                     43.472
    100% |------------------------------------------------*      60.427

You can change the increment between percentages by using the
B<--increment> option.  The default percentage increment is 10.

=head1 CAVEATS

The input set must have the following properties in order to make
use of this program:

=over 4

=item *

All of the elements must be non-negative.

=item *

The maximum value in the set must be greater than zero.

=back

(This program was originally written to aid in the analysis of
transaction timings, which are necessarily non-negative.)

=head1 AUTHOR

Tom Moertel

6 Dec 1999

$Id: percentile,v 1.7 2005/08/10 17:30:26 thor Exp $

=cut