blob: ed993111fc58431de4a636eb310529ce2def6dab [file] [log] [blame]
 #!/usr/local/bin/perl
# ********************************************************************
# * COPYRIGHT:
# * Copyright (c) 2006, International Business Machines Corporation and
# * others. All Rights Reserved.
# ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;

use strict;
use warnings;

# Create a new Dataset with the given data.
#
# Arguments: zero or more numeric samples.
# Returns:   a blessed Dataset.  Its raw mean is the sample mean (0.0 when
#            no samples are given) and its raw error is the t-based
#            half-width computed below (0.0 for fewer than two samples).
#            The scale starts at 1.0.
sub new {
    my ($class, @samples) = @_;

    # Keep a private copy of the samples.  Storing \@_ directly would keep
    # aliases to the caller's own variables, so a later change to the
    # caller's array would silently change this Dataset's data.
    my $self = bless {
        _data  => [@samples],
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@{ $self->{_data} });
    $self->{_mean} = $stats->mean();

    if ($n >= 2) {
        # Use a t distribution rather than Gaussian because (a) we
        # assume an underlying normal dist, (b) we do not know the
        # standard deviation -- we estimate it from the data, and (c)
        # we MAY have a small sample size (also works for large n).
        #
        # NOTE(review): the half-width is t * standard deviation, not
        # t * standard error (sd / sqrt(n)) -- confirm this is the
        # interval that is intended.
        my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
        $self->{_error} = $t * $stats->standard_deviation();
    }

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0 (this is not checked here).
# Returns the new scale.
sub setScale {
    my ($self, $scale) = @_;
    return $self->{_scale} = $scale;
}

# Multiply the scaling factor by a value.
# Returns the resulting scale.
sub scaleBy {
    # Named $factor rather than $a so that the special sort variable $a
    # is not shadowed.
    my ($self, $factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean, with the scaling factor applied.
sub getMean {
    my ($self) = @_;
    return $self->{_mean} * $self->{_scale};
}

# Return a 99% error based on the t distribution, with the scaling
# factor applied.  The dataset is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    return $self->{_error} * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error. The new Dataset has no data points.
sub divide {
    my ($self, $rhs) = @_;

    # Interval arithmetic: the quotient of [n_lo, n_hi] by [d_lo, d_hi]
    # is bounded by the smallest and largest of the four corner ratios.
    # Looking at all four (rather than only n_lo/d_hi and n_hi/d_lo)
    # keeps the error non-negative and the bounds right when the
    # numerator is negative or straddles zero, e.g. for the result of
    # subtract().  For all-positive intervals the result is unchanged.
    #
    # If the divisor's interval contains zero the true quotient is
    # unbounded and the result is only a rough indication; if either
    # divisor bound is exactly zero Perl dies with a division-by-zero
    # error.
    my @numerators = (
        $self->{_mean} - $self->{_error},
        $self->{_mean} + $self->{_error},
    );
    my @denominators = (
        $rhs->{_mean} - $rhs->{_error},
        $rhs->{_mean} + $rhs->{_error},
    );

    my @ratios;
    for my $num (@numerators) {
        for my $den (@denominators) {
            push @ratios, $num / $den;
        }
    }
    my ($minratio, $maxratio) = (sort { $a <=> $b } @ratios)[0, -1];

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Private helper for add() and subtract(): return the raw (mean, error)
# of $rhs converted into the raw units of $self, so that both operands
# share $self's scaling factor.  When the two scales are equal the
# values are returned unchanged.  Dies with a division-by-zero error if
# the scales differ and $self's scale is zero.
sub _rhs_in_units_of {
    my ($self, $rhs) = @_;
    my $ratio = $rhs->{_scale} == $self->{_scale}
              ? 1
              : $rhs->{_scale} / $self->{_scale};
    return ($rhs->{_mean} * $ratio, $rhs->{_error} * abs($ratio));
}

# Subtracts two Datasets and returns a new one, maintaining the
# mean+/-error.  The errors of the two operands are added.  The result
# keeps the scale of the left-hand Dataset; the right-hand Dataset is
# converted to that scale first.  The new Dataset has no data points.
sub subtract {
    my ($self, $rhs) = @_;
    my ($rhs_mean, $rhs_error) = _rhs_in_units_of($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} - $rhs_mean;
    $result->{_error} = $self->{_error} + $rhs_error;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Adds two Datasets and returns a new one, maintaining the
# mean+/-error.  The errors of the two operands are added.  The result
# keeps the scale of the left-hand Dataset; the right-hand Dataset is
# converted to that scale first.  The new Dataset has no data points.
sub add {
    my ($self, $rhs) = @_;
    my ($rhs_mean, $rhs_error) = _rhs_in_units_of($self, $rhs);

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} + $rhs_mean;
    $result->{_error} = $self->{_error} + $rhs_error;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Divides a dataset by a scalar.
# The error is divided by the scalar's magnitude so that it stays
# non-negative for a negative scalar.  Dividing by zero dies.
# The new Dataset has no data points.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The error is multiplied by the scalar's magnitude so that it stays
# non-negative for a negative scalar.
# The new Dataset has no data points.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;