| #!/usr/local/bin/perl |
| # ******************************************************************** |
| # * COPYRIGHT: |
| # * Copyright (c) 2006, International Business Machines Corporation and |
| # * others. All Rights Reserved. |
| # ******************************************************************** |
| |
| package Dataset; |
| use Statistics::Descriptive; |
| use Statistics::Distributions; |
| use strict; |
| |
| # Create a new Dataset with the given data. |
| sub new { |
| my ($class) = shift; |
| my $self = bless { |
| _data => \@_, |
| _scale => 1.0, |
| _mean => 0.0, |
| _error => 0.0, |
| }, $class; |
| |
| my $n = @_; |
| |
| if ($n >= 1) { |
| my $stats = Statistics::Descriptive::Full->new(); |
| $stats->add_data(@{$self->{_data}}); |
| $self->{_mean} = $stats->mean(); |
| |
| if ($n >= 2) { |
| # Use a t distribution rather than Gaussian because (a) we |
| # assume an underlying normal dist, (b) we do not know the |
| # standard deviation -- we estimate it from the data, and (c) |
| # we MAY have a small sample size (also works for large n). |
| my $t = Statistics::Distributions::tdistr($n-1, 0.005); |
| $self->{_error} = $t * $stats->standard_deviation(); |
| } |
| } |
| |
| $self; |
| } |
| |
| # Set a scaling factor for all data; 1.0 means no scaling. |
| # Scale must be > 0. |
| sub setScale { |
| my ($self, $scale) = @_; |
| $self->{_scale} = $scale; |
| } |
| |
| # Multiply the scaling factor by a value. |
| sub scaleBy { |
| my ($self, $a) = @_; |
| $self->{_scale} *= $a; |
| } |
| |
| # Return the mean. |
| sub getMean { |
| my $self = shift; |
| return $self->{_mean} * $self->{_scale}; |
| } |
| |
| # Return a 99% error based on the t distribution. The dataset |
| # is desribed as getMean() +/- getError(). |
| sub getError { |
| my $self = shift; |
| return $self->{_error} * $self->{_scale}; |
| } |
| |
| # Divide two Datasets and return a new one, maintaining the |
| # mean+/-error. The new Dataset has no data points. |
| sub divide { |
| my $self = shift; |
| my $rhs = shift; |
| |
| my $minratio = ($self->{_mean} - $self->{_error}) / |
| ($rhs->{_mean} + $rhs->{_error}); |
| my $maxratio = ($self->{_mean} + $self->{_error}) / |
| ($rhs->{_mean} - $rhs->{_error}); |
| |
| my $result = Dataset->new(); |
| $result->{_mean} = ($minratio + $maxratio) / 2; |
| $result->{_error} = $result->{_mean} - $minratio; |
| $result->{_scale} = $self->{_scale} / $rhs->{_scale}; |
| $result; |
| } |
| |
| # subtracts two Datasets and return a new one, maintaining the |
| # mean+/-error. The new Dataset has no data points. |
| sub subtract { |
| my $self = shift; |
| my $rhs = shift; |
| |
| my $result = Dataset->new(); |
| $result->{_mean} = $self->{_mean} - $rhs->{_mean}; |
| $result->{_error} = $self->{_error} + $rhs->{_error}; |
| $result->{_scale} = $self->{_scale}; |
| $result; |
| } |
| |
| # adds two Datasets and return a new one, maintaining the |
| # mean+/-error. The new Dataset has no data points. |
| sub add { |
| my $self = shift; |
| my $rhs = shift; |
| |
| my $result = Dataset->new(); |
| $result->{_mean} = $self->{_mean} + $rhs->{_mean}; |
| $result->{_error} = $self->{_error} + $rhs->{_error}; |
| $result->{_scale} = $self->{_scale}; |
| $result; |
| } |
| |
| # Divides a dataset by a scalar. |
| # The new Dataset has no data points. |
| sub divideByScalar { |
| my $self = shift; |
| my $s = shift; |
| |
| my $result = Dataset->new(); |
| $result->{_mean} = $self->{_mean}/$s; |
| $result->{_error} = $self->{_error}/$s; |
| $result->{_scale} = $self->{_scale}; |
| $result; |
| } |
| |
| # Divides a dataset by a scalar. |
| # The new Dataset has no data points. |
| sub multiplyByScalar { |
| my $self = shift; |
| my $s = shift; |
| |
| my $result = Dataset->new(); |
| $result->{_mean} = $self->{_mean}*$s; |
| $result->{_error} = $self->{_error}*$s; |
| $result->{_scale} = $self->{_scale}; |
| $result; |
| } |
| |
| 1; |