# blob: 0c502f2e472ec1936ba70a098c84ac4270b7f987 [file] [log] [blame]
#/**
# * © 2016 and later: Unicode, Inc. and others.
# * License & terms of use: http://www.unicode.org/copyright.html
# *******************************************************************************
# * Copyright (C) 2002-2004, International Business Machines Corporation and *
# * others. All Rights Reserved. *
# *******************************************************************************
# */
package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;
# Create a new Dataset from a list of numeric data points.
#
# Usage:   my $ds = Dataset->new(@values);
# Params:  $class  - class name to bless the object into
#          @values - zero or more data points
# Returns: the new Dataset object.
#
# Fields initialised here:
#   _data  - private copy of the data points
#   _scale - scaling factor, starts at 1.0 (no scaling)
#   _mean  - mean of the data, or 0.0 when no data is given
#   _error - t-based error term, or 0.0 with fewer than two points
sub new {
    # Copy the arguments into a fresh array.  The elements of @_ alias the
    # caller's variables, so keeping a reference to @_ itself would let
    # later changes to the caller's data silently change this Dataset.
    my ($class, @values) = @_;

    my $self = bless {
        _data  => \@values,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @values;
    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@{ $self->{_data} });
        $self->{_mean} = $stats->mean();

        # An error estimate needs at least two points (n-1 degrees of
        # freedom must be >= 1).
        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # 0.005 in the upper tail corresponds to a two-sided 99% bound.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);

            # NOTE(review): this scales the standard deviation itself, not
            # the standard error of the mean (sd / sqrt(n)) -- confirm that
            # this is the intended meaning of "error".
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
# Replace the scaling factor applied to all reported values.
# A factor of 1.0 means no scaling.  The factor must be > 0; this is a
# documented precondition and is not checked here.
#
# Params:  $factor - the new scaling factor
# Returns: the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
# Multiply the current scaling factor by the given value, in place.
#
# Params:  $multiplier - value to multiply the scaling factor by
# Returns: the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
# Return the mean with the current scaling factor applied.
sub getMean {
    my ($self) = @_;
    my $scaled_mean = $self->{_mean} * $self->{_scale};
    return $scaled_mean;
}
# Return the 99% error, based on the t distribution, with the current
# scaling factor applied.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my $scaled_error = $self->{_error} * $self->{_scale};
    return $scaled_error;
}
# Divide this Dataset by another and return a new Dataset that keeps only
# the resulting mean +/- error.  The new Dataset has no data points.
#
# The quotient is bounded using the unscaled values of both operands:
#   low  = (mean_lhs - error_lhs) / (mean_rhs + error_rhs)
#   high = (mean_lhs + error_lhs) / (mean_rhs - error_rhs)
# The result's mean is the midpoint of low and high, its error is the
# distance from that midpoint to low, and its scale is scale_lhs / scale_rhs.
#
# Params:  $rhs - the divisor Dataset
# Returns: a new Dataset.
# Dies:    via Carp::croak when the divisor's mean + error, mean - error,
#          or scale is zero, because the ratio is then undefined.
#
# NOTE(review): if the divisor's error is larger than its mean, the second
# divisor is negative and the computed error may come out negative.  That
# case is not treated specially here -- confirm whether callers rely on it.
sub divide {
    my ($self, $rhs) = @_;

    # Carp is loaded on demand; the file does not import it at the top.
    require Carp;

    my $low_divisor  = $rhs->{_mean} + $rhs->{_error};
    my $high_divisor = $rhs->{_mean} - $rhs->{_error};

    # Check up front so the caller gets a descriptive error pointing at the
    # call site instead of Perl's bare "Illegal division by zero".
    if ($low_divisor == 0 || $high_divisor == 0) {
        Carp::croak("Dataset::divide: divisor mean +/- error is zero; "
            . "the ratio is undefined");
    }
    if ($rhs->{_scale} == 0) {
        Carp::croak("Dataset::divide: divisor scale is zero; "
            . "the ratio is undefined");
    }

    my $minratio = ($self->{_mean} - $self->{_error}) / $low_divisor;
    my $maxratio = ($self->{_mean} + $self->{_error}) / $high_divisor;

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}
1;