package Statistics::PointEstimation; use strict; use Carp; use vars qw($VERSION @ISA $AUTOLOAD); use Statistics::Distributions qw(chisqrdistr tdistr fdistr udistr uprob chisqrprob tprob fprob); use Statistics::Descriptive; use POSIX; @ISA= qw (Statistics::Descriptive::Full); $VERSION = '0.01'; my %confidence_interval= #data related to confidence interval ( "significance" => undef, "alpha" => undef, "df" =>undef, "standard_error" => undef, "t_value" =>undef, "t_statistic" =>undef, "t_prob" =>undef, "delta" =>undef, "upper_clm" => undef, "lower_clm" =>undef, "valid" =>undef ); sub new{ my $proto = shift; my $class = ref($proto) || $proto; my $self = $class->SUPER::new(); my %confidence=%confidence_interval; $self->{confidence}=\%confidence; bless ($self, $class); return $self; } sub compute_confidence_interval{ my $self=shift; croak "sample size must be >1 to compute the confidence interval \n" if($self->count()<=1); $self->{confidence}->{'significance'}=95 if (!defined($self->{confidence}->{'significance'})); $self->{confidence}->{df}=$self->count()-1; $self->{confidence}->{alpha}=(100-$self->{confidence}->{significance})/2; $self->{confidence}->{alpha}/=100; $self->{confidence}->{standard_error}=$self->standard_deviation()/sqrt($self->count()); $self->{confidence}->{t_value}=abs tdistr($self->{confidence}->{df},$self->{confidence}->{alpha}); $self->{confidence}->{delta}=$self->{confidence}->{t_value}*$self->{confidence}->{standard_error}; $self->{confidence}->{upper_clm}=$self->mean() +$self->{confidence}->{delta}; $self->{confidence}->{lower_clm}=$self->mean() -$self->{confidence}->{delta}; $self->{confidence}->{t_statistic}=$self->{confidence}->{standard_error} ?($self->mean()/$self->{confidence}->{standard_error}):0; $self->{confidence}->{t_prob}=1- abs (tprob($self->{confidence}->{df},-1*$self->{confidence}->{t_statistic})-tprob($self->{confidence}->{df},$self->{confidence}->{t_statistic})) ; $self->{confidence}->{valid}=1; return 1; } sub add_data{ my $self = shift; my $aref; if (ref $_[0] eq 'ARRAY') { $aref = $_[0]; } else { $aref = \@_; } my $significance=$self->{confidence}->{'significance'} if (defined($self->{confidence}->{'significance'})); $self->SUPER::add_data($aref); $self->{confidence}->{'significance'}=$significance; $self->compute_confidence_interval() if ($self->count()>1) ; return 1; } sub set_significance{ # set the significance level. usually 90, 95 or 99 my $self=shift; my $significance=shift; $self->{confidence}->{'significance'}=$significance if (($significance>0)&&($significance<100)); $self->compute_confidence_interval() if($self->count()>1); return 1; } sub print_confidence_interval{ my $self=shift; print "mean:",$self->mean(),"\n"; print "variance:",$self->variance(),"\n"; my $confidence=$self->{confidence}; foreach my $k ( keys %$confidence) { print "$k: $confidence->{$k} \n"; } return 1; } sub output_confidence_interval{ my $self=shift; croak "sample size must be >1 to compute the confidence interval\n" if($self->{confidence}->{valid}!=1); my $title=shift; print "Summary from the observed values of the sample $title:\n"; print "\tsample size= ", $self->count()," , degree of freedom=", $self->df(), "\n"; print "\tmean=", $self->mean()," , variance=", $self->variance(),"\n"; print "\tstandard deviation=", $self->standard_deviation()," , standard error=", $self->standard_error(),"\n"; print "\t the estimate of the mean is ", $self->mean()," +/- ",$self->delta(),"\n\t", " or (",$self->lower_clm()," to ",$self->upper_clm," ) with ",$self->significance," % of confidence\n"; print "\t t-statistic=T=",$self->t_statistic()," , Prob >|T|=",$self->t_prob(),"\n"; } sub AUTOLOAD{ my $self = shift; my $type = ref($self) or croak "$self is not an object"; my $name = $AUTOLOAD; $name =~ s/.*://; return if $name eq "DESTROY"; if (exists $self->{_permitted}->{$name} ) { return $self->{$name}; } elsif(exists $self->{confidence}->{$name}) { return $self->{confidence}->{$name}; } else { croak "Can't access `$name' field in class $type"; } } 1; __END__ =head1 NAME Statistics::PointEstimation - Perl module for computing the confidence interval in parameter estimation with Student's T distribution =head1 SYNOPSIS use Statistics::PointEstimation; my @r=(); for($i=1;$i<=32;$i++) #generate a uniformly distributed sample with mean=5 { $rand=rand(10); push @r,$rand; } my $stat = new Statistics::PointEstimation; $stat->set_significance(95); #set the significance(confidence) level to 95% $stat->add_data(@r); $stat->output_confidence_interval(); #output summary $stat->print_confidence_interval(); #output the data hash related to confidence interval estimation #the following is the same as $stat->output_confidence_interval(); print "Summary from the observed values of the sample:\n"; print "\tsample size= ", $stat->count()," , degree of freedom=", $stat->df(), "\n"; print "\tmean=", $stat->mean()," , variance=", $stat->variance(),"\n"; print "\tstandard deviation=", $stat->standard_deviation()," , standard error=", $stat->standard_error(),"\n"; print "\t the estimate of the mean is ", $stat->mean()," +/- ",$stat->delta(),"\n\t", " or (",$stat->lower_clm()," to ",$stat->upper_clm," ) with ",$stat->significance," % of confidence\n"; print "\t t-statistic=T=",$stat->t_statistic()," , Prob >|T|=",$stat->t_prob(),"\n"; =head1 DESCRIPTION This module is a subclass of Statistics::Descriptive::Full. It uses T-distribution for point estimation assuming the data is normally distributed or the sample size is sufficiently large. It overrides the add_data() method in Statistics::Descriptive to compute the confidence interval with the specified significance level (default is 95%). It also computes the t-statistic=T and Prob>|T| in case of hypothesis testing of paired T-tests. =head1 AUTHOR Yun-Fang Juan , Yahoo! Inc. (yunfang@yahoo-inc.com) =head1 SEE ALSO Statistics::Descriptive Statistics::Distributions =cut