# Composite Standard Deviation source code

## Contents:

1. Perl
2. Python
3. PHP
4. For MATLAB see Sebastian W's implementation.

# Perl:   composite_sd.pl

```#!/usr/bin/perl

# Copyright 2010, by David A. Burton
# Cary, NC  USA
# +1 919-481-0149
# Email: http://www.burtonsys.com/email/
# Permission is hereby granted to use this function for any purpose,
# with or without modification, provided only that this copyright &
# permission notice is retained.

# You can use a "require" to pull this into another Perl program, like this:
#
#   @INC = ('.','..');
#   require "composite_sd.pl";
#
# It defines four functions:
#
# &composite_SD is a function to calculate the overall standard deviation of
# a collection of sample groups, given only the group standard deviations,
# means, and sample counts.
#
# &sample_SD is a simple function to calculate the standard deviation of a
# collection of samples.
#
# &sum and &avg (to calculate summation and simple average, respectively)
# are also defined.
#
# These functions are compatible both with 32-bit Perl 4.036, and with Perl 5.

if (!defined \$debugmode) {
\$debugmode=0;  # set it >0 for debug prints
}

# Input is an array of samples; result is the summation.
sub sum {
local( \$sum ) = 0;
foreach \$i (@_) {  # note that \$i is implicitly local to the loop
\$sum += \$i;
}
return \$sum;
}

# Input is an array of samples; result is the mean.
sub avg {
if (-1 == \$#_) { die "ERR: \&avg( empty list )\n"; }
return &sum(@_) / (1+\$#_)
}

# input is an array of samples; result is the standard deviation
sub sample_SD {
local( @samples ) = @_;
local( \$mean ) = &avg( @samples );
local( \$sum_of_squared_deviations ) = 0;
if (\$#samples <= 0) {
return 0;
} else {
foreach \$i (@samples) {
\$sum_of_squared_deviations += ((\$i - \$mean) * (\$i - \$mean));
}
return sqrt( \$sum_of_squared_deviations / \$#samples );
}
}

# Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
# See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
# Inputs are:
#   \$G, the number of groups
#   @means, the array of \$G group means
#   @SDs, the array of \$G group standard deviations
#   \$ncount or @ncounts -- number of samples in each group (can be scalar
#                          if all groups have same number of samples)
# Result is the overall standard deviation.
sub composite_SD {
local( \$G ) = shift @_;
local( @means ) = splice( @_, 0, \$G );
local( @SDs ) = splice( @_, 0, \$G );
local( @ncounts );
local( \$i );
if (0 == \$#_) {
for (\$i=(\$G-1); \$i >= 0; \$i--) {
\$ncounts[\$i] = \$_;
}
} else {
@ncounts = @_;
}
if (   (\$G != (\$#means + 1))
|| (\$G != (\$#SDs + 1))
|| (\$G != (\$#ncounts + 1)) ) {
die "ERR: sub composite_SD called w/ wrong number of parameters [\$G,\$#means,\$#SDs,\$#ncounts]\n";
}
# Okay, we have the parameters, now.

# calculate total number of samples, N, and grand mean, GM
local(\$N) = &sum(@ncounts);  # total number of samples
if (\$N <= 1) {
print "Warning: only \$N samples, SD is incalculable\n";
return -1;
}
local(\$GM) = 0;
for (\$i=0; \$i<\$G; \$i++) {
\$GM += (\$means[\$i] * \$ncounts[\$i]);
}
\$GM /= \$N; # grand mean

# calculate Error Sum of Squares
local(\$ESS) = 0;
for (\$i=0; \$i<\$G; \$i++) {
\$ESS += ((\$SDs[\$i])**2) * (\$ncounts[\$i] - 1);
}

# calculate Total Group Sum of Squares
local(\$TGSS) = 0;
for (\$i=0; \$i<\$G; \$i++) {
\$TGSS += ((\$means[\$i]-\$GM)**2) * \$ncounts[\$i];
}

# calculate standard deviation as square root of grand variance
local( \$result ) = sqrt( (\$ESS+\$TGSS)/(\$N-1) );
if (\$debugmode > 0) { printf "dbg composite_SD: N = \$N, GM = %4.3f, ESS = %5.1f, TGSS = %5.1f, SD = %4.3f\n", \$GM, \$ESS, \$TGSS, \$result; }
return \$result;
} #composite_SD

# return true value to 'require'
1;
```

# Python:   composite_sd.py

```#!/usr/bin/python3

'''
composite_sd.py defines three functions:

composite_SD is a function to calculate the overall standard deviation of
a collection of sample groups, given only the group standard deviations,
means, and sample counts.

sample_SD is a simple function to calculate the standard deviation of a
collection of samples.

avg (to calculate the simple average) is also defined.

These functions are compatible with Python 2.6, Python 2.7, and Python 3.*.

Copyright 2010, 2013, by David A. Burton
Cary, NC  USA
+1 919-481-0149
Email: http://www.burtonsys.com/email/
Permission is hereby granted to use this function for any purpose,
with or without modification, provided only that this copyright &
permission notice is retained.

Note: if what you really want is a "running" calculation of variance and/or
http://www.johndcook.com/standard_deviation.html
(That's from the American statistician John Cook, not the Australian
climate activist John Cook.)
'''

from __future__ import print_function, division  # requires python 2.6 or later (2.7 or later preferred)
import math

__all__ = ['iterable', 'avg', 'sample_SD', 'composite_SD']

def iterable(obj):
'''True iff obj is iterable: a list, tuple, or string.'''
return hasattr(obj, '__contains__')

def avg(samples):
if len(samples) >= 1:
return sum(samples) / len(samples)
return float('nan')

def sample_SD(samples):
'''input is an array of samples; result is the standard deviation'''
mean = avg(samples)
sum_of_squared_deviations = 0;
sd = 0
if len(samples) >= 2:
for datum in samples:
sum_of_squared_deviations += ((datum - mean) * (datum - mean));
sd = math.sqrt(sum_of_squared_deviations / (len(samples)-1) );
return sd

def composite_SD(means, SDs, ncounts):
'''Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
Inputs are:
means, the array of group means
SDs, the array of group standard deviations
ncounts, number of samples in each group (can be scalar
if all groups have same number of samples)
Result is the overall standard deviation.
'''
G = len(means)  # number of groups
if G != len(SDs):
raise Exception('inconsistent list lengths')
if not iterable(ncounts):
ncounts = [ncounts] * G  # convert scalar ncounts to array
elif G != len(ncounts):
raise Exception('wrong ncounts list length')

# calculate total number of samples, N, and grand mean, GM
N = sum(ncounts)  # total number of samples
if N <= 1:
raise Exception("Warning: only " + str(N) + " samples, SD is incalculable")
GM = 0.0
for i in range(G):
GM += means[i] * ncounts[i]
GM /= N  # grand mean

# calculate Error Sum of Squares
ESS = 0.0
for i in range(G):
ESS += ((SDs[i])**2) * (ncounts[i] - 1)

# calculate Total Group Sum of Squares
TGSS = 0.0
for i in range(G):
TGSS += ((means[i]-GM)**2) * ncounts[i]

# calculate standard deviation as square root of grand variance
result = math.sqrt((ESS+TGSS)/(N-1))
return result

# samples = range(10)
# print('avg=', avg(samples))
# sd = sample_SD(samples)
# print('sd=', sd)
# pt1 = [0,1,2]
# pt2 = [3,4]
# pt3 = [5,6,7,8,9]
# means = [avg(pt1), avg(pt2), avg(pt3)]
# SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)]
# ncounts = [len(pt1), len(pt2), len(pt3)]
# sd2 = composite_SD(means, SDs, ncounts)
# print('sd2=', sd2)
#
# samples = range(9)
# print('avg=', avg(samples))
# sd = sample_SD(samples)
# print('sd=', sd)
# pt1 = [0,1,2]
# pt2 = [3,4,5]
# pt3 = [6,7,8]
# means = [avg(pt1), avg(pt2), avg(pt3)]
# SDs = [sample_SD(pt1), sample_SD(pt2), sample_SD(pt3)]
# ncounts = 3
# sd2 = composite_SD(means, SDs, ncounts)
# print('sd2=', sd2)

```

# PHP:   composite_sd.php.txt

Date: Mon, Jun 22, 2015 at 1:05 PM
Subject: Composite standard deviations - PHP Code

Hi Dave,

I converted the python code which is attached into PHP code. I tested and it gives a good composite SD number. You are free to copy or do as you wish as you provided the original python source.

Here is the function which I have added as part of a statistics class.

Cheers, Eric

Eric Jennings, Sr. Systems Analyst
Hampton Affiliates
```//---------------------------------------------------------------------------------------------
// This function returns a composite standard deviation.
// Calculate combined standard deviation via ANOVA (ANalysis Of VAriance)
//   See:  http://www.burtonsys.com/climate/composite_standard_deviations.html
//   Inputs are:
//     Means, the array of group means
//     SDs, the array of group standard deviations
//     nCounts, number of samples in each group (can be scalar
//              if all groups have same number of samples)
// Return: Result is the overall standard deviation.
//
// Copyright 2010, 2013, by David A. Burton; Copyright 2015, by Eric Jennings.
// Contact by email: http://www.burtonsys.com/email/
// Permission is hereby granted to use this function for any purpose,
// with or without modification, provided only that this copyright and
// permission notice is retained.
//
// Note: if what you really want is a "running" calculation of variance and/or
// http://www.johndcook.com/standard_deviation.html
//
public function composite_SD( \$Means, \$SDs, \$nCounts ){

// number of groups
\$G = count( \$Means );
if( \$G != count(\$SDs)){
\$this->Msg .= 'inconsistent list lengths';
return 0;
}
if( \$G != count(\$nCounts)){
\$this->Msg .= 'wrong nCounts list length';
return 0;
}

// calculate total number of samples, N, and grand mean, GM
\$N = array_sum(\$nCounts);  // total number of samples
if( \$N <= 1 ){
\$this->Msg .= "Warning: only \$N samples, SD is incalculable";
}
\$GM = 0.0;
for( \$i = 0; \$i < \$G; \$i++ ){
\$GM += \$Means[\$i] * \$nCounts[\$i];
}
\$GM /= \$N;  // grand mean

// calculate Error Sum of Squares
\$ESS = 0.0;
for( \$i = 0; \$i < \$G; \$i++){
\$ESS += (pow(\$SDs[\$i], 2)) * (\$nCounts[\$i] - 1);
}

// calculate Total Group Sum of Squares
\$TGSS = 0.0;
for( \$i = 0; \$i < \$G; \$i++ ){
\$TGSS += (pow(\$Means[\$i]-\$GM, 2)) * \$nCounts[\$i];
}

// calculate standard deviation as square root of grand variance
\$result = sqrt((\$ESS+\$TGSS)/(\$N-1));
return \$result;
}
```