#!/usr/bin/perl use strict; use warnings; # push @INC,'.'; # require "traceback.pl"; # handy for debugging, but I'm not using it now # This program calculates binomial probabilities (or, optionally, just the binomial coefficient). # Unlike most binomial calculators, this one goes to considerable effort to avoid overflow and # underflow problems, so it works correctly when most others fail. # Copyright 2021, by David A. Burton # Cary, NC USA # Mobile / WhatsApp: +1 919-244-3316 # Email: https://sealevel.info/contact.html # This is free, open source software, but it is copyrighted. You are welcome to use it, in whole # or in part, for any purpose. However, if you do so, I require that you retain this notice, or # a substantially equivalent notice, in copies or substantial excerpts of the program. # TLIB Version Control fills in the version information for us: our $version_str; #--=>keyflag<=-- "&(#)%n, version %v, %11d " $version_str = "&(#)binomcalc.cgi, version 95, 20-Aug-2025 "; our $trimmed_version_str; if ($version_str =~ /, (version [0-9]+, [0-9]*\-[a-zA-Z]{3}\-[0-9]+) /) { $trimmed_version_str = "$1"; } else { $trimmed_version_str = ''; } $| = 1; # We would need cgi.pm for "user_agent" lookup, and some convenience functions, like header(), param(), etc. # But I'm not using any of those. # use CGI ':standard'; # Example of usage: # # our $Inf = 2e262144; # Scalar floating-point infinity our $POS_NORM_SMALLEST = 2.22507385850720138e-308; # smallest possible normalized positive IEEE 64-bit float value our $POS_DENORM_SMALLEST = 4.94065645841247e-324; # smallest possible denormalized (subnormal) positive IEEE 64-bit float value use Scalar::Util qw(blessed isdual reftype tainted isvstring looks_like_number set_prototype); use Time::HiRes qw(time sleep); # use lib '/root/perl5/lib/perl5/x86_64-linux-thread-multi/Math'; use lib '/root/perl5/lib/perl5'; # for GMP use Math::GMP; # Do this so it'll fail if GMP's missing (otherwise it falls back to the slow lib). If it fails try installing: "cpan Math::GMP" "cpan Math::BigInt::GMP" use Math::BigInt lib => 'GMP'; use Math::BigFloat lib => 'GMP'; use Math::BigRat lib => 'GMP'; # use bignum; <--- Do not do this! (My code expects scalars by default.) # Math::BigFloat->accuracy(500); <--- I'll do this further down (number of digits is now adjustable) # printf(" Bigfloat version = " . $Math::BigFloat::VERSION . "\n" ); BEGIN { # Polyfill for missing nparts method prior to BigFloat version 1.999808 (circa 2017) if (!exists &Math::BigFloat::nparts) { *Math::BigFloat::nparts = sub { my $x = shift; my ($mantis, $exp) = $x->parts(); # both are BigInts my $len = $mantis->length(); # number of digits in the mantissa $exp += ($len-1); # $mantis = Math::BigFloat->new($mantis) / (10**($len-1)); <== this fails for large $len, due to overflow $mantis = Math::BigFloat->new($mantis) / (Math::BigFloat->new(10)**($len-1)); return ($mantis,$exp); }; } } our $dbg = 0; # default our $echo_cmdline = 1; # normally we display command line, but v=0 can suppress it our $vrblx; # temp value used by &dspl() our $typefunc_avail = 0; # needed by sub dspl(), and set to 1 if we successfully load type() from autobox our $in_midst_of_generating_acu_table = 0; # if
block
if ($parameters =~ /[ \-\&]o=([htp])/) {
$p_o = $1;
$cgi_context = ('h' eq $1);
}
# In a cgi context, we have to emit this magic line, before we can write anything else.
if ($cgi_context || ('p' eq $p_o)) {
print "Content-Type: text/html\n\n"; # in case it's running as a .cgi script
}
if ($dbg>0) {
&wprint( "$scriptname $trimmed_version_str\n" );
&wprint( "OS = '$^O'\n" ); # What OS is this?
&wprint( "Perl version " . $] . " = " . $^V . "\n" );
&wprint( 'Math::BigFloat->VERSION = ' . Math::BigFloat->VERSION . "\n" );
&wprint( 'Math::BigInt::GMP->VERSION = ' . Math::BigInt::GMP->VERSION . "\n" );
} elsif ($cgi_context) {
print( "\n\n" );
}
# Because binomial probability calculations can be very computationally intensive,
# we need to ensure that we don't end up with too many instances running, lest it
# monopolize my little web server. (I don't bother with this if running on the
# desktop.)
our $num_server_instances = 0;
if (($^O =~ /linux/i) && ($scriptname =~ /\.cgi$/)) {
$num_server_instances = 1;
# my $tmp = `pgrep -a binomcalc\\.cgi`;
# &wprint("PGREP output:\n$tmp\n-----------\n");
my $tmpcount = `pgrep -c binomcalc\\.cgi`;
$num_server_instances = 0 + $tmpcount;
if ($dbg>0) {
&wprint("Instance count = '$num_server_instances'\n");
}
}
if ($num_server_instances > 2) {
# The usual case is one instance running (this process).
# My server has four cores, so I allow at most two instances, to avoid bogging it down too much.
print "ERROR: Server busy. Wait a few minutes and try again.
\n" .
'(Or download binomcalc.pl,' .
" and run it on your own computer.)
\n";
exit 1;
}
if ((!$parameters) && !$cgi_context) {
# no parameters specified -- print help message and quit
&givehelp("** ERROR: no parameters were specified to $scriptname");
exit 1;
}
if ($dbg >= 1) {
&wprintf("dbg: QUERY_STRING='%s', ARGV='%s'\n", (defined $parameters1) ? $parameters1 : 'UNDEF', (defined $parameters2) ? $parameters2 : 'UNDEF' );
}
# define type() for debugging:
if ($dbg>0) {
# use autobox::universal qw(type);
if (eval {require autobox::universal;}) {
autobox::universal->import('type');
$typefunc_avail = 1;
} else {
if ($cgi_context) {
print 'Warning: Cannot locate autobox/universal.pm in @INC. You may need to install autobox, like this:' . "
\n" .
"cpan autobox::universal\n
\n";
} else {
print 'Warning: Cannot locate autobox/universal.pm in @INC (you may need to install autobox)' . "\n" .
"In Strawberry Perl you can get autobox like this:\n" .
" 1. from a Windows Administrator (elevated) command prompt...\n" .
" 2. c:\n" .
" 3. cd \\strawberry\\perl\\bin\n" .
" 4(a). cpan autobox::universal\n" .
" or:\n".
" 4(b). cpanm autobox\n\n";
}
eval("sub type { return '?';}");
}
}
# for benchmarking; the only parameter is an arbitrary name
our %runtimes = ();
our %runtime_cnt = ();
our %interval_started_at = ();
# Call this at start of timing interval. (If it's called more than once, the 2nd call just increments the runtime_cnt.)
sub timeit_start {
my $name = shift;
if (defined $interval_started_at{$name}) {
$runtime_cnt{$name} += 1;
} else {
$interval_started_at{$name} = time();
if (!exists $runtime_cnt{$name}) {
$runtime_cnt{$name} = 0;
$runtimes{$name} = 0.0;
}
}
}
# Call this at the end of the timing interval.
sub timeit_end {
my $name = shift;
if (!exists $interval_started_at{$name}) {
die "ERR: timeit_end('$name'), but timeit_start wasn't called.";
}
my $time_increment = time() - $interval_started_at{$name};
delete $interval_started_at{$name};
$runtimes{$name} += $time_increment;
$runtime_cnt{$name} += 1;
}
# Call this to reset timer (forget previous timings).
sub timeit_reset {
my $name = shift;
delete $interval_started_at{$name};
delete $runtimes{$name};
delete $runtime_cnt{$name};
}
# If a name is specified, display the timing stats for that name;
# otherwise, display them all.
sub timeit_display {
my $name;
if ($#_ >= 0) {
$name = shift;
if (exists $interval_started_at{$name}) {
&timeit_end($name);
}
&wprintf("Time %s, %f / %d = %f each\n", $name, $runtimes{$name}, $runtime_cnt{$name}, ($runtimes{$name} / $runtime_cnt{$name}) );
} else {
foreach $name (sort keys %runtimes) {
&timeit_display($name);
}
}
}
# return a description of the input paramter (for my debug prints)
sub dspl {
local($vrblx) = $_[0];
my( $result, $tp, $rp );
if (!defined $vrblx) {
$result = 'UNDEF';
if ($dbg > 2) {
&wprint( "dbg: dspl(UNDEF)" );
}
} else {
$tp = '';
$rp = '';
if ($typefunc_avail) {
$tp = &type($vrblx);
if (('STRING' eq $tp) and looks_like_number($vrblx)) {
if ($vrblx =~ /^(\-|)[0-9]+$/) {
$tp = 'Integer';
} else {
$tp = 'Float';
}
}
}
if (('' eq $tp) or ('HASH' eq $tp)) {
$rp = ref($vrblx);
}
if ($dbg > 2) {
&wprint( "dbg: dspl(), type=$tp, ref=$rp,\n" );
}
if (('' eq $rp) && ('' eq $tp)) {
$result = "$vrblx";
} elsif ('' ne $rp) {
$result = "$rp $vrblx";
} elsif ('STRING' eq $tp) {
$result = "$tp '$vrblx'";
} else {
$result = "$tp $vrblx";
}
}
if ($dbg > 2) {
&wprint( " result='$result'\n" );
}
return $result;
}
# v= and o= have already been handled; here we process the rest of the parameters.
our ($p_n, $p_k, $p_p, $p_m, $p_a, $p_t, $p_d, $p_b);
$p_p = "0.5"; # default
$p_m = 'b'; # default
$p_a = 'q'; # default
our $accuracy = 500; # default is 500 digits except for a=q, which is about 14 digits (note: BigFloat default is 40)
our $effective_accuracy = $accuracy;
sub process1param {
my ($opt,$val) = @_;
# &wprintf(" opt='%s' val='%s'\n", $opt, $val);
if ('n' eq $opt) {
if ($val =~ /^([1-9][0-9]*)$/) {
$p_n = $1;
} else {
&wprintf("ERROR: 'n=%s' (should be a positive integer)\n", $val);
exit 1;
}
} elsif ('k' eq $opt) {
if ($val =~ /^([0-9]+)$/) {
$p_k = $1;
} else {
&wprintf("ERROR: 'k=%s' (should be 0..n)\n", $val);
exit 1;
}
} elsif ('p' eq $opt) {
if ($val =~ /^([0-9\/\.eE\-\+]+)$/) {
# if it contains a / then it's a rational number; the e, E, -, and + are so that it can be in scientific notation
$p_p = $1;
} else {
&wprintf("ERROR: 'p=%s' (should be 0..1)\n", $val);
exit 1;
}
} elsif ('m' eq $opt) {
# the m=... parameter must be 'b' (default Binomial probability), 'cu' (CUmulative probability), 'acu' (All CUmulative probabilities), or 'co' (binomial COefficient)
if ($val =~ /^(b|cu|acu|co)$/i) {
$p_m = lc $1;
} else {
&wprint("ERROR: 'm=$val' (mode) parameter to $scriptname may only be b, cu, acu, or co.\n");
exit 1;
}
} elsif ('a' eq $opt) {
# the a=... parameter must be 's' (Slow = GMP BigRat), 'q' (Quick = scalar float = default), 'h' (Hybrid), 'x' (eXperimental = BigFloat), or 'xx' (also eXperimental)
if ($val =~ /^(x(x|)|s|q|h)$/) {
$p_a = lc $1;
} else {
&wprintf("ERROR: 'a=$val' (accuracy) parameter may only be q, h, s, x or xx.\n");
exit 1;
}
} elsif ('t' eq $opt) {
# the t=... (timeout) parameter is optional
if ($val =~ /^([0-9\.]*)$/) {
$p_t = $1;
} else {
&wprint("ERROR: 't=$val' (timeout) parameter must be numberic (max number of seconds).\n");
exit 1;
}
} elsif ('d' eq $opt) {
# the d=... (digits) parameter is optional (but ignored for a=q)
if ($val =~ /^([0-9]+)$/) {
$p_d = $1;
$effective_accuracy = $accuracy = 0 + $p_d;
} else {
&wprint("ERROR: 'd=$val' (digits of precision) parameter must be a non-negative integer.\n" .
"(Default is 500 digits, except for a=q, which is about 15 digits.)\n");
exit 1;
}
} elsif ('b' eq $opt) {
# the b=1 (benchmark) parameter is optional
if ($val =~ /^([01])$/) {
$p_b = $1;
} else {
&wprint("ERROR: 'b=$val' (benchmark) parameter must be 0 or 1.\n");
exit 1;
}
} elsif (('v' eq $opt) || ('o' eq $opt)) {
# handled above
} else {
&wprintf("ERROR: unrecognized parameter to $scriptname: '%s=%s'\n", $opt, $val);
&wprint("parameters='$parameters'\n");
exit 1;
}
}
my $parm;
for $parm (split(/\s+\-|\s+|\&/, $parameters)) {
if ($dbg > 1) {
&wprint( "dbg: parm='$parm'\n");
}
if ('' ne $parm) {
if ($parm =~ /^([a-zA-Z]+)=(.+)$/) {
my $opt = lc $1;
my $val = $2;
&process1param( $opt, $val );
} else {
&wprintf("ERROR: unrecognized parameter to $scriptname: '%s'\n", $parm);
&wprint("Parameters='$parameters'\n");
exit 1;
}
}
}
# the k=... parameter is reqired
# the n=... parameter is reqired
# the p=... parameter defaults to 0.5
# the m=... parameter defaults to b
# the a=... parameter defaults to q
our $p; # Note: $p will be set waaaay down in the program
if ((!defined $p_n) && !defined $p_k) {
&givehelp('');
exit 1;
}
if ((!defined $p_n) || !defined $p_k) {
&givehelp("ERROR: the n and k parameters are required.\n");
exit 1;
}
if ($accuracy < 14) {
if ($accuracy < 1) {
$effective_accuracy = $accuracy = 1;
}
&wprintf("Warning: 'd=%d' digits is less than the accuracy of the 'a=q' default 'quick' setting (about 14 digits). That's probably a mistake.\n", $accuracy );
} elsif ($dbg && ('q' ne $p_a) && (('h' ne $p_a) || ('b' ne $p_m))) {
&wprintf("Computing with 'd=%d' digits of accuracy.\n", $accuracy );
}
Math::BigRat->accuracy($accuracy);
Math::BigFloat->accuracy($accuracy);
if (($effective_accuracy > 13) && (('q' eq $p_a) || ('h' eq $p_a))) {
# in a=q or a=h mode we calculate with 64-bit scalar floats, so effective accuracy is only about 14 digits
$effective_accuracy = 13; # I'm being conservative
}
if ($echo_cmdline) {
# echo the equivalent Perl command-line
my $cmdline = 'perl ' . $scriptdotpl;
if ($p_v) { $cmdline .= " v=$p_v"; }
# if ($p_t ne '') { $cmdline .= " t=$p_t"; }
if ($p_m) { $cmdline .= " m=$p_m"; }
if ($p_a) { $cmdline .= " a=$p_a"; }
if ($p_d) { $cmdline .= " d=$p_d"; }
if ($p_b) { $cmdline .= " b=$p_b"; }
if ($p_n) { $cmdline .= " n=$p_n"; }
if ($p_k ne '') { $cmdline .= " k=$p_k"; } # note: k can be zero
if ($p_p) { $cmdline .= " p=$p_p"; }
if ($cgi_context) {
print "$cmdline
\n"
} else {
print "\n$cmdline\n";
}
}
sub givehelp {
my $ermsg = shift;
# First, reformat the TLIB version info, for prettier display
# e.g., $version_str = "&(#)binomcalc.pl, version 42, 19-Dec-2020 ";
my $vstr = $trimmed_version_str;
if ('' ne $vstr) {
$vstr = " ($vstr)";
}
if (!defined $ermsg) {
$ermsg = '';
} elsif ('' ne $ermsg) {
$ermsg .= "\n\n";
}
# Now display the Help message, including version number
&wprint( "\n" . $ermsg .
"$scriptdotpl -- extreme precision binomial calculator\n" .
"by Dave Burton$vstr\n" .
"\n" .
"Note: you can rename this program to binomcalc.cgi and use it from a web page, via a\n" .
"\"server-side include,\" as in this example:\n" .
"\n" .
" \n" .
"\n" .
"That does the same calculation as this command:\n" .
"\n" .
" c:\\> perl -f binomcalc.pl n=12 k=3 p=1/2\n" .
"\n" .
"The following command-line parameters are supported:\n" .
"\n" .
" n={integer} (REQUIRED: the n of 'n choose k')\n" .
" k={integer} (REQUIRED: the k of 'n choose k')\n" .
" p={probability} (decimal value 0..1, e.g., 0.5, or a fraction, e.g., 1/3)\n" .
" m={b, cu, acu, co} (b = binomial probability, cu = cumulative probabilities,\n" .
" acu = all cumulative probabilities, co = coefficient only)\n" .
" a={q, s, h, x, xx} (q = quick/scalar = default, s is slower w/ extreme precision,\n" .
" h = hybrid, x and xx are experimental [don't use them])\n" .
" o={h, t, p} (output: h = html, t = text, p = , default determined by context)\n" .
" t=nnn (timeout limit, nnn seconds; default = 0 for no timeout)\n" .
" b=1 (benchmark it; show timings)\n" .
" d={integer} (digits of accuracy: default for a=s is 500; for a=q it's about 13)\n" .
" v={integer} (verbosity; 0 = no debug prints, 1 = some, 2 = lots)\n" .
"\n" .
"Other notes:\n" .
"1. Default mode is m=b (calculate binomial probability).\n" .
"2. Default accuracy setting is a=q, which is fastest. For higher accuracy use a=s.\n" .
" The x and xx (experimental) settings are usually slower and not as good.\n"
);
# And then exit, with errorlevel = 1
exit 1;
}
# wrap a printable string in , add
s, etc., if running in a CGI context
sub wrap {
my $str = shift;
if ($cgi_context) {
$str =~ s/\n$//;
$str =~ s/\n /\n /g;
$str =~ s/\n /\n /g;
$str =~ s/ / /g;
$str =~ s/</g;
$str =~ s/\n/
\n/g;
$str = "$str
\n";
}
return $str;
}
# Ensure that argument is padded to at least n characters, and roughly centered
sub center {
my ($s,$n) = @_;
my $result = $s;
my $l = length($s);
if ($l <= $n) {
my $pr = int(($n-$l)/2);
my $pl = ($n-$l) - $pr;
$result = (' ' x $pl) . $s . (' ' x $pr);
# if there's a leading '<' or '>' then we might move the result one space to the left, to line up better with other m=acu results:
if ($pl > $pr) {
if ($result =~ / ([<>][^ ]*) /) {
$result = substr($result,1) . ' ';
}
}
}
return $result;
}
# wrapped print -- does something reasonable either at command-line or in CGI context
sub wprint {
my $str = shift;
print &wrap($str);
}
# wrapped printf -- does something reasonable either at command-line or in CGI context
sub wprintf {
my $fmt = shift;
my $str = sprintf( $fmt, @_ );
&wprint($str);
}
# insert commas into an integer if it's > 4 digits long
sub commafy {
my $number = shift;
my @pieces = ();
$number .= '';
# if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
if ((length($number) > 4) && ($number =~ /^(\-|)[0-9]+$/)) {
while (length($number) > 0) {
unshift(@pieces,substr($number,-3));
substr($number,-3) = '';
# if ($dbg>0) {
# $tm1 = join(',',@pieces);
# &wprint( "dbg: number='$number', pieces='$tm1'\n" );
# }
}
$number = join(',',@pieces);
}
# if ($dbg>0) { &wprint "dbg: number='$number'\n"; }
return $number;
}#comafy
# Format a float or BigFloat into normalized scientific notation.
# This is like the ->bnstr() method for BigFloats, but with mantissa rounded to N significant digits.
# Input parameters are $x (either scalar float or BigFloat), and $n (number of significant digits).
sub bnstrN {
my ($x,$n) = @_;
my $x2 = Math::BigFloat->new($x); # in case $x is scalar
my $result;
my ($mantis,$exp) = $x2->nparts();
# print "dbg: bnstrN:\n exp=" . &dspl($exp) . "\n";
$exp->bfround(0); # otherwise exponent might display as "-6.00000000000000000000000000000" instead of "-6"
my $rounded_mantis = $mantis->copy();
$rounded_mantis->bfround(-($n-1));
if ($rounded_mantis =~ /^10\.([0-9]*)$/) {
# Sometimes rounding "almost 1" results in "10.000000000e-1", this changes it to "1.0000000000"
$rounded_mantis = '1.0' . $1;
$exp++;
}
if ($exp) {
if (ref($exp)) {
# convert BigFloat to scalar, lest we get something like "1.00e-12.0000000000000"
$exp = $exp->numify();
}
$result = $rounded_mantis . 'e' . $exp;
} else {
$result = $rounded_mantis;
}
# print "dbg: bnstrN:\n exp=$exp\n mantis=$rounded_mantis\n result=$result\n";
return $result;
}
# if input is 0.005 then result is 2 (there are two zeros after the decimal point)
sub number_of_zeros_after_the_decimal_point {
my $x = shift;
my $result;
if (ref $x) {
# BigFloat or BigRat
if ($x <= 0) {
# negative is only possible due to accumulated rounding errors
$result = $effective_accuracy - 1;
} else {
my $result2 = int(abs(log10($x)));
# print "dbg: x=" . &dspl($x) . " result2=" . &dspl($result2) . "\n";
$result = $result2->numify();
}
} else {
# scaler 64-bit float
if ($x <= 0) {
$result = $effective_accuracy - 1;
if ($dbg>0) { print "dbg: x.LE.0, x=" . &dspl($x) . " result=" . &dspl($result) . "\n"; }
} else {
$result = int(abs(log10($x)));
}
}
if ($dbg>0) { print "dbg: number_of_zeros_after_the_decimal_point(" . &dspl($x) . ") = " . &dspl($result) . "\n"; }
return $result;
}
# Depending on how tiny the result is, we might display it in both floating point and scientific notation, or just in one or the other.
# First parameter ($prob) should be a scalar float or BigFloat in [0..1].
# (Due to accumulated rounding errors, $prob can sometimes be slightly < 0 or slightly > 1. We handle that gracefully.)
# Optional second parameter ($verbosity) can be 1 (to display only "5.450654626823856e-368") or 2 (for "<0.000000000000001 = 5.450654626823856e-368").
# Third parameter ($digits) is the maximum number of sighificant digits with which to display the result (default 15).
# (Note: do not confuse $digits with global $effective_accuracy, the number of digits ued in calculations.)
# Forth parameter ($max_addend) is an optional parameter for effectively reducing the number of signifcant digits when $prob is a small number calculated from
# sums & differences of larger-magnitude addends. For example, if digits is 6, then 1.000000 - 0.999990 = 0.000010 has only two significant digits, not six.
# $max_addend should be the largest of the addends/subtrahends/minuends which were included in the calculation.
# Fifth parameter ($definitely_not_exactly_0or1) can be passed as True to tell this subroutine that "zero" is really slightly larger than zero,
# and "one" is always really slightly less than one.
sub display_prob {
&timeit_start('display_prob');
my ($prob, $verbosity, $digits, $max_addend, $definitely_not_exactly_0or1) = @_;
my $prob_is_scalar = !(ref $prob); # true for IEEE 64-bit, false for BigRat or BigFloat
if (!defined $verbosity) {
$verbosity = 2;
}
if (!defined $digits) {
$digits = 15;
if ($dbg>0) { &wprint( "dbg: digits defaults to $digits\n" ); }
} else {
if ($dbg>0) {
&wprint( "dbg: digits passed in as $digits\n" );
# &traceback("display_prob");
}
}
if (!defined $definitely_not_exactly_0or1) {
$definitely_not_exactly_0or1 = 0;
}
if ($dbg>0) { printf( "dbg: display_prob( $prob, $verbosity, $digits, %s, $definitely_not_exactly_0or1 )\n", (defined $max_addend) ? $max_addend : '{undef}' ); }
# first, we calcluate a plain floating point representation
my $fp_prob;
my $digits_fp = $digits; # floating point format digits (as opposed to scientific notation digits)
my $fp_prob1;
my $fp_prob1_digits_exceeded = 0;
my $fp_prob1_is_tiny = 0;
my $smallest_possible_fp = substr('0.00000000000000000000',0,$digits_fp+1) . "1";
my $all_nines_fp = substr('0.99999999999999999999',0,$digits_fp+1);
if ($prob <= $smallest_possible_fp) {
if (($prob <= 0) && !$definitely_not_exactly_0or1) {
# exactly 0 and exactly 1 get special-cased
$fp_prob = $fp_prob1 = substr('0.00000000000000000000',0,$digits_fp+2);
} else {
$fp_prob1 = $smallest_possible_fp;
if ($cgi_context) {
$fp_prob = '<' . $smallest_possible_fp;
} else {
$fp_prob = '<' . $smallest_possible_fp;
}
$fp_prob1_digits_exceeded = $fp_prob1_is_tiny = 1;
}
} elsif ($prob > $all_nines_fp) {
if (($prob >= 1) && !$definitely_not_exactly_0or1) {
$fp_prob = $fp_prob1 = substr('1.00000000000000000000',0,$digits_fp+2);
} else {
$fp_prob1 = $all_nines_fp;
if ($cgi_context) {
$fp_prob = '>' . $all_nines_fp;
} else {
$fp_prob = '>' . $all_nines_fp;
}
$fp_prob1_digits_exceeded = 1;
}
} else {
my $fmt_f;
if ($prob < 0.1) {
$fmt_f = '%.' . ($digits_fp+1) . 'f'; # default "%.16f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
# Problem: 0.099999999999999 would print as 0.1, resulting in one too many digits; this is the hacky fix:
if ($fp_prob =~ /^0.[1-9]/) {
$fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
}
} else {
$fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f"
$fp_prob = $fp_prob1 = sprintf($fmt_f, $prob);
}
# print "dbg: fmt_f = '$fmt_f'\n";
}
my $len0 = length($fp_prob1);
if ($dbg>0) { &wprintf("dbg: fp_prob1='$fp_prob1', fp_prob='$fp_prob'\n"); }
# scientific notation is trickier...
my $prob2_is_tiny = ($prob <= 0);
my $prob_zeros = 0;
my $max_addend_zeros = 0;
if ($prob < 0.01) {
$prob_zeros = number_of_zeros_after_the_decimal_point($prob);
# ...but we only bother with scientific notation for probabilities less than 0.01
if (!defined $max_addend) {
if ($prob_is_scalar) {
$max_addend_zeros = 308;
if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=1, so max_addend_zeros=$max_addend_zeros\n" ); }
} else {
# $max_addend = $prob;
$max_addend_zeros = $prob_zeros;
if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=0, set max_addend_zeros=prob_zeros=$max_addend_zeros (I probably need to fix this!)\n" ); }
}
} else {
# we might not have as many digits of accuracy as we'd hoped.
$max_addend_zeros = number_of_zeros_after_the_decimal_point($max_addend);
if ($definitely_not_exactly_0or1 && $prob2_is_tiny & ($dbg>1)) { print "dbg: prob2_is_tiny, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
if ($dbg>0) { print "dbg: prob2_is_tiny=$prob2_is_tiny, definitely_not_exactly_0or1=$definitely_not_exactly_0or1, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; }
if ($prob_zeros > $max_addend_zeros) {
if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < ($digits + 3)) {
if ($dbg>0) {
print "dbg: effective_accuracy = " . dspl($effective_accuracy) . "\n";
print "dbg: prob_zeros = " . dspl($prob_zeros) . "\n";
print "dbg: max_addend_zeros = " . dspl($max_addend_zeros) . "\n";
print "dbg: digits = " . dspl($digits) . "\n";
}
$digits = ($effective_accuracy - ($prob_zeros - $max_addend_zeros)) - 3;
if ($dbg>0) { print "dbg: new digits = " . dspl($digits) . "\n"; }
if ($digits < 2) {
if ($digits <= 0) {
$digits = 1;
$prob2_is_tiny = 1;
} else {
$digits = 2;
}
if ($dbg>0) { print " new digits = " . dspl($digits) . "\n"; }
}
}
}
}
if ($prob_is_scalar && ($prob > 0) && ($prob < $POS_NORM_SMALLEST)) {
if ($dbg>0) { &wprintf( "dbg: scalar prob='%.16g' < POS_NORM_SMALLEST='%.16g'\n", $prob, $POS_NORM_SMALLEST ); }
# result is so tiny that we lost digits of precision due to denormalized 64-bit scalar representation
my $tmp = sprintf("%.1g", $prob);
if ($tmp =~ /e(\-[0-9]+)$/i) {
my $tmp_expon = $1; # should be -308..-324 because POS_NORM_SMALLEST = 2.22507385850720138e-308 and POS_DENORM_SMALLEST = 4.94065645841247e-324
if ($dbg>0) { &wprint( "dbg: scalar subnormal prob is approx. $tmp, digits was='$digits', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny'\n" ); }
if( ($tmp_expon > -308) || ($tmp_expon < -324)) {
&wprintf( "ERR: prob='%.16g' tmp='$tmp' exp='$tmp_expon' (outside expected range)\n", $prob );
exit 1;
}
if ($tmp_expon < -309) {
my $tmp_sav_digits = $digits;
$digits += ($tmp_expon + 309); # reduce number of digits
if ($dbg>0) { &wprint( "dbg: digits reduced from $tmp_sav_digits to $digits\n" ); }
if ($digits <= 1) {
$digits = 1;
$prob2_is_tiny = 1;
}
}
if ($dbg>0) { &wprint( "dbg: prob='$prob', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny', new denorm scalar digits = " . dspl($digits) . "\n" ); }
}
}
# print "dbg: prob2_is_tiny = '$prob2_is_tiny'\n";
if ($prob2_is_tiny) {
my $tmp_expon = ($max_addend_zeros + $effective_accuracy - 2);
if ($dbg>0) {print "dbg: prob2_is_tiny = True, expon='$tmp_expon', max_addend_zeros=$max_addend_zeros, effective_accuracy=$effective_accuracy, (defined max_addend)='" . (defined $max_addend) . "'\n"; }
if ($prob_is_scalar && ($tmp_expon > 317)) {
if ($dbg>0) {print "dbg: expon='$tmp_expon' -> 117 because max_addend=undef\n";}
$tmp_expon = 317;
}
if (defined $max_addend) {
# more conservative for cummulative sums, since they tend to accumulate errors
$tmp_expon -= 1;
}
# if (0 == $prob) {
# $fp_prob = '(underflow)';
# } else {
$fp_prob = '1.0e-' . $tmp_expon;
if ($cgi_context) {
$fp_prob = '<' . $fp_prob;
} else {
$fp_prob = '<' . $fp_prob;
}
# }
} else {
if ($dbg>0) { &wprintf( "prob_zeros=$prob_zeros, max_addend_zeros=$max_addend_zeros, (prob_zeros > max_addend_zeros)=%d\n", ($prob_zeros > $max_addend_zeros) ); }
if ($prob_zeros > $max_addend_zeros) {
if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < 1) {
$prob2_is_tiny = 1;
if (!$fp_prob1_is_tiny) {
&wprint( "ERR: prob2 tiny but not prob1! Please note the exact parameter you used, and tell Dave!\n"
. "prob='$prob'\n"
. "prob1='$fp_prob1'\n"
. "prob_zeros=$prob_zeros\n"
. "max_addend_zeros=$max_addend_zeros\n"
. "effective_accuracy=$effective_accuracy\n" );
}
}
}
my $fp_prob2;
# Here we generate what I call the %g version (but I don't really use %g anymore)
#if ($prob_is_scalar) {
# $fp_prob2 = sprintf( ('%#.' . ($digits-1) . 'e' ), $prob ); # '#' flag avoids removal of trailing decimal point
# if ($dbg>0) { &wprint( "dbg: scalar fp_prob2='$fp_prob2' vs. '" . bnstrN($prob,$digits) . "'\n" ); }
#} else {
$fp_prob2 = bnstrN($prob,$digits); # scientific notation (do this because sprintf %g doesn't work for BigFloats)
# }
if ($dbg>0) { &wprint( "dbg: prob='$prob', digits=$digits, initial fp_prob2='$fp_prob2'\n" ); }
if ($fp_prob2 =~ /^(\-|)([1-9])\.([0-9]*)e\-1$/) {
# change "8.187225652655315e-1" to "0.8187225652655315"
if ($dbg>0) { &wprint( "dbg: fp2 fixup: was '$fp_prob2'\n"); }
$fp_prob2 = $1 . '0.' . $2 . $3;
if ($dbg>0) { &wprint( " now '$fp_prob2'\n"); }
}
my $len2 = length($fp_prob2);
if ($dbg>0) { &wprint( "dbg:\n fb0 = '$fp_prob'\n fb2 = '$fp_prob2'\n" ); }
if (($fp_prob1 ne $fp_prob2) && ($fp_prob2 ne substr($fp_prob1,0,$len2))) {
# the %f and %g versions are different, and the %g version isn't just %f version missing a digit or two at the end
if ($prob2_is_tiny) {
if ($fp_prob2 =~ /^([^e]*)e(.*)$/) {
my $tmp = '1.0e' . (1+$2);
# print "dbg: fp_prob2 is tiny = '$fp_prob2' -> '<$tmp'\n";
if ($cgi_context) {
$fp_prob = '<' . $tmp;
} else {
$fp_prob = '<' . $tmp;
}
} else {
&wprint("ERR: fp_prob2_is_tiny, fp_prob2='$fp_prob2', please tell Dave!\n");
}
} elsif ($verbosity <= 1) {
$fp_prob = $fp_prob2; # just use the scientific notation version
} elsif ((!$fp_prob1_digits_exceeded) && ($len0 < $len2) && ($fp_prob2 !~ /e/i)) {
# the %g version is in plain floating point notation, but has more digits than the %f version, so use the %g version
$fp_prob = $fp_prob2; # I'm unsure whether this can really happen
} else {
# display both versions
if ($cgi_context) {
$fp_prob = $fp_prob . ' = ' . $fp_prob2;
} else {
$fp_prob = $fp_prob . ' = ' . $fp_prob2;
}
}
}
}
}
# printf("dbg: display_prob:\n tmp3='%s'\n fp_prob='%s'\n", $tmp1, $tmp3, $fp_prob );
if ($cgi_context) {
$fp_prob =~ s/-/−/g; # Use minus sign instead of hyphen, to avoid inappropriate line-break; or we could use ‑ non-breaking hyphen
}
&timeit_end('display_prob');
return $fp_prob;
}#display_prob
# a=s
# Given n, k & p, calculate and return P(X=k)
# The "R" suffix means this is the BigRat version
# n & k should be scalar integers, p is a BigRat.
# Result ($probability) will be a BigFloat.
sub calc_binom_probR {
my ($n,$k,$p) = @_;
$p = Math::BigRat->new($p);
my $probability = Math::BigRat->new(0);
if ($dbg >= 1) {
&wprint( "dbg: calc_binom_probR( n=" . dspl($n) . ", k=" . dspl($k) . ", p=" . dspl($p) . " )...\n" );
}
&timeit_start('bnok');
my $nchoosek = Math::BigInt->new($n);
$nchoosek = $nchoosek -> bnok( $k );
&timeit_end('bnok');
if ($dbg >= 1) {
&wprint( "dbg: $n -> bnok($k) = nchoosek = " . dspl($nchoosek) . "\n" );
}
if ($dbg >= 2) {
my $commafied_nchoosek = commafy($nchoosek);
if ($cgi_context) {
print "
\n";
print "| ( | $n | ) | = $commafied_nchoosek |
| $k |
| ( | $n | ) | = $commafied_nchoosek |
| $k |
| ( | $n | ) | = $commafied_nchoosek |
| $k |
n=$n0, k=$k0, (n choose k) = overflow";
if ('q' eq $p_a) {
print "
\nTry again, in 'Slow' mode.
Probabilities:
\n"
. " P( X = k ): $fp_prob
\n" );
} else {
&wprint( "Probabilities:\n"
. " P( X = k ): $fp_prob\n" );
}
if ($k0 > 0) {
for (my $i0=0; $i0 < $k0; $i0++) {
&timeit_start('cumu_sum');
my $probability_tmp = &$calc_binom_prob( $n0, $i0, $p );
if ('h' eq $p_a) {
# "hybrid" mode: convert from 64-bit floating point to BigFloat for summing
$probability_tmp = Math::BigFloat->new($probability_tmp);
}
$cummu_0_to_im1 += $probability_tmp;
if ($probability_tmp > $max_addend) {
$max_addend = $probability_tmp;
}
&timeit_end('cumu_sum');
}
}
# Then display the cumulative probabilities
my $cprob1;
&timeit_start('cumu_sum');
if ($n0 == $k0) {
$cprob1 = 1;
} else {
my $probability_tmp = $probability;
if ('h' eq $p_a) {
# in "hybrid" mode we calculate probabilities in 64-bit floating point, but sum them as BigFloats
$probability_tmp = Math::BigFloat->new($probability_tmp);
}
$cprob1 = $cummu_0_to_im1 + $probability_tmp;
if ($probability_tmp > $max_addend) {
$max_addend = $probability_tmp;
}
}
my $cprob2 = 1.0 - $cprob1;
my $cprob3 = 1.0 - $cummu_0_to_im1;
&timeit_end('cumu_sum');
if ($dbg>1) { print "dbg: fp_cprob0 -- p='$p', k0='$k0'\n"; }
my $fp_cprob0 = &display_prob( $cummu_0_to_im1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );
if ($dbg>1) { print "dbg: fp_cprob1 -- p='$p', k0='$k0', n0='$n0'\n"; }
my $fp_cprob1 = &display_prob( $cprob1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
if ($dbg>1) { print "dbg: fp_cprob2 -- p='$p', k0='$k0', n0='$n0'\n"; }
my $fp_cprob2 = &display_prob( $cprob2, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) );
if ($dbg>1) { print "dbg: fp_cprob3 -- p='$p', k0='$k0'\n"; }
my $fp_cprob3 = &display_prob( $cprob3, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) );
if ($cgi_context) {
print( " P( X < k ): $fp_cprob0
\n"
. " P( X ≤ k ): $fp_cprob1
\n"
. " P( X > k ): $fp_cprob2
\n"
. " P( X ≥ k ): $fp_cprob3
| k | P(X=k) | " . "range | " . "P(X in range) | 1 - P(X in range) |
|---|---|---|---|---|
| $i0 | $fp_prob | " . "0..$i0 | " . "$fp_cummu_to_i | $fp_one_minus_cummu_to_i |
ERROR: Server timed out.
\n" .
"The calculation was too computationally intensive to run on my server. But if you " .
'download binomcalc.pl ' .
"and run it on your own computer, you can let it run for as long as necessary.