#!/usr/bin/perl use strict; use warnings; # push @INC,'.'; # require "traceback.pl"; # handy for debugging, but I'm not using it now # This program calculates binomial probabilities (or, optionally, just the binomial coefficient). # Unlike most binomial calculators, this one goes to considerable effort to avoid overflow and # underflow problems, so it works correctly when most others fail. # Copyright 2021, by David A. Burton # Cary, NC USA # Mobile / WhatsApp: +1 919-244-3316 # Email: https://sealevel.info/contact.html # This is free, open source software, but it is copyrighted. You are welcome to use it, in whole # or in part, for any purpose. However, if you do so, I require that you retain this notice, or # a substantially equivalent notice, in copies or substantial excerpts of the program. # TLIB Version Control fills in the version information for us: our $version_str; #--=>keyflag<=-- "&(#)%n, version %v, %11d " $version_str = "&(#)binomcalc.cgi, version 92, 29-Apr-2023 "; our $trimmed_version_str; if ($version_str =~ /, (version [0-9]+, [0-9]*\-[a-zA-Z]{3}\-[0-9]+) /) { $trimmed_version_str = "$1"; } else { $trimmed_version_str = ''; } $| = 1; # We would need cgi.pm for "user_agent" lookup, and some convenience functions, like header(), param(), etc. # But I'm not using any of those. # use CGI ':standard'; # Example of usage: # <!--#INCLUDE VIRTUAL="/cgi-bin/test_cgi.cgi?n=12&k=3&p=1/2" --> # our $Inf = 2e262144; # Scalar floating-point infinity our $POS_NORM_SMALLEST = 2.22507385850720138e-308; # smallest possible normalized positive IEEE 64-bit float value our $POS_DENORM_SMALLEST = 4.94065645841247e-324; # smallest possible denormalized (subnormal) positive IEEE 64-bit float value use Scalar::Util qw(blessed isdual reftype tainted isvstring looks_like_number set_prototype); use Time::HiRes qw(time sleep); # use lib '/root/perl5/lib/perl5/x86_64-linux-thread-multi/Math'; use lib '/root/perl5/lib/perl5'; # for GMP use Math::GMP; # Do this so it'll fail if GMP's missing (otherwise it falls back to the slow lib). If it fails try installing: "cpan Math::GMP" "cpan Math::BigInt::GMP" use Math::BigInt lib => 'GMP'; use Math::BigFloat lib => 'GMP'; use Math::BigRat lib => 'GMP'; # use bignum; <--- Do not do this! (My code expects scalars by default.) # Math::BigFloat->accuracy(500); <--- I'll do this further down (number of digits is now adjustable) # printf(" Bigfloat version = " . $Math::BigFloat::VERSION . "\n" ); BEGIN { # Polyfill for missing nparts method prior to BigFloat version 1.999808 (circa 2017) if (!exists &Math::BigFloat::nparts) { *Math::BigFloat::nparts = sub { my $x = shift; my ($mantis, $exp) = $x->parts(); # both are BigInts my $len = $mantis->length(); # number of digits in the mantissa $exp += ($len-1); # $mantis = Math::BigFloat->new($mantis) / (10**($len-1)); <== this fails for large $len, due to overflow $mantis = Math::BigFloat->new($mantis) / (Math::BigFloat->new(10)**($len-1)); return ($mantis,$exp); }; } } our $dbg = 0; # default our $echo_cmdline = 1; # normally we display command line, but v=0 can suppress it our $vrblx; # temp value used by &dspl() our $typefunc_avail = 0; # needed by sub dspl(), and set to 1 if we successfully load type() from autobox our $in_midst_of_generating_acu_table = 0; # if <table></table> is being generated, this will be 1 (so we can close the table on a timeout) # What is the name of this program file? our $scriptname = $0; if ($scriptname =~ /([^\/\\\:]+)$/) { $scriptname = $1; # just the file name, not the directory } our $scriptdotpl = $scriptname; # this script name, which could end in .pl or .cgi $scriptdotpl =~ s/\.cgi$/.pl/; # same, except always ends in .pl our $parameters1 = $ENV{'QUERY_STRING'}; # when invoked via server-side include our $parameters2 = join(' ',@ARGV); # when run standaline, at a command line (any OS) our $cgi_context; our $parameters; if ($parameters2) { $cgi_context = 0; $parameters = ' ' . $parameters2; # from @ARGV, so it must be at a command prompt } elsif ($parameters1) { $cgi_context = 1; $parameters = ' ' . $parameters1; # from $ENV{'QUERY_STRING'}, so it's probably CGI } else { # no parameters, so is it in CGI context, or not? $cgi_context = ($scriptname =~ /\.cgi$/); # best guess is from the filename extension: .pl or .cgi $parameters = ''; } # The parameters will be processed further down, for the most part, but here we # take a sneek peek at two of them: v={verbosity} and o={outputmode}. our $p_v = ''; # verbosity (debug level) if ($parameters =~ /[ \-\&]v=([0-9\-]+)/) { $p_v = $1; $dbg = 0 + $p_v; if ($dbg <= 0) { $echo_cmdline = 0; # explicit v=0 supresses echoing the command line } } # The o=... parameter is optional, for overriding the output mode: our $p_o = ''; # h = html / cgi, t = plain text, p = <pre> block if ($parameters =~ /[ \-\&]o=([htp])/) { $p_o = $1; $cgi_context = ('h' eq $1); } # In a cgi context, we have to emit this magic line, before we can write anything else. if ($cgi_context || ('p' eq $p_o)) { print "Content-Type: text/html\n\n"; # in case it's running as a .cgi script } if ($dbg>0) { &wprint( "$scriptname $trimmed_version_str\n" ); &wprint( "OS = '$^O'\n" ); # What OS is this? &wprint( "Perl version " . $] . " = " . $^V . "\n" ); &wprint( 'Math::BigFloat->VERSION = ' . Math::BigFloat->VERSION . "\n" ); &wprint( 'Math::BigInt::GMP->VERSION = ' . Math::BigInt::GMP->VERSION . "\n" ); } elsif ($cgi_context) { print( "<!--\n" . " $scriptname $trimmed_version_str\n" . " OS = '$^O'\n" , " Perl version " . $] . " = " . $^V . "\n" . ' Math::BigFloat->VERSION = ' . Math::BigFloat->VERSION . "\n" . ' Math::BigInt::GMP->VERSION = ' . Math::BigInt::GMP->VERSION . "\n" . "-->\n\n" ); } # Because binomial probability calculations can be very computationally intensive, # we need to ensure that we don't end up with too many instances running, lest it # monopolize my little web server. (I don't bother with this if running on the # desktop.) our $num_server_instances = 0; if (($^O =~ /linux/i) && ($scriptname =~ /\.cgi$/)) { $num_server_instances = 1; # my $tmp = `pgrep -a binomcalc\\.cgi`; # &wprint("PGREP output:\n$tmp\n-----------\n"); my $tmpcount = `pgrep -c binomcalc\\.cgi`; $num_server_instances = 0 + $tmpcount; if ($dbg>0) { &wprint("Instance count = '$num_server_instances'\n"); } } if ($num_server_instances > 2) { # The usual case is one instance running (this process). # My server has four cores, so I allow at most two instances, to avoid bogging it down too much. print "<p><b style='color:red'>ERROR: Server busy. Wait a few minutes and try again.</b><br>\n" . '(Or <a href="https://sealevel.info/binomcalc.sphp#download">download binomcalc.pl</a>,' . " and run it on your own computer.)</p>\n"; exit 1; } if ((!$parameters) && !$cgi_context) { # no parameters specified -- print help message and quit &givehelp("** ERROR: no parameters were specified to $scriptname"); exit 1; } if ($dbg >= 1) { &wprintf("dbg: QUERY_STRING='%s', ARGV='%s'\n", (defined $parameters1) ? $parameters1 : 'UNDEF', (defined $parameters2) ? $parameters2 : 'UNDEF' ); } # define type() for debugging: if ($dbg>0) { # use autobox::universal qw(type); if (eval {require autobox::universal;}) { autobox::universal->import('type'); $typefunc_avail = 1; } else { if ($cgi_context) { print '<p><b>Warning:</b> Cannot locate autoboxx/universal.pm in @INC. You may need to install autoboxx, like this:' . "<br>\n" . "cpan autobox::universal\n</p>\n"; } else { print 'Warning: Cannot locate autoboxx/universal.pm in @INC (you may need to install autoboxx)' . "\n" . "In Strawberry Perl you can get autobox like this:\n" . " 1. from a Windows Administrator (elevated) command prompt...\n" . " 2. c:\n" . " 3. cd \\strawberry\\perl\\bin\n" . " 4. cpan autobox::universal\n\n"; } eval("sub type { return '?';}"); } } # for benchmarking; the only parameter is an arbitrary name our %runtimes = (); our %runtime_cnt = (); our %interval_started_at = (); # Call this at start of timing interval. (If it's called more than once, the 2nd call just increments the runtime_cnt.) sub timeit_start { my $name = shift; if (defined $interval_started_at{$name}) { $runtime_cnt{$name} += 1; } else { $interval_started_at{$name} = time(); if (!exists $runtime_cnt{$name}) { $runtime_cnt{$name} = 0; $runtimes{$name} = 0.0; } } } # Call this at the end of the timing interval. sub timeit_end { my $name = shift; if (!exists $interval_started_at{$name}) { die "ERR: timeit_end('$name'), but timeit_start wasn't called."; } my $time_increment = time() - $interval_started_at{$name}; delete $interval_started_at{$name}; $runtimes{$name} += $time_increment; $runtime_cnt{$name} += 1; } # Call this to reset timer (forget previous timings). sub timeit_reset { my $name = shift; delete $interval_started_at{$name}; delete $runtimes{$name}; delete $runtime_cnt{$name}; } # If a name is specified, display the timing stats for that name; # otherwise, display them all. sub timeit_display { my $name; if ($#_ >= 0) { $name = shift; if (exists $interval_started_at{$name}) { &timeit_end($name); } &wprintf("Time %s, %f / %d = %f each\n", $name, $runtimes{$name}, $runtime_cnt{$name}, ($runtimes{$name} / $runtime_cnt{$name}) ); } else { foreach $name (sort keys %runtimes) { &timeit_display($name); } } } # return a description of the input paramter (for my debug prints) sub dspl { local($vrblx) = $_[0]; my( $result, $tp, $rp ); if (!defined $vrblx) { $result = 'UNDEF'; if ($dbg > 2) { &wprint( "dbg: dspl(UNDEF)" ); } } else { $tp = ''; $rp = ''; if ($typefunc_avail) { $tp = &type($vrblx); if (('STRING' eq $tp) and looks_like_number($vrblx)) { if ($vrblx =~ /^(\-|)[0-9]+$/) { $tp = 'Integer'; } else { $tp = 'Float'; } } } if (('' eq $tp) or ('HASH' eq $tp)) { $rp = ref($vrblx); } if ($dbg > 2) { &wprint( "dbg: dspl(), type=$tp, ref=$rp,\n" ); } if (('' eq $rp) && ('' eq $tp)) { $result = "$vrblx"; } elsif ('' ne $rp) { $result = "$rp $vrblx"; } elsif ('STRING' eq $tp) { $result = "$tp '$vrblx'"; } else { $result = "$tp $vrblx"; } } if ($dbg > 2) { &wprint( " result='$result'\n" ); } return $result; } # v= and o= have already been handled; here we process the rest of the parameters. our ($p_n, $p_k, $p_p, $p_m, $p_a, $p_t, $p_d, $p_b); $p_p = "0.5"; # default $p_m = 'b'; # default $p_a = 'q'; # default our $accuracy = 500; # default is 500 digits except for a=q, which is about 14 digits (note: BigFloat default is 40) our $effective_accuracy = $accuracy; sub process1param { my ($opt,$val) = @_; # &wprintf(" opt='%s' val='%s'\n", $opt, $val); if ('n' eq $opt) { if ($val =~ /^([1-9][0-9]*)$/) { $p_n = $1; } else { &wprintf("ERROR: 'n=%s' (should be a positive integer)\n", $val); exit 1; } } elsif ('k' eq $opt) { if ($val =~ /^([0-9]+)$/) { $p_k = $1; } else { &wprintf("ERROR: 'k=%s' (should be 0..n)\n", $val); exit 1; } } elsif ('p' eq $opt) { if ($val =~ /^([0-9\/\.eE\-\+]+)$/) { # if it contains a / then it's a rational number; the e, E, -, and + are so that it can be in scientific notation $p_p = $1; } else { &wprintf("ERROR: 'p=%s' (should be 0..1)\n", $val); exit 1; } } elsif ('m' eq $opt) { # the m=... parameter must be 'b' (default Binomial probability), 'cu' (CUmulative probability), 'acu' (All CUmulative probabilities), or 'co' (binomial COefficient) if ($val =~ /^(b|cu|acu|co)$/i) { $p_m = lc $1; } else { &wprint("ERROR: 'm=$val' (mode) parameter to $scriptname may only be b, cu, acu, or co.\n"); exit 1; } } elsif ('a' eq $opt) { # the a=... parameter must be 's' (Slow = GMP BigRat), 'q' (Quick = scalar float = default), 'h' (Hybrid), 'x' (eXperimental = BigFloat), or 'xx' (also eXperimental) if ($val =~ /^(x(x|)|s|q|h)$/) { $p_a = lc $1; } else { &wprintf("ERROR: 'a=$val' (accuracy) parameter may only be q, h, s, x or xx.\n"); exit 1; } } elsif ('t' eq $opt) { # the t=... (timeout) parameter is optional if ($val =~ /^([0-9\.]*)$/) { $p_t = $1; } else { &wprint("ERROR: 't=$val' (timeout) parameter must be numberic (max number of seconds).\n"); exit 1; } } elsif ('d' eq $opt) { # the d=... (digits) parameter is optional (but ignored for a=q) if ($val =~ /^([0-9]+)$/) { $p_d = $1; $effective_accuracy = $accuracy = 0 + $p_d; } else { &wprint("ERROR: 'd=$val' (digits of precision) parameter must be a non-negative integer.\n" . "(Default is 500 digits, except for a=q, which is about 15 digits.)\n"); exit 1; } } elsif ('b' eq $opt) { # the b=1 (benchmark) parameter is optional if ($val =~ /^([01])$/) { $p_b = $1; } else { &wprint("ERROR: 'b=$val' (benchmark) parameter must be 0 or 1.\n"); exit 1; } } elsif (('v' eq $opt) || ('o' eq $opt)) { # handled above } else { &wprintf("ERROR: unrecognized parameter to $scriptname: '%s=%s'\n", $opt, $val); &wprint("parameters='$parameters'\n"); exit 1; } } my $parm; for $parm (split(/\s+\-|\s+|\&/, $parameters)) { if ($dbg > 1) { &wprint( "dbg: parm='$parm'\n"); } if ('' ne $parm) { if ($parm =~ /^([a-zA-Z]+)=(.+)$/) { my $opt = lc $1; my $val = $2; &process1param( $opt, $val ); } else { &wprintf("ERROR: unrecognized parameter to $scriptname: '%s'\n", $parm); &wprint("Parameters='$parameters'\n"); exit 1; } } } # the k=... parameter is reqired # the n=... parameter is reqired # the p=... parameter defaults to 0.5 # the m=... parameter defaults to b # the a=... parameter defaults to q our $p; # Note: $p will be set waaaay down in the program if ((!defined $p_n) && !defined $p_k) { &givehelp(''); exit 1; } if ((!defined $p_n) || !defined $p_k) { &givehelp("ERROR: the n and k parameters are required.\n"); exit 1; } if ($accuracy < 14) { if ($accuracy < 1) { $effective_accuracy = $accuracy = 1; } &wprintf("Warning: 'd=%d' digits is less than the accuracy of the 'a=q' default 'quick' setting (about 14 digits). That's probably a mistake.\n", $accuracy ); } elsif ($dbg && ('q' ne $p_a) && (('h' ne $p_a) || ('b' ne $p_m))) { &wprintf("Computing with 'd=%d' digits of accuracy.\n", $accuracy ); } Math::BigRat->accuracy($accuracy); Math::BigFloat->accuracy($accuracy); if (($effective_accuracy > 13) && (('q' eq $p_a) || ('h' eq $p_a))) { # in a=q or a=h mode we calculate with 64-bit scalar floats, so effective accuracy is only about 14 digits $effective_accuracy = 13; # I'm being conservative } if ($echo_cmdline) { # echo the equivalent Perl command-line my $cmdline = 'perl ' . $scriptdotpl; if ($p_v) { $cmdline .= " v=$p_v"; } # if ($p_t ne '') { $cmdline .= " t=$p_t"; } if ($p_m) { $cmdline .= " m=$p_m"; } if ($p_a) { $cmdline .= " a=$p_a"; } if ($p_d) { $cmdline .= " d=$p_d"; } if ($p_b) { $cmdline .= " b=$p_b"; } if ($p_n) { $cmdline .= " n=$p_n"; } if ($p_k ne '') { $cmdline .= " k=$p_k"; } # note: k can be zero if ($p_p) { $cmdline .= " p=$p_p"; } if ($cgi_context) { print "<p><span style='background-color:#FFFF99;max-width:100%;padding:5px 10px 5px 3px' " . "title='This is the equivalent command line, to do this calculation on your own computer.'>$cmdline</span> </p>\n" } else { print "\n$cmdline\n"; } } sub givehelp { my $ermsg = shift; # First, reformat the TLIB version info, for prettier display # e.g., $version_str = "&(#)binomcalc.pl, version 42, 19-Dec-2020 "; my $vstr = $trimmed_version_str; if ('' ne $vstr) { $vstr = " ($vstr)"; } if (!defined $ermsg) { $ermsg = ''; } elsif ('' ne $ermsg) { $ermsg .= "\n\n"; } # Now display the Help message, including version number &wprint( "\n" . $ermsg . "$scriptdotpl -- extreme precision binomial calculator\n" . "by Dave Burton$vstr\n" . "\n" . "Note: you can rename this program to binomcalc.cgi and use it from a web page, via a\n" . "\"server-side include,\" as in this example:\n" . "\n" . " <!--#INCLUDE VIRTUAL=\"/cgi-bin/binomcalc.cgi?n=12&k=3&p=1/2\" -->\n" . "\n" . "That does the same calculation as this command:\n" . "\n" . " c:\\> perl -f binomcalc.pl n=12 k=3 p=1/2\n" . "\n" . "The following command-line parameters are supported:\n" . "\n" . " n={integer} (REQUIRED: the n of 'n choose k')\n" . " k={integer} (REQUIRED: the k of 'n choose k')\n" . " p={probability} (decimal value 0..1, e.g., 0.5, or a fraction, e.g., 1/3)\n" . " m={b, cu, acu, co} (b = binomial probability, cu = cumulative probabilities,\n" . " acu = all cumulative probabilities, co = coefficient only)\n" . " a={q, s, h, x, xx} (q = quick/scalar = default, s is slower w/ extreme precision,\n" . " h = hybrid, x and xx are experimental [don't use them])\n" . " o={h, t, p} (output: h = html, t = text, p = <pre>, default determined by context)\n" . " t=nnn (timeout limit, nnn seconds; default = 0 for no timeout)\n" . " b=1 (benchmark it; show timings)\n" . " d={integer} (digits of accuracy: default for a=s is 500; for a=q it's about 13)\n" . " v={integer} (verbosity; 0 = no debug prints, 1 = some, 2 = lots)\n" . "\n" . "Other notes:\n" . "1. Default mode is m=b (calculate binomial probability).\n" . "2. Default accuracy setting is a=q, which is fastest. For higher accuracy use a=s.\n" . " The x and xx (experimental) settings are usually slower and not as good.\n" ); # And then exit, with errorlevel = 1 exit 1; } # wrap a printable string in <p></p>, add <br>s, etc., if running in a CGI context sub wrap { my $str = shift; if ($cgi_context) { $str =~ s/\n$//; $str =~ s/\n /\n  /g; $str =~ s/\n /\n /g; $str =~ s/ / /g; $str =~ s/</</g; $str =~ s/\n/<br>\n/g; $str = "<p>$str</p>\n"; } return $str; } # Ensure that argument is padded to at least n characters, and roughly centered sub center { my ($s,$n) = @_; my $result = $s; my $l = length($s); if ($l <= $n) { my $pr = int(($n-$l)/2); my $pl = ($n-$l) - $pr; $result = (' ' x $pl) . $s . (' ' x $pr); # if there's a leading '<' or '>' then we might move the result one space to the left, to line up better with other m=acu results: if ($pl > $pr) { if ($result =~ / ([<>][^ ]*) /) { $result = substr($result,1) . ' '; } } } return $result; } # wrapped print -- does something reasonable either at command-line or in CGI context sub wprint { my $str = shift; print &wrap($str); } # wrapped printf -- does something reasonable either at command-line or in CGI context sub wprintf { my $fmt = shift; my $str = sprintf( $fmt, @_ ); &wprint($str); } # insert commas into an integer if it's > 4 digits long sub commafy { my $number = shift; my @pieces = (); $number .= ''; # if ($dbg>0) { &wprint "dbg: number='$number'\n"; } if ((length($number) > 4) && ($number =~ /^(\-|)[0-9]+$/)) { while (length($number) > 0) { unshift(@pieces,substr($number,-3)); substr($number,-3) = ''; # if ($dbg>0) { # $tm1 = join(',',@pieces); # &wprint( "dbg: number='$number', pieces='$tm1'\n" ); # } } $number = join(',',@pieces); } # if ($dbg>0) { &wprint "dbg: number='$number'\n"; } return $number; }#comafy # Format a float or BigFloat into normalized scientific notation. # This is like the ->bnstr() method for BigFloats, but with mantissa rounded to N significant digits. # Input parameters are $x (either scalar float or BigFloat), and $n (number of significant digits). sub bnstrN { my ($x,$n) = @_; my $x2 = Math::BigFloat->new($x); # in case $x is scalar my $result; my ($mantis,$exp) = $x2->nparts(); # print "dbg: bnstrN:\n exp=" . &dspl($exp) . "\n"; $exp->bfround(0); # otherwise exponent might display as "-6.00000000000000000000000000000" instead of "-6" my $rounded_mantis = $mantis->copy(); $rounded_mantis->bfround(-($n-1)); if ($rounded_mantis =~ /^10\.([0-9]*)$/) { # Sometimes rounding "almost 1" results in "10.000000000e-1", this changes it to "1.0000000000" $rounded_mantis = '1.0' . $1; $exp++; } if ($exp) { if (ref($exp)) { # convert BigFloat to scalar, lest we get something like "1.00e-12.0000000000000" $exp = $exp->numify(); } $result = $rounded_mantis . 'e' . $exp; } else { $result = $rounded_mantis; } # print "dbg: bnstrN:\n exp=$exp\n mantis=$rounded_mantis\n result=$result\n"; return $result; } # if input is 0.005 then result is 2 (there are two zeros after the decimal point) sub number_of_zeros_after_the_decimal_point { my $x = shift; my $result; if (ref $x) { # BigFloat or BigRat if ($x <= 0) { # negative is only possible due to accumulated rounding errors $result = $effective_accuracy - 1; } else { my $result2 = int(abs(log10($x))); # print "dbg: x=" . &dspl($x) . " result2=" . &dspl($result2) . "\n"; $result = $result2->numify(); } } else { # scaler 64-bit float if ($x <= 0) { $result = $effective_accuracy - 1; if ($dbg>0) { print "dbg: x.LE.0, x=" . &dspl($x) . " result=" . &dspl($result) . "\n"; } } else { $result = int(abs(log10($x))); } } if ($dbg>0) { print "dbg: number_of_zeros_after_the_decimal_point(" . &dspl($x) . ") = " . &dspl($result) . "\n"; } return $result; } # Depending on how tiny the result is, we might display it in both floating point and scientific notation, or just in one or the other. # First parameter ($prob) should be a scalar float or BigFloat in [0..1]. # (Due to accumulated rounding errors, $prob can sometimes be slightly < 0 or slightly > 1. We handle that gracefully.) # Optional second parameter ($verbosity) can be 1 (to display only "5.450654626823856e-368") or 2 (for "<0.000000000000001 = 5.450654626823856e-368"). # Third parameter ($digits) is the maximum number of sighificant digits with which to display the result (default 15). # (Note: do not confuse $digits with global $effective_accuracy, the number of digits ued in calculations.) # Forth parameter ($max_addend) is an optional parameter for effectively reducing the number of signifcant digits when $prob is a small number calculated from # sums & differences of larger-magnitude addends. For example, if digits is 6, then 1.000000 - 0.999990 = 0.000010 has only two significant digits, not six. # $max_addend should be the largest of the addends/subtrahends/minuends which were included in the calculation. # Fifth parameter ($definitely_not_exactly_0or1) can be passed as True to tell this subroutine that "zero" is really slightly larger than zero, # and "one" is always really slightly less than one. sub display_prob { &timeit_start('display_prob'); my ($prob, $verbosity, $digits, $max_addend, $definitely_not_exactly_0or1) = @_; my $prob_is_scalar = !(ref $prob); # true for IEEE 64-bit, false for BigRat or BigFloat if (!defined $verbosity) { $verbosity = 2; } if (!defined $digits) { $digits = 15; if ($dbg>0) { &wprint( "dbg: digits defaults to $digits\n" ); } } else { if ($dbg>0) { &wprint( "dbg: digits passed in as $digits\n" ); # &traceback("display_prob"); } } if (!defined $definitely_not_exactly_0or1) { $definitely_not_exactly_0or1 = 0; } if ($dbg>0) { printf( "dbg: display_prob( $prob, $verbosity, $digits, %s, $definitely_not_exactly_0or1 )\n", (defined $max_addend) ? $max_addend : '{undef}' ); } # first, we calcluate a plain floating point representation my $fp_prob; my $digits_fp = $digits; # floating point format digits (as opposed to scientific notation digits) my $fp_prob1; my $fp_prob1_digits_exceeded = 0; my $fp_prob1_is_tiny = 0; my $smallest_possible_fp = substr('0.00000000000000000000',0,$digits_fp+1) . "1"; my $all_nines_fp = substr('0.99999999999999999999',0,$digits_fp+1); if ($prob <= $smallest_possible_fp) { if (($prob <= 0) && !$definitely_not_exactly_0or1) { # exactly 0 and exactly 1 get special-cased $fp_prob = $fp_prob1 = substr('0.00000000000000000000',0,$digits_fp+2); } else { $fp_prob1 = $smallest_possible_fp; if ($cgi_context) { $fp_prob = '<' . $smallest_possible_fp; } else { $fp_prob = '<' . $smallest_possible_fp; } $fp_prob1_digits_exceeded = $fp_prob1_is_tiny = 1; } } elsif ($prob > $all_nines_fp) { if (($prob >= 1) && !$definitely_not_exactly_0or1) { $fp_prob = $fp_prob1 = substr('1.00000000000000000000',0,$digits_fp+2); } else { $fp_prob1 = $all_nines_fp; if ($cgi_context) { $fp_prob = '>' . $all_nines_fp; } else { $fp_prob = '>' . $all_nines_fp; } $fp_prob1_digits_exceeded = 1; } } else { my $fmt_f; if ($prob < 0.1) { $fmt_f = '%.' . ($digits_fp+1) . 'f'; # default "%.16f" $fp_prob = $fp_prob1 = sprintf($fmt_f, $prob); # Problem: 0.099999999999999 would print as 0.1, resulting in one too many digits; this is the hacky fix: if ($fp_prob =~ /^0.[1-9]/) { $fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f" $fp_prob = $fp_prob1 = sprintf($fmt_f, $prob); } } else { $fmt_f = '%.' . $digits_fp . 'f'; # default "%.15f" $fp_prob = $fp_prob1 = sprintf($fmt_f, $prob); } # print "dbg: fmt_f = '$fmt_f'\n"; } my $len0 = length($fp_prob1); if ($dbg>0) { &wprintf("dbg: fp_prob1='$fp_prob1', fp_prob='$fp_prob'\n"); } # scientific notation is trickier... my $prob2_is_tiny = ($prob <= 0); my $prob_zeros = 0; my $max_addend_zeros = 0; if ($prob < 0.01) { $prob_zeros = number_of_zeros_after_the_decimal_point($prob); # ...but we only bother with scientific notation for probabilities less than 0.01 if (!defined $max_addend) { if ($prob_is_scalar) { $max_addend_zeros = 308; if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=1, so max_addend_zeros=$max_addend_zeros\n" ); } } else { # $max_addend = $prob; $max_addend_zeros = $prob_zeros; if ($dbg>0) { &wprint( "dbg: max_addend=undef and prob_is_scalar=0, set max_addend_zeros=prob_zeros=$max_addend_zeros (I probably need to fix this!)\n" ); } } } else { # we might not have as many digits of accuracy as we'd hoped. $max_addend_zeros = number_of_zeros_after_the_decimal_point($max_addend); if ($definitely_not_exactly_0or1 && $prob2_is_tiny & ($dbg>1)) { print "dbg: prob2_is_tiny, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; } if ($dbg>0) { print "dbg: prob2_is_tiny=$prob2_is_tiny, definitely_not_exactly_0or1=$definitely_not_exactly_0or1, prob='$prob', max_addend_zeros=$max_addend_zeros, prob_zeros=$prob_zeros, digits=$digits\n"; } if ($prob_zeros > $max_addend_zeros) { if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < ($digits + 3)) { if ($dbg>0) { print "dbg: effective_accuracy = " . dspl($effective_accuracy) . "\n"; print "dbg: prob_zeros = " . dspl($prob_zeros) . "\n"; print "dbg: max_addend_zeros = " . dspl($max_addend_zeros) . "\n"; print "dbg: digits = " . dspl($digits) . "\n"; } $digits = ($effective_accuracy - ($prob_zeros - $max_addend_zeros)) - 3; if ($dbg>0) { print "dbg: new digits = " . dspl($digits) . "\n"; } if ($digits < 2) { if ($digits <= 0) { $digits = 1; $prob2_is_tiny = 1; } else { $digits = 2; } if ($dbg>0) { print " new digits = " . dspl($digits) . "\n"; } } } } } if ($prob_is_scalar && ($prob > 0) && ($prob < $POS_NORM_SMALLEST)) { if ($dbg>0) { &wprintf( "dbg: scalar prob='%.16g' < POS_NORM_SMALLEST='%.16g'\n", $prob, $POS_NORM_SMALLEST ); } # result is so tiny that we lost digits of precision due to denormalized 64-bit scalar representation my $tmp = sprintf("%.1g", $prob); if ($tmp =~ /e(\-[0-9]+)$/i) { my $tmp_expon = $1; # should be -308..-324 because POS_NORM_SMALLEST = 2.22507385850720138e-308 and POS_DENORM_SMALLEST = 4.94065645841247e-324 if ($dbg>0) { &wprint( "dbg: scalar subnormal prob is approx. $tmp, digits was='$digits', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny'\n" ); } if( ($tmp_expon > -308) || ($tmp_expon < -324)) { &wprintf( "ERR: prob='%.16g' tmp='$tmp' exp='$tmp_expon' (outside expected range)\n", $prob ); exit 1; } if ($tmp_expon < -309) { my $tmp_sav_digits = $digits; $digits += ($tmp_expon + 309); # reduce number of digits if ($dbg>0) { &wprint( "dbg: digits reduced from $tmp_sav_digits to $digits\n" ); } if ($digits <= 1) { $digits = 1; $prob2_is_tiny = 1; } } if ($dbg>0) { &wprint( "dbg: prob='$prob', tmp='$tmp', exp='$tmp_expon', tiny='$prob2_is_tiny', new denorm scalar digits = " . dspl($digits) . "\n" ); } } } # print "dbg: prob2_is_tiny = '$prob2_is_tiny'\n"; if ($prob2_is_tiny) { my $tmp_expon = ($max_addend_zeros + $effective_accuracy - 2); if ($dbg>0) {print "dbg: prob2_is_tiny = True, expon='$tmp_expon', max_addend_zeros=$max_addend_zeros, effective_accuracy=$effective_accuracy, (defined max_addend)='" . (defined $max_addend) . "'\n"; } if ($prob_is_scalar && ($tmp_expon > 317)) { if ($dbg>0) {print "dbg: expon='$tmp_expon' -> 117 because max_addend=undef\n";} $tmp_expon = 317; } if (defined $max_addend) { # more conservative for cummulative sums, since they tend to accumulate errors $tmp_expon -= 1; } # if (0 == $prob) { # $fp_prob = '(underflow)'; # } else { $fp_prob = '1.0e-' . $tmp_expon; if ($cgi_context) { $fp_prob = '<' . $fp_prob; } else { $fp_prob = '<' . $fp_prob; } # } } else { if ($dbg>0) { &wprintf( "prob_zeros=$prob_zeros, max_addend_zeros=$max_addend_zeros, (prob_zeros > max_addend_zeros)=%d\n", ($prob_zeros > $max_addend_zeros) ); } if ($prob_zeros > $max_addend_zeros) { if (($effective_accuracy - ($prob_zeros - $max_addend_zeros)) < 1) { $prob2_is_tiny = 1; if (!$fp_prob1_is_tiny) { &wprint( "ERR: prob2 tiny but not prob1! Please note the exact parameter you used, and tell Dave!\n" . "prob='$prob'\n" . "prob1='$fp_prob1'\n" . "prob_zeros=$prob_zeros\n" . "max_addend_zeros=$max_addend_zeros\n" . "effective_accuracy=$effective_accuracy\n" ); } } } my $fp_prob2; # Here we generate what I call the %g version (but I don't really use %g anymore) #if ($prob_is_scalar) { # $fp_prob2 = sprintf( ('%#.' . ($digits-1) . 'e' ), $prob ); # '#' flag avoids removal of trailing decimal point # if ($dbg>0) { &wprint( "dbg: scalar fp_prob2='$fp_prob2' vs. '" . bnstrN($prob,$digits) . "'\n" ); } #} else { $fp_prob2 = bnstrN($prob,$digits); # scientific notation (do this because sprintf %g doesn't work for BigFloats) # } if ($dbg>0) { &wprint( "dbg: prob='$prob', digits=$digits, initial fp_prob2='$fp_prob2'\n" ); } if ($fp_prob2 =~ /^(\-|)([1-9])\.([0-9]*)e\-1$/) { # change "8.187225652655315e-1" to "0.8187225652655315" if ($dbg>0) { &wprint( "dbg: fp2 fixup: was '$fp_prob2'\n"); } $fp_prob2 = $1 . '0.' . $2 . $3; if ($dbg>0) { &wprint( " now '$fp_prob2'\n"); } } my $len2 = length($fp_prob2); if ($dbg>0) { &wprint( "dbg:\n fb0 = '$fp_prob'\n fb2 = '$fp_prob2'\n" ); } if (($fp_prob1 ne $fp_prob2) && ($fp_prob2 ne substr($fp_prob1,0,$len2))) { # the %f and %g versions are different, and the %g version isn't just %f version missing a digit or two at the end if ($prob2_is_tiny) { if ($fp_prob2 =~ /^([^e]*)e(.*)$/) { my $tmp = '1.0e' . (1+$2); # print "dbg: fp_prob2 is tiny = '$fp_prob2' -> '<$tmp'\n"; if ($cgi_context) { $fp_prob = '<' . $tmp; } else { $fp_prob = '<' . $tmp; } } else { &wprint("ERR: fp_prob2_is_tiny, fp_prob2='$fp_prob2', please tell Dave!\n"); } } elsif ($verbosity <= 1) { $fp_prob = $fp_prob2; # just use the scientific notation version } elsif ((!$fp_prob1_digits_exceeded) && ($len0 < $len2) && ($fp_prob2 !~ /e/i)) { # the %g version is in plain floating point notation, but has more digits than the %f version, so use the %g version $fp_prob = $fp_prob2; # I'm unsure whether this can really happen } else { # display both versions if ($cgi_context) { $fp_prob = $fp_prob . ' = ' . $fp_prob2; } else { $fp_prob = $fp_prob . ' = ' . $fp_prob2; } } } } } # printf("dbg: display_prob:\n tmp3='%s'\n fp_prob='%s'\n", $tmp1, $tmp3, $fp_prob ); if ($cgi_context) { $fp_prob =~ s/-/−/g; # Use minus sign instead of hyphen, to avoid inappropriate line-break; or we could use ‑ non-breaking hyphen } &timeit_end('display_prob'); return $fp_prob; }#display_prob # a=s # Given n, k & p, calculate and return P(X=k) # The "R" suffix means this is the BigRat version # n & k should be scalar integers, p is a BigRat. # Result ($probability) will be a BigFloat. sub calc_binom_probR { my ($n,$k,$p) = @_; $p = Math::BigRat->new($p); my $probability = Math::BigRat->new(0); if ($dbg >= 1) { &wprint( "dbg: calc_binom_probR( n=" . dspl($n) . ", k=" . dspl($k) . ", p=" . dspl($p) . " )...\n" ); } &timeit_start('bnok'); my $nchoosek = Math::BigInt->new($n); $nchoosek = $nchoosek -> bnok( $k ); &timeit_end('bnok'); if ($dbg >= 1) { &wprint( "dbg: $n -> bnok($k) = nchoosek = " . dspl($nchoosek) . "\n" ); } if ($dbg >= 2) { my $commafied_nchoosek = commafy($nchoosek); if ($cgi_context) { print "<hr>\n"; print "<table>\n"; print "<tr style='margin:45px 0'><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>(</td><td style='vertical-align:top'><b>$n</b></td><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>)</td><td rowspan=2 style='vertical-align:middle'> = $commafied_nchoosek</td></tr>\n"; print "<tr><td><b>$k</b></td></tr>\n"; print "</table>\n"; print "<hr>\n"; } else { print "$n choose $k = $commafied_nchoosek\n"; } } my $one = Math::BigRat->new(1); my $one_minus_p = $one - $p; if ($dbg >= 1) { &wprint( "p = $p\n1-p = $one_minus_p\n" ); } my $tmpa = $p -> copy(); if ($dbg >= 3) { &wprint( "initial tmpa = $tmpa\n" ); } my $tmpb = $one_minus_p -> copy(); if ($dbg >= 3) { &wprint( "initial tmpb = $tmpb\n" ); } &timeit_start('bpow'); if (0 == $k) { # On Linux, the GMP BigRat gives X^0 = X instead of 1; this works around that bug $tmpa = $one; } else { $tmpa = $tmpa->bpow($k); } &timeit_end('bpow'); &timeit_start('bpow'); if ($n == $k) { $tmpb = $one; } else { $tmpb = $tmpb->bpow($n-$k); } &timeit_end('bpow'); if ($dbg >= 3) { &wprint( "dbg[3]: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" ); } $probability = $tmpa * $tmpb * $nchoosek; # $probability = $probability->numify(); # believe it or not, if p is a ratio, then numufy() can be 70% of the runtime # Convert from BigRat to BigFloat # $probability = $probability->as_float(4000); $probability = Math::BigFloat->new($probability->numerator()) / $probability->denominator(); if ($dbg >= 2) { &wprint( "calc_binom_probR( $n, $k, $p ) = $probability\n" ); } # if ($dbg >= 2) { &wprint( "dbg: calc_binom_probR() returning, result = " . &dspl($probability) . "\n" ); } return $probability; } # a=x # Given n, k & p, calculate and return P(X=k) # The "F" suffix means this is the BigFloat (slow) version (which turns # out to be superior to the BigRat version, most of the time). # n & k should be scalar integers, p is a BigFloat. # Result ($probability) will be a scalar float. sub calc_binom_probF { my ($n,$k,$p) = @_; my $probability = Math::BigFloat->new(0); &timeit_start('nchoosek'); my $nchoosek = Math::BigFloat->new($n); $nchoosek = $nchoosek -> bnok( $k ); &timeit_end('nchoosek'); if ($dbg >= 2) { my $commafied_nchoosek = commafy($nchoosek); if ($cgi_context) { print "<hr>\n"; print "<table>\n"; print "<tr style='margin:45px 0'><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>(</td><td style='vertical-align:top'><b>$n</b></td><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>)</td><td rowspan=2 style='vertical-align:middle'> = $commafied_nchoosek</td></tr>\n"; print "<tr><td><b>$k</b></td></tr>\n"; print "</table>\n"; print "<hr>\n"; } else { print "$n choose $k = $commafied_nchoosek\n"; } } &timeit_start('ones'); my $one = Math::BigFloat->new(1); my $one_minus_p = $one - $p; &timeit_end('ones'); if ($dbg >= 2) { &wprint( "p = $p\n1-p = $one_minus_p\n" ); } &timeit_start('bpows'); my $tmpa = $p -> copy(); my $tmpb = $one_minus_p -> copy(); if (0 == $k) { # On Linux, the GMP BigRat gives X^0 = X instead of 1. I've not seen that bug for BigFloat, # but this makes sure. $tmpa = $one; } else { $tmpa = $tmpa->bpow($k); } &timeit_start('bpows'); if ($n == $k) { $tmpb = $one; } else { $tmpb = $tmpb->bpow($n-$k); } &timeit_end('bpows'); if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb, nchoosek=$nchoosek\n" ); } &timeit_start('prob'); $probability = $tmpa * $tmpb * $nchoosek; &timeit_end('prob'); if ($dbg >= 2) { &wprint( "The probability of $n choose $k (w/ p=$p) is $probability\n" ); } # $probability = $probability->numify(); return $probability; } # Calculate binomial coefficient, "n choose k". Equivalent to .bnok() but for scalars: # n! / ((k!) * ((n-k)!)) sub n_choose_k { my ($n, $k) = @_; my $result = 1; if ((!defined $k) || ($k < 0) || ($k > $n)) { die "ERROR: Bad parameters to n_choose_k($n,$k), stopped\n"; } else { my $z = $n - $k; if ($z > $k) { ($z, $k) = ($k, $z); # swap 'em, so that $k >= $z } # At this point we know that $n >= $k >= $z # n! is a multiple of k!, so we'll skip those multiplications & divisions. # Additionally, we know that n-k = z, so counting k up to n will take the # same number of loops as counting z down to 1, which means we only need # to test one of the two counters. my $x = $k+1; while ($x <= $n) { # print "dbg: x = $x, z = $z\n"; $result *= $x; $x += 1; $result /= $z; $z -= 1; } } if ($dbg>0) { &wprint( "dbg: ($n choose $_[1]) = $result\n" ); } return $result; } # Given n, k & p, calculate and return P(X=k) # The "S" suffix means this is the short / scalar ("quick") version. # Inputs are assumed to be scalars (m=int, k=int, p=float). # No bignums are used. # The only problem with this simple version is that it's vulnerable to # overflow of "n choose k" and underflow of "p^k" and "(1-p)^(n-k)". # *** THIS SUBROUTINE IS NOT CURRENTLY USED *** sub calc_binom_probS { my ($n,$k,$p) = @_; my $probability1 = 0; if ($dbg>0) { my $n_dspl = dspl($n); my $k_dspl = dspl($k); &wprint("dbg: calc_binom_probS( n=$n_dspl, k=$k_dspl )\n"); } my $nchoosek1 = $n; $nchoosek1 = &n_choose_k($n,$k); if ($dbg>0) { &wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek1) . "\n"); } if ($dbg >= 2) { my $commafied_nchoosek = commafy($nchoosek1); if ($cgi_context) { print "<hr>\n"; print "<table>\n"; print "<tr style='margin:45px 0'><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>(</td><td style='vertical-align:top'><b>$n</b></td><td rowspan=2 style='font-size:38pt;padding:0 0 10px 0;font-family:Arial Light;Arial;font-weight:lighter'>)</td><td rowspan=2 style='vertical-align:middle'> = $commafied_nchoosek</td></tr>\n"; print "<tr><td><b>$k</b></td></tr>\n"; print "</table>\n"; print "<hr>\n"; } else { print "$n choose $k = $commafied_nchoosek\n"; } } my $one_minus_p1 = 1 - $p; if ($dbg >= 2) { &wprint( "p = $p\n1-p = $one_minus_p1\n" ); } my $tmpa1 = $p; my $tmpb1 = $one_minus_p1; $tmpa1 = $tmpa1 ** $k; $tmpb1 = $tmpb1 ** ($n-$k); if ($dbg >= 2) { &wprint( "dbg: a=$tmpa1, b=$tmpb1, nchoosek=$nchoosek1\n" ); } $probability1 = $tmpa1 * $nchoosek1 * $tmpb1; if ($dbg >= 2) { &wprint( "The probability of $n choose $k (w/ p=$p) is $probability1\n" ); } return $probability1; } # Given n, k & p, calculate and return P(X=k) # The "S2" suffix means this is the 2nd short / scalar version. # Inputs are assumed to be scalars (m=int, k=int, p=float). # No bignums are used. # This version is more robust than the "S" version. It fixes the possible # overflow of "n choose k" but not the underflow of "p^k" and "(1-p)^(n-k)". # Here's a test case that this version fixes: # perl binomcalc.pl -n=2000 -k=890 -p=0.46 -d=2 # result = 0.007243291418493 # (Berman gets that one right; Vassarstats gets it wrong in the 3rd digit.) # *** THIS SUBROUTINE IS NOT CURRENTLY USED *** sub calc_binom_probS2 { my ($n,$k1,$p) = @_; my $k = $k1; my $probability = 0; my $result = 1; my $cntr = 1; my $tmpmsg; # for debug prints if ($dbg>0) { my $n_dspl = dspl($n); my $k_dspl = dspl($k); &wprint("dbg: calc_binom_probS2( n=$n_dspl, k=$k_dspl, p=$p )\n"); } # my $nchoosek = &n_choose_k($n,$k); # if ($dbg>0) { # &wprint("dbg: n=" . dspl($n) . ", k=" . dspl($k) . ", nchoosek=" . dspl($nchoosek) . "\n"); # } my $one_minus_p = 1 - $p; if ($dbg >= 2) { &wprint( "p = $p\n1-p = $one_minus_p\n" ); } my $tmpa = $p; my $tmpb = $one_minus_p; $tmpa = $tmpa ** $k; $tmpb = $tmpb ** ($n-$k); if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); } # $probability = $tmpa * $nchoosek * $tmpb; # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars: # n! / ((k!) * ((n-k)!)) my $z = $n - $k; if ($z > $k) { ($z, $k) = ($k, $z); # swap 'em, so that $k >= $z } # At this point we know that $n >= $k >= $z my $mul_by_a_done = 0; my $mul_by_b_done = 0; # The result we need is (n choose k) * $tmpa * $tmpb. # (n choose k) is often very, very large. $tmpa and $tmpb are often very, very tiny. # Calculate (n choose k). # # n! is a multiple of k!, so we'll skip those multiplications & divisions. # Additionally, we know that n-k = z, so counting k up to n will take the # same number of loops as counting z down to 1, which means we only need # to test one of the two counters. # # To avoid loss of precision in extreme cases, if the (n choose k) value begins to approach # the limit of what can be represented in a IEEE 754 double float (a little over 1e308) then # we do the multiplications by $tmpa and perhaps $tmpb in the midst of the calculation of # (n choose k). my $x = $k+1; while ($x <= $n) { # print "dbg: x = $x, z = $z\n"; $result *= $x; $x += 1; $result /= $z; $z -= 1; if ($result > 1e290) { if (!$mul_by_a_done) { if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=$tmpa,"; } $result *= $tmpa; $mul_by_a_done = 1; if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } if ($result > 1e290) { if (!$mul_by_b_done) { if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, b=$tmpb,"; } $result *= $tmpb; $mul_by_b_done = 1; if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } } } $cntr++; } if (!$mul_by_a_done) { if ($dbg>0) { $tmpmsg = "dbg: late mul by a, X before = $result, a=$tmpa,"; } $result *= $tmpa; if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); } } if (!$mul_by_b_done) { if ($dbg>0) { $tmpmsg = "dbg: late mul by b, X before = $result, b=$tmpb,"; } $result *= $tmpb; if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); } } $probability = $result; if ($dbg >= 2) { &wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" ); } return $probability; } # base 10 logarithm sub log10 { my $n = shift; if ($n <= 0) { return -10000; } # if (($n < -1.911e-12) && ($n > -1.913e-12)) { # &traceback('log10'); # } return log($n)/log(10); } # Input is small (perhaps very tiny!) positive number, $p, between 0 and 1. # This function determines the maximum power to which $p can be raised without # the result having an exponent less than -100. # (This is used as part of an underflow avoidance strategy.) # E.g., if $p = 1E-5 then the result is 20. if $p = 1E-20 then the result is 10. sub max_nonunderflowing_power1 { my ($p) = @_; my $result; my $ap = abs($p); if ($ap <= 1e-100) { $result = 1; } elsif ($ap > 0.99977) { $result = 1000000; # arbitrary maximum of one million } else { my $tmp = abs(log10($ap)); # if p=1E-5 then this is 5 $result = int(100/$tmp); # if p=1E-5 then this is 20 } return $result; } # one-digit log10: 1 2 3 4 5 6 7 8 9 9.5 our @quiklog = (0, 0, 0.30103, 0.47712, 0.60206, 0.69897, 0.77815, 0.84510, 0.90309, 0.95424, 0.97772 ); # Roughly equivalent to max_nonunderflowing_power1(), calculated a different way. # For scalars, this is slower, but for BigFloats it might be faster? sub max_nonunderflowing_power2 { my ($p) = @_; my $result; my $ap = abs($p); if ($ap <= 1e-100) { $result = 1; } elsif ($ap > 0.99977) { $result = 1000000; } else { $ap = Math::BigFloat->new($ap); # my $e = $p->exponent(); this doesn't work (it's apparently not normalized) my ($m,$e) = $ap->nparts(); my $substr = int($m+0.5); my $tmp = abs($e) - $quiklog[$substr]; # if p=1E-5 then abs($e) is 5 $result = int(100/$tmp); # if p=1E-5 then this is 20 # printf("dbg: m=$m, e=$e, substr=$substr, qlog[$substr]=%f, tmp=$tmp, q=%f, r=$result\n", $quiklog[$substr], 100/$tmp); } return $result; } # *** THIS SUBROUTINE IS NOT CURRENTLY USED *** sub max_nonunderflowing_power { my $p = shift; &timeit_start('max_nonunderflowing_power1'); my $result1 = max_nonunderflowing_power1($p); &timeit_end('max_nonunderflowing_power1'); &timeit_start('max_nonunderflowing_power2'); my $result2 = max_nonunderflowing_power2($p); &timeit_end('max_nonunderflowing_power2'); if ($result1 != $result2) { print "dbg: max_nonunderflowing_power($p), $result1, $result2\n"; } return $result1; } # my $xp = 2.9E-5; # my $ex = 55; # my $maxpower = max_nonunderflowing_power($xp); # printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex ); # $maxpower++; # printf("p=%.3g, ex=%d, maxpower=%d, p^maxpower=%.4g, p^e=%.4g\n", $xp, $ex, $maxpower, $xp**$maxpower, $xp**$ex ); # a=q # Given n, k & p, calculate and return P(X=k) # The "S3" suffix means this is the 3rd scalar / short version. # This is the version we currently use for Quick (normal) precision. Sorry about the potential confusion # confusion between "S" for "scalar" (this one) and "S" for "slow" (which uses calc_binom_probR). # Inputs are assumed to be scalars (m=int, k=int, p=float). # Result is the product of three part: # "n choose k" -- which is at risk of overflow # "p^k" -- which is at risk of underflow # "(1-p)^(n-k)" -- which is also at risk of underflow # No bignums are used. # This version is more robust than simpler versions. It fixes the possible # overflow of "n choose k" and the possible underflows of "p^k" and "(1-p)^(n-k)". # Here's a test case that breaks calc_binom_probS2 but this version fixes: # perl binomcalc.pl -n=2000 -k=890 -p=0.2 # a=q result is 2.48298479667e-135 # (a=s result is 2.482984796673508e-135) # (Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135") # Here's another test case that breaks calc_binom_probS2 but this version fixes: # perl binomcalc.pl -n=3000 -k=1400 -p=0.45 # a=q result is 0.00272066277 = 2.72066276956e-3 # (a=s result is 0.002720662769557 = 2.720662769556848e-3) # (Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066") sub calc_binom_probS3 { my ($n,$k1,$p) = @_; my $k = $k1; my $probability = 0; my $result = 1; my $cntr = 1; my $tmpmsg; # for debug prints if ($dbg>0) { my $n_dspl = dspl($n); my $k_dspl = dspl($k); &wprint("dbg: calc_binom_probS3( n=$n_dspl, k=$k_dspl, p=$p )\n"); } my $one_minus_p = 1 - $p; my $n_minus_k = $n - $k; # my $maxpower_p = max_nonunderflowing_power($p); my $maxpower_p = max_nonunderflowing_power1($p); my $remaining_k = $k; if ($maxpower_p > $k) { $maxpower_p = $k; # p^k won't underflow anyhow } # my $maxpower_1mp = max_nonunderflowing_power($one_minus_p); my $maxpower_1mp = max_nonunderflowing_power1($one_minus_p); my $remaining_n_minus_k = $n_minus_k; if ($maxpower_1mp > ($n_minus_k)) { $maxpower_1mp = $n_minus_k; # (1-p)^(n-k) won't underflow anyhow } if ($dbg >= 2) { &wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" ); } # my $tmpa = $p; # my $tmpb = $one_minus_p; # $tmpa = $tmpa ** $k; # $tmpb = $tmpb ** ($n_minus_k); # if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); } # $probability = $tmpa * $nchoosek * $tmpb; # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars: # n! / ((k!) * ((n-k)!)) my $z = $n_minus_k; if ($z > $k) { ($z, $k) = ($k, $z); # swap 'em, so that $k >= $z } # At this point we know that $n >= $k >= $z # Note that "k" here is no longer necessarily the same "k" used in the p^k # and (1-p)^(n-k) exponents. (I probably should have used new variable names.) my $mul_by_a_done = 0; my $mul_by_b_done = 0; # The result we need is (n choose k) * p^k * (1-p)^(n-k). # (n choose k) is often very, very large. # p^k and (1-p)^(n-k) are often very, very tiny. # This is our (n choose k) calculation loop, mostly. # # When there's no threat of overflow, the following loop just calculates # (n choose k). But when overflow threatens we get trickier. # # n! is a multiple of k!, so we'll skip those multiplications & divisions. # Additionally, we know that n-k = z, so counting k up to n will take the # same number of loops as counting z down to 1, which means we only need to # test one of the two counters. # # To avoid loss of precision in extreme cases, if the intermediate value begins # to approach the limit of what can be represented in a IEEE 754 double float # (a little over 1e308) then we insert the multiplications by the tiny # exponentiated probabilities in the midst of the calculation of (n choose k). my $x = $k+1; while ($x <= $n) { # print "dbg: x = $x, z = $z\n"; $result *= $x; $x += 1; $result /= $z; $z -= 1; while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) { if ($remaining_k > 0) { if ($dbg>2) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; } $result *= ($p ** $maxpower_p); $remaining_k -= $maxpower_p; if ($remaining_k < $maxpower_p) { $maxpower_p = $remaining_k; } if ($dbg>2) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } if ($result > 1e200) { if ($remaining_n_minus_k > 0) { if ($dbg>2) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; } $result *= ($one_minus_p ** $maxpower_1mp); $remaining_n_minus_k -= $maxpower_1mp; if ($remaining_n_minus_k < $maxpower_1mp) { $maxpower_1mp = $remaining_n_minus_k; } if ($dbg>2) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } } } $cntr++; } while ($remaining_k > 0) { if ($dbg>2) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; } $result *= ($p ** $maxpower_p); $remaining_k -= $maxpower_p; if ($remaining_k < $maxpower_p) { $maxpower_p = $remaining_k; } if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); } } while ($remaining_n_minus_k > 0) { if ($dbg>2) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; } $result *= ($one_minus_p ** $maxpower_1mp); $remaining_n_minus_k -= $maxpower_1mp; if ($remaining_n_minus_k < $maxpower_1mp) { $maxpower_1mp = $remaining_n_minus_k; } if ($dbg>2) { &wprint( "$tmpmsg X after = $result\n" ); } } $probability = $result; if ($dbg >= 2) { &wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" ); } return $probability; } # a=xx # Given n, k & p, calculate and return P(X=k) # The "F" suffix means this is the BigFloat (slow) version. # n & k should be scalar integers, p can be a BigFloat or a scalar float, 0..1. # Result ($probability) will be a BigFloat. # Result is the product of three parts: # "n choose k" -- which is at risk of overflow # "p^k" -- which is at risk of underflow # "(1-p)^(n-k)" -- which is also at risk of underflow # Here's a test case that breaks calc_binom_probS2 but this version fixes: # perl binomcalc.pl -n=2000 -k=890 -p=0.2 # a=s result is 2.482984796673508e-135 # (a=q result is 2.48298479667e-135; Berman reports "< 0.000001"; Vassarstats reports "<0.000001"; iCalcu reports "NaN"; wolframalpha reports "2.483e-135") # Here's another test case that breaks calc_binom_probS2 but this version fixes: # perl binomcalc.pl -n=3000 -k=1400 -p=0.45 # a=s result is 0.002720662769557 = 2.720662769556848e-3 # (a=q result is 0.00272066277 = 2.72066276956e-3; Berman reports "0.00272066277": Vassarstats reports "Method 2. approximation via normal 0.002702"; iCalcu reports "NaN"; wolframalpha reports ".00272066") sub calc_binom_probF3 { my ($n,$k1,$p1) = @_; my $k = $k1; my $p = Math::BigFloat->new($p1); my $scalar_p = $p->numify(); my $probability = Math::BigFloat->new(0); my $result = Math::BigFloat->new(1); my $cntr = 1; my $tmpmsg; # for debug prints if ($dbg>0) { my $tst1 = 1.2345E-20; my $tst2 = Math::BigFloat->new($tst1); my $tst3 = Math::BigFloat->new($tst2); &wprintf("dbg: tst1=%s, tst2=%s, tst3=%s\n", dspl($tst1), dspl($tst2), dspl($tst3) ); my $n_dspl = dspl($n); my $k_dspl = dspl($k); my $p_dspl = dspl($p); my $p1_dspl = dspl($p1); if ($p1_dspl ne $p_dspl) { $p_dspl .= "=$p1_dspl"; } &wprint("dbg: calc_binom_probF3( n=$n_dspl, k=$k_dspl, p=$p_dspl )\n"); } my $one_minus_p = 1 - $p; my $n_minus_k = $n - $k; my $maxpower_p = max_nonunderflowing_power2($p); my $remaining_k = $k; if ($maxpower_p > $k) { $maxpower_p = $k; # p^k won't underflow anyhow } my $maxpower_1mp = max_nonunderflowing_power2($one_minus_p); my $remaining_n_minus_k = $n_minus_k; if ($maxpower_1mp > ($n_minus_k)) { $maxpower_1mp = $n_minus_k; # (1-p)^(n-k) won't underflow anyhow } if ($dbg >= 2) { &wprint( "p = $p, k=$k, mpp=$maxpower_p; (1-p)=$one_minus_p, (n-k)=$n_minus_k, mp1mp=$maxpower_1mp\n" ); } # my $tmpa = $p; # my $tmpb = $one_minus_p; # $tmpa = $tmpa ** $k; # $tmpb = $tmpb ** ($n_minus_k); # if ($dbg >= 2) { &wprint( "dbg: a=$tmpa, b=$tmpb\n" ); } # $probability = $tmpa * $nchoosek * $tmpb; # Binomial coefficient, "n choose k" is equivalent to .bnok() but for scalars: # n! / ((k!) * ((n-k)!)) my $z = $n_minus_k; if ($z > $k) { ($z, $k) = ($k, $z); # swap 'em, so that $k >= $z } # At this point we know that $n >= $k >= $z # Note that "k" here is no longer necessarily the same "k" used in the p^k # and (1-p)^(n-k) exponents. (I probably should have used new variable names.) my $mul_by_a_done = 0; my $mul_by_b_done = 0; # The result we need is (n choose k) * p^k * (1-p)^(n-k). # (n choose k) is often very, very large. # p^k and (1-p)^(n-k) are often very, very tiny. # This is our (n choose k) calculation loop, mostly. # # When there's no threat of overflow, the following loop just calculates # (n choose k). But when overflow threatens we get trickier. # # n! is a multiple of k!, so we'll skip those multiplications & divisions. # Additionally, we know that n-k = z, so counting k up to n will take the # same number of loops as counting z down to 1, which means we only need to # test one of the two counters. # # To avoid loss of precision in extreme cases, if the intermediate value begins # to approach the limit of what can be represented in a IEEE 754 double float # (a little over 1e308) then we insert the multiplications by the tiny # exponentiated probabilities in the midst of the calculation of (n choose k). my $x = $k+1; while ($x <= $n) { # print "dbg: x = $x, z = $z\n"; $result *= $x; $x += 1; $result /= $z; $z -= 1; while (($result > 1e270) && (($remaining_k > 0) || ($remaining_n_minus_k > 0))) { if ($remaining_k > 0) { if ($dbg>0) { $tmpmsg = "dbg: early mul by a, X before = $result, a=p^$maxpower_p,"; } $result *= ($p ** $maxpower_p); $remaining_k -= $maxpower_p; if ($remaining_k < $maxpower_p) { $maxpower_p = $remaining_k; } if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } if ($result > 1e200) { if ($remaining_n_minus_k > 0) { if ($dbg>0) { $tmpmsg = "dbg: early mul by b, X before = $result, (1-p)^$maxpower_1mp,"; } $result *= ($one_minus_p ** $maxpower_1mp); $remaining_n_minus_k -= $maxpower_1mp; if ($remaining_n_minus_k < $maxpower_1mp) { $maxpower_1mp = $remaining_n_minus_k; } if ($dbg>0) { &wprint( "$tmpmsg X after = $result (loop cntr=$cntr)\n" ); } } } } $cntr++; } while ($remaining_k > 0) { if ($dbg>0) { $tmpmsg = "dbg[1]: at end, X before = $result, a=p^$remaining_k,"; } $result *= ($p ** $maxpower_p); $remaining_k -= $maxpower_p; if ($remaining_k < $maxpower_p) { $maxpower_p = $remaining_k; } if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); } } while ($remaining_n_minus_k > 0) { if ($dbg>0) { $tmpmsg = "dbg[2]: at end, X before = $result, (1-p)^$remaining_n_minus_k,"; } $result *= ($one_minus_p ** $maxpower_1mp); $remaining_n_minus_k -= $maxpower_1mp; if ($remaining_n_minus_k < $maxpower_1mp) { $maxpower_1mp = $remaining_n_minus_k; } if ($dbg>0) { &wprint( "$tmpmsg X after = $result\n" ); } } $probability = $result; if ($dbg >= 2) { &wprint( "The probability of $n choose $k (w/ p=$p) is $probability ($cntr loops)\n" ); } # $probability = $probability->numify(); return $probability; } if ($dbg>0) { &wprint( "Parameters passed to $scriptname = '$parameters'\n" ); &wprint( "dbg=$dbg\n" ); } # For a='q' and a='h' we use the scalar version (the default) our $calc_binom_prob = \&calc_binom_probS3; # calc_binom_probS3 is more robust than calc_binom_probS or calc_binom_probS2 if ('s' eq $p_a) { # # GMP BigRat version is surprisingly fast! # Before I switched to GMP, I used the BigFloat version calc_binom_probF here. $calc_binom_prob = \&calc_binom_probR; } elsif ('x' eq $p_a) { # BigFloat eXperimental version. Slow. $calc_binom_prob = \&calc_binom_probF; } elsif ('xx' eq $p_a) { # another eXperimental version. Slow. $calc_binom_prob = \&calc_binom_probF3; } our $k0 = $p_k + 0; # scalar # $k = Math::BigInt->new($p_k); # BigInt # $k0 = $k->numify(); # (unnecessary, since I don't "use bignum") if ($dbg>0) {&wprint( "p_k='$p_k', k0=" . dspl($k0) . "\n" );} our $n0 = $p_n + 0; # scalar # $n = Math::BigInt->new($p_n); # BigInt # $n0 = $n->numify(); if ($dbg>0) {&wprint( "p_n='$p_n', n0=" . dspl($n0) . "\n" );} # &wprint("dbg: about to evaluate p_p...\n"); our( $p0, $p1, $p2 ); # scalar, BigRat, BigFloat, respectively if ($p_p =~ /\//) { # &wprint("dbg: p_p is rational...\n"); $p1 = Math::BigRat->new($p_p); $p2 = Math::BigFloat->new($p1->numerator()) / $p1->denominator(); $p0 = $p2->numify(); # &wprint("dbg: evaluated rational, p0=$p0, p1=" . dspl($p1) . ", p2=" . dspl($p2) . "\n"); } else { # &wprint("dbg: p_p is float '$p_p' ...\n"); # if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p='$p_p'\n"); } $p2 = Math::BigFloat->new($p_p); # if ($p_p =~ /[eE]/) { &wprint("dbg: exponential format p_p=p2=" . dspl($p2) . "\n");} $p1 = $p2; $p0 = 0.0 + $p_p; # scalar float } if ('s' eq $p_a) { $p = $p1; # BigRat } elsif (('x' eq $p_a) || ('xx' eq $p_a)) { $p = $p2; # BigFloat } elsif (('q' eq $p_a) || ('h' eq $p_a)) { $p = $p0; } if ($dbg>0) {&wprint("dbg: p_p='$p_p', p=" . dspl($p) . "\n");} if ($dbg>0) { &wprint( "n=$n0, k=$k0, p=$p\n"); } if ($n0) { if ($k0 > $n0) { &givehelp("** ERROR: parameter k=$k0 to $scriptname cannot be greater than n=$n0"); } if ($p0 > 1) { &givehelp("** ERROR: parameter p=$p0 to $scriptname must be between 0 and 1"); } } # m=co calculate & display binomial coefficient, only sub do_m_co { my $nchoosek; if ('q' eq $p_a) { $nchoosek = &n_choose_k($n0, $k0); } else { $nchoosek = Math::BigInt->new( $p_n ); $nchoosek = $nchoosek -> bnok( $p_k ); $nchoosek = "$nchoosek"; } if ('inf' eq $nchoosek) { $nchoosek = 'overflow'; if ($cgi_context) { print "<p>n=$n0, k=$k0, (n choose k) = <b style='color:red'><i>overflow</i></b>"; if ('q' eq $p_a) { print "<br>\nTry again, in 'Slow' mode.</p>\n"; } else { print "</p>\n"; } } else { print "n=$n0, k=$k0, (n choose k) = $nchoosek\n"; if ('q' eq $p_a) { print "Try again, in 'Slow' mode.\n"; } } } else { $nchoosek = commafy($nchoosek); &wprint( "n=$n0, k=$k0, (n choose k) = $nchoosek" ); } }#do_m_co # $probability is calculated probability of x=k, $dd is the number of digits of precision to display, # the arrays are for calculating cumulative probabilities our ( $probability, $dd, @saved_probs, @cummu_0_to_i ); # we no longer use @cummu_i_to_k # m=b calculate & display binomial probability of x=k, leaving the result in $probability as a side-effect sub do_m_b { if ($dbg>0) { &wprint( "dbg: sub do_m_b(), dd=$dd\n" ); } if ($p_p =~ /\//) { &wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" ); } else { &wprint( "Probability of success on a single trial (p): $p0\n" ); } &wprint( "Number of trials (n): $n0\n" ); &wprint( "Number of successes (k): $k0\n" ); # In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p if (('q' eq $p_a) || ('h' eq $p_a)) { $p1 = undef; $p2 = undef; } # First calculate the binomial probability P(X=k) &timeit_start('calc_bprob'); $probability = &$calc_binom_prob( $n0, $k0, $p ); &timeit_end('calc_bprob'); my $fp_prob = &display_prob($probability,2,$dd); &wprint( "Binomial probability P(X = k): $fp_prob\n" ); }#do_m_b # m=cu calculate and display probability of x=k, and cumulative probability of x <= k sub do_m_cu { # Sum the binomial probabilities for lower values of k (down to zero) my $cummu_0_to_im1 = 0; my $max_addend = 0; # we keep track of the maximum addend for estimating the true number of significant digits if ($p_p =~ /\//) { &wprint( "Probability of success on a single trial (p): " . $p1->bstr() . "\n" ); } else { &wprint( "Probability of success on a single trial (p): $p0\n" ); } &wprint( "Number of trials (n): $n0\n" ); &wprint( "Number of successes (k): $k0\n" ); # In "quick" and "hybrid" (scalar) modes, we'll never use the BigRat and BigFloat versions of p if (('q' eq $p_a) || ('h' eq $p_a)) { $p1 = undef; $p2 = undef; } # First calculate the binomial probability P(X=k) &timeit_start('calc_bprob'); $probability = &$calc_binom_prob( $n0, $k0, $p ); &timeit_end('calc_bprob'); my $fp_prob = &display_prob($probability, 2, $dd, undef, ($p > 0) && ($p < 1)); if ($cgi_context) { print( "<p>Probabilities:<br>\n" . " P( X = k ): $fp_prob<br>\n" ); } else { &wprint( "Probabilities:\n" . " P( X = k ): $fp_prob\n" ); } if ($k0 > 0) { for (my $i0=0; $i0 < $k0; $i0++) { &timeit_start('cumu_sum'); my $probability_tmp = &$calc_binom_prob( $n0, $i0, $p ); if ('h' eq $p_a) { # "hybrid" mode: convert from 64-bit floating point to BigFloat for summing $probability_tmp = Math::BigFloat->new($probability_tmp); } $cummu_0_to_im1 += $probability_tmp; if ($probability_tmp > $max_addend) { $max_addend = $probability_tmp; } &timeit_end('cumu_sum'); } } # Then display the cumulative probabilities my $cprob1; &timeit_start('cumu_sum'); if ($n0 == $k0) { $cprob1 = 1; } else { my $probability_tmp = $probability; if ('h' eq $p_a) { # in "hybrid" mode we calculate probabilities in 64-bit floating point, but sum them as BigFloats $probability_tmp = Math::BigFloat->new($probability_tmp); } $cprob1 = $cummu_0_to_im1 + $probability_tmp; if ($probability_tmp > $max_addend) { $max_addend = $probability_tmp; } } my $cprob2 = 1.0 - $cprob1; my $cprob3 = 1.0 - $cummu_0_to_im1; &timeit_end('cumu_sum'); if ($dbg>1) { print "dbg: fp_cprob0 -- p='$p', k0='$k0'\n"; } my $fp_cprob0 = &display_prob( $cummu_0_to_im1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) ); if ($dbg>1) { print "dbg: fp_cprob1 -- p='$p', k0='$k0', n0='$n0'\n"; } my $fp_cprob1 = &display_prob( $cprob1, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) ); if ($dbg>1) { print "dbg: fp_cprob2 -- p='$p', k0='$k0', n0='$n0'\n"; } my $fp_cprob2 = &display_prob( $cprob2, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 < $n0) ); if ($dbg>1) { print "dbg: fp_cprob3 -- p='$p', k0='$k0'\n"; } my $fp_cprob3 = &display_prob( $cprob3, 2, $dd, $max_addend, ($p > 0) && ($p < 1) && ($k0 > 0) ); if ($cgi_context) { print( " P( X < k ): $fp_cprob0<br>\n" . " P( X ≤ k ): $fp_cprob1<br>\n" . " P( X > k ): $fp_cprob2<br>\n" . " P( X ≥ k ): $fp_cprob3</p>\n" ); } else { &wprint( " P( X < k ): $fp_cprob0\n" . " P( X <= k ): $fp_cprob1\n" . " P( X > k ): $fp_cprob2\n" . " P( X >= k ): $fp_cprob3\n" ); } }#do_m_cu # m=acu calculate and display probability of x=k, and cumulative probabilities of x in 0..k sub do_m_acu { my $max_addend = 0; # we keep track of the maximum addend for estimating the true number of significant digits # Calculate all the binomial probabilities for lower values of k (down to zero) &wprint( "Cumulative probability:\n" ); if ($dbg>0) { &wprint( "dbg: sub do_m_acu(), dd=$dd\n" ); } @saved_probs = (); # we already calculated the binomial probability for x=k, so save it: if ('h' eq $p_a) { # "hybrid" mode: convert from 64-bit floating point to BigFloat for summing $saved_probs[$k0] = Math::BigFloat->new($probability); } else { $saved_probs[$k0] = $probability; } @cummu_0_to_i = (); # cumulative probabilities # Then display the cumulative probabilities if ($cgi_context) { $in_midst_of_generating_acu_table = 1; print "<table class='acu_table' style='width:" . (20+(2*$dd)) . "em'>\n"; print "<tr class='acu_tr1'><th class='acu_th1'>k</th><th class='acu_th2'>P(X=k)</th> " . "<th class='acu_th3'><i>range</i></th> " . "<th class='acu_th4'>P(X in <i>range</i>)</th> <th class='acu_th5'>1 - P(X in <i>range</i>)</th></tr>\n"; for (my $i0=0; $i0 <= $k0; $i0++) { my $prob; if ($i0 < $k0) { &timeit_start('calc_bprob'); $prob = &$calc_binom_prob( $n0, $i0, $p ); if ('h' eq $p_a) { $prob = Math::BigFloat->new($prob); } $saved_probs[$i0] = $prob; &timeit_end('calc_bprob'); } else { $prob = $saved_probs[$i0]; } my $cummu_to_i; if (0 == $i0) { $cummu_to_i = $prob; } else { &timeit_start('cumu_sum'); $cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob); &timeit_end('cumu_sum'); } $cummu_0_to_i[$i0] = $cummu_to_i; if ($prob > $max_addend) { $max_addend = $prob; } &timeit_start('cumu_sum'); my $one_minus_cummu_to_i = 1.0 - $cummu_to_i; &timeit_end('cumu_sum'); # my $cummu_from_i = $cummu_i_to_k[$i0]; # &timeit_start('cumu_sum'); # my $one_minus_cummu_from_i = 1.0 - $cummu_from_i; # &timeit_end('cumu_sum'); my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) ); # sprintf("%#.15g", $prob); my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $cummu_to_i); my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $one_minus_cummu_to_i); # my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd); # sprintf("%#.15g", $cummu_from_i); # my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd); # sprintf("%#.15g", $one_minus_cummu_from_i); print "<tr class='acu_tr2'><td class='acu_td1'>$i0</td> <td class='acu_td2'>$fp_prob</td> " . "<td class='acu_td3'>0..$i0</td> " . "<td class='acu_td4'>$fp_cummu_to_i</td> <td class='acu_td5'>$fp_one_minus_cummu_to_i</td></tr>\n"; } print "</table>\n"; $in_midst_of_generating_acu_table = 0; } else { print "|" . center('k',8) . "|" . center('P(X=k)',7+$dd) . "|" . center('range',11) . "|" . center('P(X in range)',7+$dd) . "|" . center('1 - P(X in range)',7+$dd) . "|\n"; for (my $i0=0; $i0 <= $k0; $i0++) { my $prob; if ($i0 < $k0) { &timeit_start('calc_bprob'); $prob = &$calc_binom_prob( $n0, $i0, $p ); if ('h' eq $p_a) { $prob = Math::BigFloat->new($prob); } $saved_probs[$i0] = $prob; &timeit_end('calc_bprob'); } else { $prob = $saved_probs[$i0]; } my $cummu_to_i; if (0 == $i0) { $cummu_to_i = $prob; } else { &timeit_start('cumu_sum'); $cummu_to_i = ($cummu_0_to_i[$i0-1] + $prob); &timeit_end('cumu_sum'); } $cummu_0_to_i[$i0] = $cummu_to_i; if ($prob > $max_addend) { $max_addend = $prob; } &timeit_start('cumu_sum'); my $one_minus_cummu_to_i = 1.0 - $cummu_to_i; &timeit_end('cumu_sum'); # my $cummu_from_i = $cummu_i_to_k[$i0]; # &timeit_start('cumu_sum'); # my $one_minus_cummu_from_i = 1.0 - $cummu_from_i; # &timeit_end('cumu_sum'); if ($dbg>1) { print "dbg: fp_prob -- p='$p', k0='$k0', n0='$n0'\n"; } my $fp_prob = &display_prob( $prob, 1, $dd, undef, ($p > 0) && ($p < 1) ); # sprintf("%#.15g", $prob); if ($dbg>1) { print "dbg: fp_cummu_to_i -- p='$p', i0='$i0', n0='$n0'\n"; } my $fp_cummu_to_i = &display_prob( $cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $cummu_to_i); if ($dbg>1) { printf( "dbg: fp_one_minus_cummu_to_i -- p='$p', i0='$i0', n0='$n0' %d %d %d %d \n", ($p > 0), ($p < 1), ($i0 < $n0), (($p > 0) && ($p < 1) && ($i0 < $n0)) ); } my $fp_one_minus_cummu_to_i = &display_prob( $one_minus_cummu_to_i, 1, $dd, $max_addend, ($p > 0) && ($p < 1) && ($i0 < $n0) ); # sprintf("%#.15g", $one_minus_cummu_to_i); # my $fp_cummu_from_i = &display_prob($cummu_from_i,1,$dd); # sprintf("%#.15g", $cummu_from_i); # my $fp_one_minus_cummu_from_i = &display_prob($one_minus_cummu_from_i,1,$dd); # sprintf("%#.15g", $one_minus_cummu_from_i); print "|" . center($i0,8) . "|" . center($fp_prob,7+$dd) . "|" . center("0..$i0",11) . "|" . center($fp_cummu_to_i,7+$dd) . "|" . center($fp_one_minus_cummu_to_i,7+$dd) . "|\n"; } } }#do_m_acu sub main { if (('q' eq $p_a) || ('h' eq $p_a)) { $dd = 11; # 11 digits is conservative for IEEE 754 64-bit floating point, but if we set it to 12 then the last digit is often off by one } else { $dd = 16; # no. of display digits = 16 for extreme precision settings } if ($dd > ($accuracy - 2)) { $dd = $accuracy - 2; # we can set d= even to a smaller number than the default a=q accuracy (that's really only useful for testing purposes) if ($dd < 2) { $dd = 2; } } if (!$n0) { &wprint( "(Nothing to do.)\n" ); } elsif ('co' eq $p_m) { &do_m_co(); } elsif ('b' eq $p_m) { &do_m_b(); } elsif ('cu' eq $p_m) { &do_m_cu(); } elsif ('acu' eq $p_m) { &do_m_b(); &do_m_acu(); } }#main sub timed_out { if ($in_midst_of_generating_acu_table) { print "\n</table>\n"; } print "\n\n<p><b style='color:red'>ERROR: Server timed out.</b><br>\n" . "The calculation was too computationally intensive to run on my server. But if you " . '<a href="https://sealevel.info/binomcalc.sphp#download">download binomcalc.pl</a> ' . "and run it on your own computer, you can let it run for as long as necessary.</p>\n"; exit 1; } &timeit_start('main'); if ($p_t) { eval { local $SIG{ALRM} = 'timed_out'; alarm 0+$p_t; # start timout timer &main(); alarm 0; # cancel timeout timer } } else { &main(); } &timeit_end('main'); # if b=... unspecified, then default to b=0 if runtime was less than 1 seconds, or b=1 otherwise if (!defined $p_b) { #if ($runtimes{'main'} >= 1.0) { # $p_b = '1'; #} else { $p_b = '0'; # default is b=0 #} } if ('0' ne $p_b) { # benchmark = 1 &timeit_display; } &wprint("\n"); __END__