gstlal  1.4.1
__init__.py
Go to the documentation of this file.
1 # Copyright (C) 2011--2014 Kipp Cannon, Chad Hanna, Drew Keppel
2 # Copyright (C) 2013 Jacob Peoples
3 #
4 # This program is free software; you can redistribute it and/or modify it
5 # under the terms of the GNU General Public License as published by the
6 # Free Software Foundation; either version 2 of the License, or (at your
7 # option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
12 # Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 
18 
32 
33 # - Address the fixed SNR PDF using median PSD which could be pre-computed and stored on disk. (Store database of SNR pdfs for a variety of horizon)
34 # - The binning parameters are hard-coded too; Could it be a problem?
35 # - Chisquare binning hasn't been tuned to be a good representation of the PDFs; could be improved in future
36 
37 
38 
39 
40 #
41 # =============================================================================
42 #
43 # Preamble
44 #
45 # =============================================================================
46 #
47 
48 
49 try:
50  from fpconst import NaN, NegInf, PosInf
51 except ImportError:
52  # fpconst is not part of the standard library and might not be
53  # available
54  NaN = float("nan")
55  NegInf = float("-inf")
56  PosInf = float("+inf")
57 import itertools
58 import math
59 import numpy
60 from scipy.special import ive
61 
62 
63 #
64 # ============================================================================
65 #
66 # Non-central chisquared pdf
67 #
68 # ============================================================================
69 #
70 
71 #
72 # FIXME this is to work around a precision issue in scipy
73 # See: https://github.com/scipy/scipy/issues/1608
74 #
75 
76 def logiv(v, z):
77  # from Abramowitz and Stegun (9.7.1), if mu = 4 v^2, then for large
78  # z:
79  #
80  # Iv(z) ~= exp(z) / (\sqrt(2 pi z)) { 1 - (mu - 1)/(8 z) + (mu - 1)(mu - 9) / (2! (8 z)^2) - (mu - 1)(mu - 9)(mu - 25) / (3! (8 z)^3) ... }
81  # Iv(z) ~= exp(z) / (\sqrt(2 pi z)) { 1 + (mu - 1)/(8 z) [-1 + (mu - 9) / (2 (8 z)) [1 - (mu - 25) / (3 (8 z)) ... ]]}
82  # log Iv(z) ~= z - .5 log(2 pi) log z + log1p((mu - 1)/(8 z) (-1 + (mu - 9)/(16 z) (1 - (mu - 25)/(24 z) ... )))
83 
84  with numpy.errstate(divide = "ignore"):
85  a = numpy.log(ive(v,z))
86 
87  # because this result will only be used for large z, to silence
88  # divide-by-0 complaints from inside log1p() when z is small we
89  # clip z to 1.
90  mu = 4. * v**2.
91  with numpy.errstate(divide = "ignore", invalid = "ignore"):
92  b = -math.log(2. * math.pi) / 2. * numpy.log(z)
93  z = numpy.clip(z, 1., PosInf)
94  b += numpy.log1p((mu - 1.) / (8. * z) * (-1. + (mu - 9.) / (16. * z) * (1. - (mu - 25.) / (24. * z))))
95 
96  return z + numpy.where(z < 1e8, a, b)
97 
98 # See: http://en.wikipedia.org/wiki/Noncentral_chi-squared_distribution
99 def ncx2logpdf(x, k, l):
100  return -math.log(2.) - (x+l)/2. + (k/4.-.5) * (numpy.log(x) - numpy.log(l)) + logiv(k/2.-1., numpy.sqrt(l) * numpy.sqrt(x))
101 
102 def ncx2pdf(x, k, l):
103  return numpy.exp(ncx2logpdf(x, k, l))
104 
105 
106 #
107 # =============================================================================
108 #
109 # Poisson Stuff
110 #
111 # =============================================================================
112 #
113 
114 
115 def assert_probability(f):
116  def g(*args, **kwargs):
117  p = f(*args, **kwargs)
118  if isinstance(p, numpy.ndarray):
119  assert ((0. <= p) & (p <= 1.)).all()
120  else:
121  assert 0. <= p <= 1.
122  return p
123  return g
124 
125 
126 def assert_ln_probability(f):
127  def g(*args, **kwargs):
128  p = f(*args, **kwargs)
129  if isinstance(p, numpy.ndarray):
130  assert (p <= 0.).all()
131  else:
132  assert p <= 0.
133  return p
134  return g
135 
136 
137 @assert_probability
138 @numpy.vectorize
139 def poisson_p_not_0(l):
140  """
141  Return the probability that a Poisson process with a mean rate of l
142  yields a non-zero count. = 1 - exp(-l), but computed in a way that
143  avoids loss of precision and over-/underflows.
144 
145  Example:
146 
147  >>> print(poisson_p_not_0(1e+60))
148  1.0
149  >>> print(poisson_p_not_0(1)
150  0.632120558829
151  >>> print(poisson_p_not_0(1e-60))
152  1e-60
153  >>> print(poisson_p_not_0(0))
154  0.0
155  """
156  assert l >= 0.
157 
158  # need -l everywhere. also make sure we haven't been passed an
159  # integer as that will mess up the arithmetic in the Taylor
160  # expansion below
161 
162  l = -float(l)
163 
164  #
165  # result is closer to 1 than to 0. use direct evaluation.
166  #
167 
168  if l < -0.69314718055994529:
169  return 1. - math.exp(l)
170 
171  #
172  # result is closer to 0 than to 1. use Taylor expansion for exp()
173  # with leading term removed to avoid having to subtract from 1 to
174  # get answer.
175  #
176  # 1 - exp x = -(x + x^2/2! + x^3/3! + ...)
177  #
178 
179  s = [l]
180  term = l
181  threshold = -1e-20 * l
182  for n in itertools.count(2):
183  term *= l / n
184  s.append(term)
185  if abs(term) <= threshold:
186  # smallest term was added last, want to compute sum
187  # from smallest to largest
188  s.reverse()
189  s = -sum(s)
190  # if the sum was identically 0 then we've ended up
191  # with -0.0 which we add a special case for to make
192  # positive.
193  assert s >= 0.
194  return s if s else 0.
195 
196 
197 @assert_probability
198 def poisson_p_0(l):
199  """
200  Return the probability that a Poisson process with a mean rate of l
201  yields a zero count. = exp(-l), but with a sanity check that l is
202  non-negative.
203  """
204  assert l >= 0.
205  return numpy.exp(-l)
206 
207 
208 @assert_ln_probability
209 def poisson_ln_p_0(l):
210  """
211  Return the natural logarithm of the probability that a Poisson
212  process with a mean rate of l yields a zero count. = -l, but with
213  a sanity check that l is non-negative.
214  """
215  assert l >= 0.
216  return -l
217 
218 
219 #
220 # =============================================================================
221 #
222 # Binomial Stuff
223 #
224 # =============================================================================
225 #
226 
227 
228 @assert_probability
229 @numpy.vectorize
230 def fap_after_trials(p, m):
231  """
232  Given the probability, p, that an event occurs, compute the
233  probability of at least one such event occuring after m independent
234  trials.
235 
236  The return value is 1 - (1 - p)^m computed avoiding round-off
237  errors and underflows. m cannot be negative but need not be an
238  integer.
239 
240  Example:
241 
242  >>> fap_after_trials(0.5, 1)
243  0.5
244  >>> fap_after_trials(0.066967008463192584, 10)
245  0.5
246  >>> fap_after_trials(0.0069075045629640984, 100)
247  0.5
248  >>> fap_after_trials(0.00069290700954747807, 1000)
249  0.5
250  >>> fap_after_trials(0.000069312315846428086, 10000)
251  0.5
252  >>> fap_after_trials(.000000006931471781576803, 100000000)
253  0.5
254  >>> fap_after_trials(0.000000000000000069314718055994534, 10000000000000000)
255  0.5
256  >>> "%.15g" % fap_after_trials(0.1, 21.854345326782834)
257  '0.9'
258  >>> "%.15g" % fap_after_trials(1e-17, 2.3025850929940458e+17)
259  '0.9'
260  >>> fap_after_trials(0.1, .2)
261  0.020851637639023216
262  """
263  # when m*p is not >> 1, we can use an expansion in powers of m*p
264  # that allows us to avoid having to compute differences of
265  # probabilities (which would lead to loss of precision when working
266  # with probabilities close to 0 or 1)
267  #
268  # 1 - (1 - p)^m = m p - (m^2 - m) p^2 / 2 +
269  # (m^3 - 3 m^2 + 2 m) p^3 / 6 -
270  # (m^4 - 6 m^3 + 11 m^2 - 6 m) p^4 / 24 + ...
271  #
272  # the coefficient of each power of p is a polynomial in m. if m <<
273  # 1, these polynomials can be approximated by their leading term in
274  # m
275  #
276  # 1 - (1 - p)^m ~= m p + m p^2 / 2 + m p^3 / 3 + m p^4 / 4 + ...
277  # = -m * log(1 - p)
278  #
279  # NOTE: the ratio of the coefficients of higher-order terms in the
280  # polynomials in m to that of the leading-order term grows with
281  # successive powers of p and eventually the higher-order terms will
282  # dominate. we assume that this does not happen until the power of
283  # p is sufficiently high that the entire term (polynomial in m and
284  # all) is negligable and the series has been terminated.
285  #
286  # if m is not << 1, then returning to the full expansion, starting
287  # at 0, the nth term in the series is
288  #
289  # -1^n * (m - 0) * (m - 1) * ... * (m - n) * p^(n + 1) / (n + 1)!
290  #
291  # if the (n-1)th term is X, the nth term in the series is
292  #
293  # X * (n - m) * p / (n + 1)
294  #
295  # this recursion relation allows us to compute successive terms
296  # without explicit evaluation of the full numerator and denominator
297  # expressions (each of which quickly overflow).
298  #
299  # for sufficiently large n the denominator dominates and the terms
300  # in the series become small and the sum eventually converges to a
301  # stable value (and if m is an integer the sum is exact in a finite
302  # number of terms). however, if m*p >> 1 it can take many terms
303  # before the series sum stabilizes, terms in the series initially
304  # grow large and alternate in sign and an accurate result can only
305  # be obtained through careful cancellation of the large values.
306  #
307  # for large m*p we write the expression as
308  #
309  # 1 - (1 - p)^m = 1 - exp(m log(1 - p))
310  #
311  # if p is small, log(1 - p) suffers from loss of precision but the
312  # Taylor expansion of log(1 - p) converges quickly
313  #
314  # m ln(1 - p) = -m p - m p^2 / 2 - m p^3 / 3 - ...
315  # = -m p * (1 + p / 2 + p^2 / 3 + ...)
316  #
317  # math libraries (including Python's) generally provide log1p(),
318  # which evalutes log(1 + p) accurately for small p. we rely on
319  # this function to provide results valid both in the small-p and
320  # not small-p regimes.
321  #
322  # if p = 1, log1p() complains about an overflow error. we trap
323  # these and return the hard-coded answer
324  #
325 
326  if m <= 0.:
327  raise ValueError("m = %g must be positive" % m)
328  if not (0. <= p <= 1.):
329  raise ValueError("p = %g must be between 0 and 1 inclusively" % p)
330 
331  if m * p < 4.:
332  #
333  # expansion of 1 - (1 - p)^m in powers of p
334  #
335 
336  if m < 1e-8:
337  #
338  # small m approximation
339  #
340 
341  try:
342  return -m * math.log1p(-p)
343  except OverflowError:
344  #
345  # p is too close to 1. result is 1
346  #
347 
348  return 1.
349 
350  #
351  # general m
352  #
353 
354  s = []
355  term = -1.
356  for n in itertools.count():
357  term *= (n - m) * p / (n + 1.)
358  s.append(term)
359  # 0th term is always positive
360  if abs(term) <= 1e-18 * s[0]:
361  s.reverse()
362  return sum(s)
363 
364  #
365  # compute result as 1 - exp(m * log(1 - p)). use poisson_p_not_0()
366  # to evaluate 1 - exp() with an algorithm that avoids loss of
367  # precision.
368  #
369 
370  try:
371  x = m * math.log1p(-p)
372  except OverflowError:
373  #
374  # p is very close to 1. we know p <= 1 because it's a
375  # probability, and we know that m*p >= 4 otherwise we
376  # wouldn't have followed this code path, and so because p
377  # is close to 1 and m is not small we can safely assume the
378  # anwer is 1.
379  #
380 
381  return 1.
382 
383  return float(poisson_p_not_0(-x))
384 
385 
386 def trials_from_faps(p0, p1):
387  """
388  Given the probabiity, p0, of an event occuring, and the
389  probability, p1, of at least one such event being observed after
390  some number of independent trials, solve for and return the number
391  of trials, m, that relates the two probabilities. The three
392  quantities are related by p1 = 1 - (1 - p0)^m. Generally the
393  return value is not an integer.
394 
395  See also fap_after_trials(). Note that if p0 is 0 or 1 then p1
396  must be 0 or 1 respectively, and in both cases m is undefined.
397  Otherwise if p1 is 1 then inf is returned.
398  """
399  assert 0. <= p0 <= 1. # p0 must be a valid probability
400  assert 0. <= p1 <= 1. # p1 must be a valid probability
401 
402  if p0 == 0. or p0 == 1.:
403  assert p0 == p1 # require valid relationship
404  # but we still can't solve for m
405  raise ValueError("m undefined")
406  if p1 == 1.:
407  return PosInf
408 
409  #
410  # m log(1 - p0) = log(1 - p1)
411  #
412 
413  return math.log1p(-p1) / math.log1p(-p0)