Source code for irfpy.util.fivenumsum

```''' Module for five value summary.

The five value summary is defined as below:

-   Median (M) of the dataset, i.e. the (N+1)/2-th data sorted
from low to high (the mean of the two central data if N is even).
-   The lower 4th value (LF), i.e. the (N+1)/4-th data sorted
from low to high.
-   The higher 4th value (HF), i.e. the (N+1)/4-th data sorted
from high to low.
-   The minimum value inside the inner fence (MI).
Inner fence is determined by [LF-1.5*(HF-LF), HF+1.5*(HF-LF)].
-   The maximum value inside the inner fence (MA).
-   The array of the data in the range between the inner fence
and the outer fence.  The outer fence is defined by
[LF-3.0*(HF-LF), HF+3.0*(HF-LF)]
-   The array of the data in the range far out
(outside of outer fence).

.. codeauthor:: Yoshifumi Futaana

'''

import numpy
from numpy.ma import where
import logging
_logger = logging.getLogger(__name__)


def _select_or_empty(values, mask):
    ''' Return ``values[mask]``, or ``numpy.array([])`` if nothing matches.

    The explicit empty array (float64) is kept on purpose: it decides the
    dtype of the concatenated ``outside`` / ``farout`` results, e.g. an
    integer input with outliers on one side only yields a float array.
    '''
    picked = values[mask]
    if len(picked) == 0:
        return numpy.array([])
    return picked


def _fourths(data):
    ''' Return ``(LF, HF)``, the lower and higher 4th values of sorted *data*.

    The (N+1)/4-th datum is counted from the low end (LF) and from the high
    end (HF).  When (N+1)/4 is not an integer, the two neighbouring data are
    linearly interpolated.  *data* must hold at least 3 elements.
    '''
    ndat = len(data)
    # -1 is because the index starts from 0
    lower = (ndat + 1) // 4 - 1
    upper = 3 * (ndat + 1) // 4 - 1
    remainder = ndat % 4
    if remainder == 3:
        # (N+1)/4 is an integer: take the datum itself (dtype is preserved).
        return data[lower], data[upper]
    # Fractional part of (N+1)/4: 0.25, 0.5 or 0.75 for N % 4 == 0, 1, 2.
    frac = (remainder + 1) * 0.25
    LF = data[lower] * (1 - frac) + data[lower + 1] * frac
    HF = data[upper] * frac + data[upper + 1] * (1 - frac)
    return LF, HF


def fivenumsum(data_array):
    ''' Calculate the five number summary.

    Make a data for box and whisker plot.

    :param data_array: 1-D sequence of numbers to be analyzed (numpy array,
        list, tuple, ...).  It is not modified.
    :returns: The five number summary as a tuple
        ``(M, LF, HF, MI, MA, OS, FOS)``: median, lower 4th, higher 4th,
        minimum and maximum inside the inner fence, array of the data
        between the inner and the outer fence, and array of the data
        outside of the outer fence.  ``None`` is returned (and an error is
        logged) if fewer than 3 data are given.
    :raises RuntimeError: If no datum is found inside the inner fence.

    >>> v = numpy.array([-100, 1, 2, 3, 4, 5, 6, 7, 8, 9, 20, 180])
    >>> M, LF, HF, MI, MA, OS, FOS = fivenumsum(v)
    >>> print(M, LF, HF, MI, MA)
    5.5 2.25 8.75 1 9
    >>> print(OS.tolist(), FOS.tolist())
    [20.0] [-100, 180]
    '''
    # sorted() builds a new list, so the input is never modified.
    data = numpy.array(sorted(data_array))
    ndat = len(data)

    _logger.debug('Number of data=%d', ndat)
    _logger.debug(data)

    if ndat <= 2:
        _logger.error('Data length should be >2 (Current=%d)', ndat)
        return None

    ### Median
    if ndat % 2 == 1:
        M = data[(ndat + 1) // 2 - 1]
    else:
        M = (data[ndat // 2 - 1] + data[ndat // 2]) / 2.

    _logger.debug('Median = %f', M)

    ### lower/higher 4-th.
    LF, HF = _fourths(data)
    _logger.debug('LF=%f : HF=%f', LF, HF)

    D = HF - LF
    _logger.debug('D=%f', D)

    INFENCEL = LF - D * 1.5
    INFENCEH = HF + D * 1.5
    OUTFENCEL = LF - D * 3
    OUTFENCEH = HF + D * 3

    _logger.debug('INFENCE=[%f %f]', INFENCEL, INFENCEH)
    _logger.debug('OUTFENCE=[%f %f]', OUTFENCEL, OUTFENCEH)

    ### lowest value in the fence
    candidates = data[data >= INFENCEL]
    if len(candidates) == 0:
        raise RuntimeError('Strange... It should not happen...')
    MI = candidates[0]
    _logger.debug('Minimum in fence = %f', MI)

    ### Highest value in the fence
    candidates = data[data <= INFENCEH]
    if len(candidates) == 0:
        raise RuntimeError('Strange... It should not happen...')
    MA = candidates[-1]
    _logger.debug('Maximum in fence = %f', MA)

    ### Outside: beyond the inner fence, but not beyond the outer fence.
    low = _select_or_empty(data, (data < INFENCEL) & (data >= OUTFENCEL))
    high = _select_or_empty(data, (data > INFENCEH) & (data <= OUTFENCEH))
    outside = numpy.sort(numpy.concatenate([low, high]))
    _logger.debug('OUTSIDE = %s', outside)

    ### Far outside: beyond the outer fence.
    low = _select_or_empty(data, data < OUTFENCEL)
    high = _select_or_empty(data, data > OUTFENCEH)
    farout = numpy.sort(numpy.concatenate([low, high]))
    _logger.debug('FAR OUT = %s', farout)

    return (M, LF, HF, MI, MA, outside, farout)

import unittest
import doctest

def doctests():
    ''' Return a test suite that wraps the doctests of the calling module.

    ``doctest.DocTestSuite()`` is called without a module argument, so it
    collects the doctests of the module this function is defined in.

    :returns: A ``unittest.TestSuite`` holding the doctest suite.
    '''
    return unittest.TestSuite((
        doctest.DocTestSuite(),
    ))


if __name__ == '__main__':
    # Run the doctests when the module is executed as a script.
    unittest.main(defaultTest='doctests')

```