Source code for irfpy.vima.scidata_util

''' IMA science data utility.

See also :ref:`tutorial_vima_scidata2`.
'''
import datetime
import logging as _logging
_logger = _logging.getLogger(__name__)
import copy as _copy
import warnings as _warnings

from irfpy.util.exception import  IrfpyException

from irfpy.vima import SENSOR_NAME
from irfpy.vima import scidata_tree
from irfpy.vima import scidata
from irfpy.imacommon import imascipac
from irfpy.imacommon import scidata as cscidata
from irfpy.util import tdict
from irfpy.util import exception as ex
from irfpy.util import ringcache as _ringcache
import irfpy.util.timeseriesdb

_tree = scidata_tree.VimaTree()

_array2d_cache_emu = _ringcache.RingCache(5)    # 5 data to cache. One data roughly costs 200 MB -> 1 GB.
_array2d_cache_nonemu = _ringcache.RingCache(5)   # 5 data to cache.


def _arrange_matlab_file(fn, emulate_full=False):
    """ Load the file, cache, and return the emulated/non-emulated data
    """
    # Cache check
    if emulate_full:
        cache = _array2d_cache_emu
    else:
        cache = _array2d_cache_nonemu

    _logger.debug('Cache: Size={}'.format(cache.size()))

    if cache.hasKey(fn):
        _logger.debug("Cache hit")
        return cache.get(fn)
    else:
        _logger.debug('Cache not hit.')

    # Load and process data
    scimat = scidata.ImaCountFile(fn).mat
    try:
        arr2 = cscidata.matfile2timeseries(scimat, emulate_full=emulate_full)
    except ex.IrfpyException as e:
        _logger.error('irfpy meets an error during the processing of {}.'.format(fn))
        _logger.error('      IrfpyException: {}'.format(str(e)))
        raise e

    # Cache the data
    cache.add(fn, arr2)

    return arr2

[docs]def getarray2d(t0, t1, emulate_full=False): ''' Return the :class:`irfpy.imacommon.TimeSeriesCntMatrix2D` object. Return the dataset of VEX/IMA counts as :class:`irfpy.imacommon.TimeSeriesCntMatrix2D` object. corresponding to the time range ``t0`` and ``t1``. >>> import datetime >>> t0 = datetime.datetime(2012, 10, 5, 0, 1, 23) >>> t1 = datetime.datetime(2012, 10, 5, 0, 10, 41) >>> dataset = getarray2d(t0, t1) >>> print(dataset) <TimeSeriesCntMatrix2D:len=48 from 2012-10-05 00:01:16.398080 to 2012-10-05 00:10:40.491880> ''' ts = imascipac.TimeSeriesCntMatrix2D() fns = _tree.getfiles(t0, t1) arr = imascipac.TimeSeriesCntMatrix2D() for fn in fns: try: arr2 = _arrange_matlab_file(fn, emulate_full=emulate_full) arr = arr + arr2 except ex.IrfpyException as e: _logger.warning('irfpy meets an error. The data block for the file {} is disregarded.'.format(fn)) _logger.warning(' IrfpyException: {}'.format(str(e))) try: arr = arr.clipped(t0, t1) except tdict.DataNotInDbError as e: arr = imascipac.TimeSeriesCntMatrix2D() ### Set the name of the sensor for t, matrix in arr: matrix.name = SENSOR_NAME return _copy.deepcopy(arr)
[docs]def getarray3d(t0, t1, emulate_full=False): """ Get the time series of 3D array (:class:`irfpy.imacommon.TimeSeriesCntMatrix`) for IMA data. :param t0: Start of the time. :param t1: Stop of the time. :param emulate_full: Emulates M24, the full mode, if *True*. :return: :class:`irfpy.imacommon.TimeSeriesCntMatrix` object. You can get time series of 3D data as: >>> import datetime >>> t0 = datetime.datetime(2012, 10, 5, 0 , 0, 23) >>> t1 = datetime.datetime(2012, 10, 5, 0, 10, 41) >>> dataset = getarray3d(t0, t1) >>> print(dataset) <TimeSeriesCntMatrix:len=4 from 2012-10-05 00:00:16.429320 to 2012-10-05 00:07:40.491860> >>> t2, d2 = dataset[2] # Obtain the 2nd 3-D data. >>> print(t2) 2012-10-05 00:04:28.523100 >>> print(d2.matrix.max()) # Max of the counts 168.0 Here ``d2`` is the :class:`irfpy.imacommon.TimeSeriesCntMatrix` object. Note that the first and last 3d data is NOT necessarily bounded. This means that a part of elevation scans for those dataset could be masked. Therefore, it is a user's responsibility to check the completeness of 3D data. Below is the example. You may realize the first 11 elevation is masked. The time obtained is the time of the returned data: in this case the time at elevation index 11. >>> t0, d0 = dataset[0] >>> print(t0) 2012-10-05 00:00:16.429320 >>> print(d0.matrix[31, 15, 95, :]) # M=31, A=15, E=95, and all P is printed. [-- -- -- -- -- -- -- -- -- -- -- 0.0 0.0 0.0 0.0 0.0] """ dat2d = getarray2d(t0, t1, emulate_full=emulate_full) dat3d = imascipac.convert2to3(dat2d, emulate_full=emulate_full) return _copy.deepcopy(dat3d)
[docs]def get3d(t, emulate_full=False): """ Get the 3-D data at the time of t0. :param t: Time of observation :param emulate_full: Emulate M24, the full mode, if *True* :return: A tuple, (time_start, VEX/IMA_data), where time_start is the datetime object and VEX/IMA_Data is an object of :class:`irf.imacommon.imascipac.CntMatrix`. >>> t = datetime.datetime(2012, 10, 5, 3, 0, 0) >>> tstart, data = get3d(t) >>> print(tstart) 2012-10-05 02:57:17.930260 >>> print(data) <<class 'irfpy.imacommon.imascipac.CntMatrix'>(VEX/IMA)@2012-10-05T02:57:17.930260:MOD=24 >>24<<:CNTmax=248> The emulate_full option will emulate the collapsed data (mode not 24) to the full matrix data. >>> t = datetime.datetime(2008, 1, 9, 14, 30, 0) >>> tstart, data = get3d(t) >>> print(data) <<class 'irfpy.imacommon.imascipac.CntMatrix'>(VEX/IMA)@2008-01-09T14:26:51.844060:MOD=25 >>25<<:CNTmax=672> >>> tstart, data = get3d(t, emulate_full=True) # Mode 24 emulation is enabled. >>> print(data) <<class 'irfpy.imacommon.imascipac.CntMatrix'>(VEX/IMA)@2008-01-09T14:26:51.844060:MOD=24 >>25<<:CNTmax=336> The returned data is the 3-D data that was obtained at the time right before the given time. It may sometimes really long time back. >>> t = datetime.datetime(2012, 10, 5, 7, 0, 0) >>> tstart, data = get3d(t) >>> print(tstart) 2012-10-05 03:45:06.336740 >>> print(data) <<class 'irfpy.imacommon.imascipac.CntMatrix'>(VEX/IMA)@2012-10-05T03:45:06.336740:MOD=24 >>24<<:CNTmax=0> """ dt = datetime.timedelta(minutes=10) t0 = t - dt t1 = t + dt timeser3d = getarray3d(t0, t1, emulate_full=emulate_full) tstart, data3d = timeser3d.get(t) return tstart, _copy.deepcopy(data3d)
[docs]class iter3d: ''' An iteration over 3D data for a given time range. To analyze longer data in one session, it may be a good idea to iterate over the time without loading too much data at once. Memory needed will be so large, first, and it takes much time before starting analysis. Here comes the use of :class:`iter3d` functionality. Below to show an example >>> t0 = datetime.datetime(2007, 10, 30, 18) >>> t1 = datetime.datetime(2007, 10, 31, 1) >>> for dat in iter3d(t0, t1): # Iteration over t0 and t1 # doctest: +ELLIPSIS ... print(dat.t) # You may put any processing on ``dat`` each by each 2007-10-30 18:00:15.977120 2007-10-30 18:03:27.977140 2007-10-30 18:06:40.070920 ... 2007-10-31 00:54:27.480320 2007-10-31 00:57:39.574080 >>> datlist = list(iter3d(t0, t1)) # This is a similar syntax as ``getarray3d``, while you get a list of 3D data. >>> print(len(datlist)) 61 ''' glue = datetime.timedelta(seconds=1) def __init__(self, t0, t1, emulate_full=False): _logger.debug('Instance iter3d') _warnings.warn('irfpy.vima.scidata_util.iter3d() shall be deprecated. Use DataCenterCount3d.iter() instead.', DeprecationWarning) self.emulate_full = emulate_full self.t0 = t0 self.t1 = t1 self.filename_list = list(_tree.getfiles(t0, t1)) self.current_filename = None self.arr3d = None self.obstime_list = [] self.current_obstime = None self.previous_obstime = datetime.datetime(2000, 1, 1) # Older than the launch
[docs] def loadfile(self, filename): """ Return the obstime for the given filename """ arr2d = _arrange_matlab_file(filename, emulate_full=self.emulate_full) tlist = arr2d.getobstime() if len(tlist) == 0: return imascipac.TimeSeriesCntMatrix() # Basically no-element array returned. t0 = min(tlist) + self.glue # BUG? Isn't it minus? t1 = max(tlist) + self.glue return getarray3d(t0, t1, emulate_full=self.emulate_full)
# obstime = list(self.arr3d.getobstime()) # return obstime def __iter__(self): return self def __next__(self): if len(self.filename_list) == 0: raise StopIteration() if self.current_obstime is None: if self.current_filename is not None: self.filename_list.pop(0) if len(self.filename_list) == 0: raise StopIteration() self.arr3d = None while self.arr3d is None: try: self.current_filename = self.filename_list[0] self.arr3d = self.loadfile(self.current_filename) self.obstime_list = list(self.arr3d.getobstime()) except IrfpyException: self.filename_list.pop(0) if len(self.filename_list) == 0: raise StopIteration if len(self.obstime_list) == 0: self.current_obstime = None return self.__next__() self.current_obstime = self.obstime_list[0] self.obstime_list.pop(0) while self.current_obstime < self.t0 or self.current_obstime <= self.previous_obstime: self.current_obstime = self.obstime_list[0] self.obstime_list.pop(0) if len(self.obstime_list) == 0: self.current_obstime = None return self.__next__() if self.current_obstime > self.t1: raise StopIteration() self.previous_obstime = self.current_obstime return self.arr3d.get(self.current_obstime + self.glue)[1]
[docs]def get2d(t, emulate_full=False): """ Get the 2-D data at a specific time. :param t: Time of observation :param emulate_full: Emulate M24 if *True*. :return: A tuple, (time_start, VEX/IMA 2D data). The IMA 2D data is an object of :class:`irfpy.imacommon.imascipac.CntMatrix2D`. >>> t, data = get2d(datetime.datetime(2008, 1, 9, 14, 30)) >>> print(t) 2008-01-09 14:29:39.844060 >>> print(data) <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2008-01-09T14:29:39.844060:MOD=25 >>25<<:POL=14/15:CNTmax=4> """ dt = datetime.timedelta(minutes=10) t0 = t - dt t1 = t + dt timeser2d = getarray2d(t0, t1, emulate_full=emulate_full) tstart, data2d = timeser2d.get(t) return tstart, _copy.deepcopy(data2d)
[docs]class iter2d: """ Iteration over 2-D data. >>> t0 = datetime.datetime(2011, 11, 5, 22, 34) >>> t1 = datetime.datetime(2011, 11, 5, 23, 15) >>> for dat in iter2d(t0, t1): # doctest: +ELLIPSIS ... print(dat) <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2011-11-05T22:34:08.548000:MOD=24 >>24<<:POL=11/11:CNTmax=14> <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2011-11-05T22:34:20.548000:MOD=24 >>24<<:POL=12/12:CNTmax=23> <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2011-11-05T22:34:32.548000:MOD=24 >>24<<:POL=13/13:CNTmax=11> ... <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2011-11-05T23:14:44.860660:MOD=24 >>24<<:POL=06/06:CNTmax=10> <<class 'irfpy.imacommon.imascipac.CntMatrix2D'>(VEX/IMA)@2011-11-05T23:14:56.860640:MOD=24 >>24<<:POL=07/07:CNTmax=11> """ _logger = _logging.getLogger(__name__ + ".iter2d") glue = datetime.timedelta(seconds=1) def __init__(self, t0, t1, emulate_full=False): _warnings.warn('irfpy.vima.scidata_util.iter2d() shall be deprecated. Use DataCenterCount2d.iter() instead.', DeprecationWarning) self.t0 = t0 self.t1 = t1 self.emulate_full = emulate_full self.filename_list = list(_tree.getfiles(t0, t1)) self.arr2d = None self.current_file_contents = None self.current_file_position = -1 self.current_obstime = self.t0 def __iter__(self): return self
[docs] def loadfile(self, filename): try: arr2d = _arrange_matlab_file(filename, emulate_full=self.emulate_full) except ex.IrfpyException: self._logger.warning('Skip loading file {}'.format(filename)) return None, [] # If data cannot be read, empty list is returned tlist = arr2d.getobstime() return arr2d, tlist
def __next__(self): while len(self.filename_list) > 0: if self.current_file_contents is None: # Called only self.arr2d, self.current_file_contents = self.loadfile(self.filename_list[0]) # The time list of the file self.current_file_position = -1 self.current_file_position += 1 try: t = self.current_file_contents[self.current_file_position] if t <= self.current_obstime: continue # If the data is old by some reason elif t > self.t1: raise StopIteration() # Now iteration is over dat2d = self.arr2d.get(t) dat2d[1].name = SENSOR_NAME self.current_obstime = t return dat2d[1] except IndexError: # All the data has been iterated self.filename_list.pop(0) self.current_file_contents = None self.current_file_position = -1 raise StopIteration()
### Processing that is used for both MEX-VEX. from irfpy.imacommon.imascipac_util import interpolate_zero_azimuth, noise_reduction_constant, noise_reduction_isolated_counts