Source code for irfpy.mima.imaextra

'''IMA extra data file access

See :ref:`ima_extra` to start using the IMA extra data.

This module provides the access to the MEX/IMA extra dataset.
The database location should be specified by [mima] section imaddncbase entry
of .irfpyrc.

>>> mimaddncpath = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
>>> mimaextrapath = os.path.join(mimaddncpath, 'IMAEXTRA')
>>> mimaparampath = os.path.join(mimaddncpath, 'IMAPARAM')

The simplest way of getting the file is to specify the file name to
:func:`getImaExtraFile`.

>>> fe = getImaExtraFile('imaextra20040240408.nc.gz')
>>> fp = getImaParamFile('imaparam20070121932.nc.gz')

You may also use the time directly.

>>> fe = getImaExtraFile(t=datetime.datetime(2004, 1, 25, 5, 0))
>>> fp = getImaParamFile(t=datetime.datetime(2007, 1, 13, 20, 0))

*More on module*

Each IMAEXTRA file corresponds to an :class:`ImaExtraFile` object.
It can be instantiated manually, but users are recommended to use
:class:`ImaExtraFileFactory`, which provides an accessor to, and a cache of,
:class:`ImaExtraFile` objects.

>>> fact = ImaExtraFileFactory.createFactory()
>>> iex = fact.getImaExtraFile(os.path.join(mimaextrapath, 'imaextra20040240408.nc.gz'))
>>> print(len(iex.var['Time']))
34

>>> fact2 = ImaParamFileFactory.createFactory()
>>> ipr = fact2.getImaParamFile(os.path.join(mimaparampath, 'imaparam20070121932.nc.gz'))
>>> print(len(ipr.var['Time']))
38

The dataset includes many files (and thus many :class:`ImaExtraFile` objects).
The :class:`ImaExtraDatabase` provides a simple database of the dataset.
This class can convert a time to the corresponding file name
(:meth:`get_filename`).

>>> db = ImaExtraDatabase.createDatabase()
>>> fn = db.get_filename(datetime.datetime(2004, 1, 25, 5, 0))
>>> print(os.path.basename(fn))    # The filename
imaextra20040240408.nc.gz

>>> db2 = ImaParamDatabase.createDatabase()
>>> fn = db2.get_filename(datetime.datetime(2007, 1, 13, 20, 0))
>>> print(os.path.basename(fn))
imaparam20070121932.nc.gz
'''


import os
import datetime
import logging

import gzip
import tempfile

import numpy as np

import logging as _logging

import irfpy.imacommon.imaextra2
from irfpy.imacommon.imaextra2 import ImaDdNcFileFactory, ImaDdNcDatabase

import irfpy.util.irfpyrc
import irfpy.util.timeseriesdb
import irfpy.util.ringcache
from irfpy.util import exception as _ex
from irfpy.util import datacenter as _dc

_logger = _logging.getLogger(__name__)

class DataCenterImaParam(_dc.BaseDataCenter):
    """Data center serving MEX/IMA parameter data.

    Every element produced by this data center is an :class:`ImaParam`
    object trimmed around a single observation time.

    >>> from irfpy.mima import imaextra
    >>> dc = imaextra.DataCenterImaParam()
    >>> import datetime
    >>> t0 = datetime.datetime(2011, 6, 18, 6, 30)
    >>> tobs, iparam = dc.nearest(t0)
    >>> print(tobs)
    2011-06-18 06:30:00.236000
    >>> print(iparam.ndat)
    1
    >>> print(iparam.data['Density_P'])
    [1.1828649]
    """

    def __init__(self):
        _dc.BaseDataCenter.__init__(self)

    def search_files(self):
        """Collect every IMAPARAM file below the configured database folder.

        The base folder is read from the ``[mima] imaddncbase`` entry of
        ``.irfpyrc``; its ``IMAPARAM`` sub-folder is walked recursively.

        :return: Paths of all files named ``imaparam*.nc.gz``, in the order
            :func:`os.walk` visits them.
        :rtype: ``list`` of ``str``
        """
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        root = os.path.join(base, 'IMAPARAM')
        return [
            os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk(root)
            for name in names
            if name.startswith('imaparam') and name.endswith('.nc.gz')
        ]

    def approximate_starttime(self, filename):
        """Derive the start time of a file from its name.

        The base name is expected to look like ``imaparamYYYYDDDhhmm...``:
        the time stamp starts right after the 8-character prefix.

        :param filename: Path (or base name) of an IMAPARAM file.
        :return: Start time encoded in the file name.
        :rtype: ``datetime.datetime``
        """
        base = os.path.basename(filename)
        year = int(base[8:12])
        # The day counter is added to 1 January as-is, i.e. it is treated as
        # zero-based.  This agrees with the module examples
        # (2004-01-25 <-> ``imaextra2004024...``).
        day_offset = int(base[12:15])
        hour = int(base[15:17])
        minute = int(base[17:19])
        new_year = datetime.datetime(year, 1, 1, hour, minute)
        return new_year + datetime.timedelta(days=day_offset)

    def read_file(self, filename):
        """Read one IMAPARAM file and split it into single-time elements.

        :param filename: Path of the file to read.
        :return: ``(times, params)``; ``params[i]`` is an :class:`ImaParam`
            trimmed to +/- 1 minute around ``times[i]``.  A pair of empty
            lists is returned when the file cannot be opened (``OSError``).
        """
        try:
            param_file = ImaParamFileFactory.createFactory().getImaParamFile(filename)
        except OSError:
            _logger.warning('The file {} looks corrupted. Ignoring the file and continue.'.format(filename))
            return [], []

        whole = param_file.getImaParam()
        margin = datetime.timedelta(minutes=1)
        pieces = []
        for tobs in whole.obstime:
            # trim() works in place, so each element needs its own fresh object.
            piece = param_file.getImaParam()
            piece.trim(tobs - margin, tobs + margin)
            pieces.append(piece)
        return (whole.obstime, pieces)
def isdb():
    """Tell whether the MEX/IMA DDNC database folder is available.

    :return: ``True`` if the ``[mima] imaddncbase`` entry of ``.irfpyrc`` is
        set and points to an existing directory, ``False`` otherwise.
    :rtype: ``bool``
    """
    base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
    return base is not None and os.path.isdir(base)
class ImaExtra(irfpy.imacommon.imaextra2.ImaExtraCommon):
    """MEX/IMA extra data for a specific time range.

    This class holds the IMA extra data for a specific time range.
    It is generally produced by :func:`getImaExtra` or by the
    :func:`iter_imaextra` iterator.
    See the respective functions for how to get the data.

    Once produced, the following methods give you mass-separated
    count spectra.

    - :meth:`getHpSpec` for proton spectra
    - :meth:`getHeavySpec` for heavy spectra
    - :meth:`getRestRm` for rest matrix
    - :meth:`getHGspec` (alias :meth:`getHGSpec`) for proton ghost
    - :meth:`getobstime` for observation time
    """

    data_keys = ['AngTableN', 'ETableN', 'Noise', 'HGhostSpec', 'FracO2',
                 'HeavySpec', 'HpSpec', 'Pacc', 'RestRm', 'Time']

    def __init__(self):
        irfpy.imacommon.imaextra2.ImaExtraCommon.__init__(self, self.data_keys)

    def __add__(self, other):
        """Return a new :class:`ImaExtra` holding ``self`` followed by ``other``."""
        new = ImaExtra()
        new.append(self)
        new.append(other)
        return new

    def getHGspec(self):
        """Return the proton ghost spectra.  Counts.

        :return: Proton ghost spectra with the axes of ``HGhostSpec``
            reversed, i.e. (A, E, P, Time) if the stored array is
            (Time, P, E, A) as the original author noted.
        """
        spec = self.data['HGhostSpec']    # Stored as (T, P, E, A) per the original note.
        spec = np.transpose(spec, (3, 2, 1, 0))
        return spec

    # Alias spelled like the sibling accessors (getHpSpec, getHeavySpec);
    # the class documentation historically advertised this spelling.
    getHGSpec = getHGspec

    def __str__(self):
        # min()/max() raise ValueError on an empty sequence, so an object
        # without any observation gets a shorter description instead.
        if len(self.obstime) == 0:
            return "<MEX/IMA extra: With {} data>".format(self.ndat)
        return "<MEX/IMA extra: With {} data from {} to {}>".format(
            self.ndat, min(self.obstime), max(self.obstime))
class ImaExtraFile(irfpy.imacommon.imaextra2.ImaExtraFileCommon):
    '''A single IMA extra file of MEX.

    Prefer obtaining instances through :class:`ImaDdNcFileFactory`
    so that caching is effective.

    The member ``dim`` contains the dimension information and
    ``var`` the variables.
    '''

    @classmethod
    def get_sample_filename(cls):
        '''Return the path of the sample file ``sample/imaextra20091361924.nc.gz``.

        The path is resolved relative to this module as a package resource.
        The snippets below are illustrative and are not executed as doctests.

        ... fn = ImaExtraFile.get_sample_filename()
        ... im = ImaExtraFile(fn)
        ... print(len(im.dim['TimeLength']))
        17
        ... print(len(im.dim['Time']))
        17
        ... t0 = im.var['Time'][0]    # t0 will be masked array.
        ... print(t0.compressed().data)
        2009136192402367

        Getting time.

        The observation times can be obtained via the
        :meth:`ImaExtraFile.getobstime` method.

        ... t = im.getobstime()
        ... print(len(t))
        17
        ... print(t[0])
        2009-05-17 19:24:02.367000
        '''
        import pkg_resources
        relative = os.path.join('sample', 'imaextra20091361924.nc.gz')
        return pkg_resources.resource_filename(__name__, relative)

    def __init__(self, filename, gunzip=True):
        '''Open ``filename`` and read its contents (delegated to the base class).'''
        irfpy.imacommon.imaextra2.ImaExtraFileCommon.__init__(self, filename, gunzip=gunzip)

    def getImaExtra(self):
        '''Build an :class:`ImaExtra` object from the contents of this file.

        Every variable listed in the new object's ``data`` is copied from
        ``self.var``; ``trim_none()`` is applied before returning.

        :return: Data
        :rtype: :class:`ImaExtra`
        '''
        extra = ImaExtra()
        extra.obstime = self.getobstime()
        extra.ndat = len(extra.obstime)
        # Snapshot the keys first: the mapping is assigned to while looping.
        for key in tuple(extra.data.keys()):
            extra.data[key] = np.array(self.var[key], copy=True)
        extra.trim_none()
        return extra
class ImaParam(irfpy.imacommon.imaextra2.ImaParamCommon):
    '''Container of the IMA parameter data for MEX.

    The snippets below are illustrative and are not executed as doctests.

    ... t0 = datetime.datetime(2010, 7, 11, 16, 12)
    ... file0 = getImaParamFile(t=t0)
    ... ip0 = file0.getImaParam()
    ... print ip0.ndat
    32

    ... t1 = datetime.datetime(2010, 7, 13, 10, 25)
    ... file1 = getImaParamFile(t=t1)
    ... ip1 = file1.getImaParam()
    ... print ip1.ndat
    38

    Two objects can be concatenated with ``+``.

    ... ip01 = ip0 + ip1
    ... print ip01.data['Velocity_P'].shape
    (70, 3)
    ... ip10 = ip1 + ip0
    ... print ip01.data['Velocity_O'].shape
    (70, 3)
    ... print (ip01.data['Temperature_P'] == ip10.data['Temperature_P']).all()
    True
    ... print ip01.obstime == ip10.obstime
    True

    If you specify the same dataset for adding, duplication check works.

    ... ip11 = ip1 + ip1   # If you add two identical dataset
    ... print ip11.data['ScanFlag'].shape   # the result is the same as before.
    (38,)
    ... print ip1.obstime == ip11.obstime
    True

    The data can be trimmed in place to a time range.

    ... t0 = datetime.datetime(2010, 7, 11, 16, 12)
    ... file0 = getImaParamFile(t=t0)
    ... ip0 = file0.getImaParam()
    ... print ip0.ndat
    32
    ... t1 = datetime.datetime(2010, 7, 11, 17)
    ... t2 = datetime.datetime(2010, 7, 11, 17, 15)
    ... ip0.trim(t1, t2)
    ... print ip0.ndat
    5
    '''

    # Names of the variables stored in ``data``.
    data_keys = [
        'ScanFlag',
        'Quality_P', 'Density_P', 'Velocity_P', 'Temperature_P', 'EnergyThermal_P',
        'Quality_O', 'Density_O', 'Velocity_O', 'Temperature_O', 'EnergyThermal_O',
        'Quality_O2', 'Density_O2', 'Velocity_O2', 'Temperature_O2', 'EnergyThermal_O2',
    ]

    def __init__(self):
        irfpy.imacommon.imaextra2.ImaParamCommon.__init__(self, self.data_keys)

    def __add__(self, other):
        """Return a new :class:`ImaParam` holding ``self`` followed by ``other``."""
        combined = ImaParam()
        for part in (self, other):
            combined.append(part)
        return combined
class ImaParamFile(irfpy.imacommon.imaextra2.ImaParamFileCommon):
    '''An IMA param file for MEX.

    IMA param file class.
    Usually, you want to instantiate it via :class:`ImaDdNcFileFactory`
    so that caching is effective.

    The member ``dim`` contains the dimension information and
    ``var`` the variables.
    '''

    def __init__(self, filename, gunzip=True):
        '''Open the file and read the data (delegated to the base class).'''
        irfpy.imacommon.imaextra2.ImaParamFileCommon.__init__(self, filename, gunzip=gunzip)

    def getImaParam(self):
        '''Obtain the :class:`ImaParam` object for the contents of this file.

        Every key of the new object's ``data`` is copied from ``self.var``.
        A key missing from the file is reported with a warning and replaced
        by a NaN array of length ``ndat``.

        :return: Data
        :rtype: :class:`ImaParam`
        '''
        ip = ImaParam()
        ip.obstime = self.getobstime()
        ip.ndat = len(ip.obstime)
        # Snapshot the keys first: the mapping is assigned to while looping.
        for k in list(ip.data.keys()):
            try:
                ip.data[k] = np.array(self.var[k], copy=True)
            except KeyError:
                _logger.warning(
                    "The database does not contain information on {} at {}. NaN inserted.".format(
                        k, self.filename))
                # NOTE(review): the filler is always 1-D, also for vector
                # quantities such as the velocities -- confirm this is intended.
                ip.data[k] = np.full([ip.ndat], np.nan)
        return ip
class ImaExtraFileFactory(ImaDdNcFileFactory):
    """Factory producing :class:`ImaExtraFile` objects.

    A shared "master" instance is available through :meth:`createFactory`.
    """

    __singleton_instance = None

    def __init__(self, *args, **kwds):
        ImaDdNcFileFactory.__init__(self, ImaExtraFile, *args, **kwds)

    @classmethod
    def createFactory(cls):
        '''Return the "master" factory, creating it on first use.'''
        master = cls.__singleton_instance
        if master is None:
            master = ImaExtraFileFactory()
            cls.__singleton_instance = master
        return master

    def getImaExtraFile(self, filename, gunzip=True):
        """Return the file object for ``filename`` via ``getDdNcFile``."""
        return self.getDdNcFile(filename, gunzip=gunzip)
class ImaParamFileFactory(ImaDdNcFileFactory):
    """Factory producing :class:`ImaParamFile` objects.

    A shared "master" instance is available through :meth:`createFactory`.
    """

    __singleton_instance = None

    def __init__(self, *args, **kwds):
        ImaDdNcFileFactory.__init__(self, ImaParamFile, *args, **kwds)

    @classmethod
    def createFactory(cls):
        '''Return the "master" factory, creating it on first use.'''
        master = cls.__singleton_instance
        if master is None:
            master = ImaParamFileFactory()
            cls.__singleton_instance = master
        return master

    def getImaParamFile(self, filename, gunzip=True):
        """Return the file object for ``filename`` via ``getDdNcFile``."""
        return self.getDdNcFile(filename, gunzip=gunzip)
class ImaExtraDatabase(ImaDdNcDatabase):
    '''Database of the IMAEXTRA files.

    The snippet below is illustrative and is not executed as a doctest.

    ... db = ImaExtraDatabase.createDatabase()
    ... t = datetime.datetime(2004, 1, 25, 5, 0)
    ... print os.path.basename(db.get_filename(t))
    imaextra20040240408.nc.gz
    '''

    __singleton_instance = None

    def __init__(self, database=None):
        """Set up the database.

        :param database: Folder holding the IMAEXTRA files.  When ``None``,
            the ``IMAEXTRA`` sub-folder of the ``[mima] imaddncbase`` entry
            of ``.irfpyrc`` is used.
        """
        if database is None:
            base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
            database = os.path.join(base, 'IMAEXTRA')
        # A dedicated (non-master) factory instance is handed to the base class.
        ImaDdNcDatabase.__init__(self, database, 'imaextra', ImaExtraFileFactory())

    @classmethod
    def createDatabase(cls, refresh=False):
        '''Return the shared database, (re)building it when needed.

        :param refresh: If true, a new database replaces the cached one.
        '''
        if refresh or cls.__singleton_instance is None:
            cls.__singleton_instance = ImaExtraDatabase()
        return cls.__singleton_instance
class ImaParamDatabase(ImaDdNcDatabase):
    """Database of the IMAPARAM files."""

    __singleton_instance = None

    def __init__(self, database=None):
        """Set up the database.

        :param database: Folder holding the IMAPARAM files.  When ``None``,
            the ``IMAPARAM`` sub-folder of the ``[mima] imaddncbase`` entry
            of ``.irfpyrc`` is used.
        """
        if database is None:
            base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
            database = os.path.join(base, 'IMAPARAM')
        # A dedicated (non-master) factory instance is handed to the base class.
        ImaDdNcDatabase.__init__(self, database, 'imaparam', ImaParamFileFactory())

    @classmethod
    def createDatabase(cls, refresh=False):
        '''Return the shared database, (re)building it when needed.

        :param refresh: If true, a new database replaces the cached one.
        '''
        if refresh or cls.__singleton_instance is None:
            cls.__singleton_instance = ImaParamDatabase()
        return cls.__singleton_instance
def getImaExtraFile(filename=None, t=None, gunzip=True):
    '''Easy accessor to an :class:`ImaExtraFile`, backed by the factory cache.

    :param filename: File to open.  If it cannot be opened as given
        (``IOError``), it is retried relative to the ``IMAEXTRA`` folder of
        the configured database.  Takes precedence over ``t``.
    :param t: Time used to look the file up in :class:`ImaExtraDatabase`.
    :param gunzip: Forwarded to the factory.
    :raises ValueError: If neither ``filename`` nor ``t`` is given.
    '''
    if filename is None:
        if t is None:
            raise ValueError('Either ``filename`` or ``t`` should be specified')
        found = ImaExtraDatabase.createDatabase().get_filename(t)
        return ImaExtraFileFactory.createFactory().getImaExtraFile(found, gunzip=gunzip)

    try:
        return ImaExtraFileFactory.createFactory().getImaExtraFile(filename, gunzip=gunzip)
    except IOError:
        log = logging.getLogger('getImaExtraFile')
        log.warning('No file found %s.' % filename)
        # Second attempt: interpret the name relative to the database folder.
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        filename = os.path.join(base, 'IMAEXTRA', filename)
        log.warning('Try %s now.' % filename)
        return ImaExtraFileFactory.createFactory().getImaExtraFile(filename, gunzip=gunzip)
def getImaParamFile(filename=None, t=None, gunzip=True):
    '''Easy accessor to an :class:`ImaParamFile`, backed by the factory cache.

    :param filename: File to open.  If it cannot be opened as given
        (``IOError``), it is retried relative to the ``IMAPARAM`` folder of
        the configured database.  Takes precedence over ``t``.
    :param t: Time used to look the file up in :class:`ImaParamDatabase`.
    :param gunzip: Forwarded to the factory.
    :raises ValueError: If neither ``filename`` nor ``t`` is given.
    '''
    if filename is None:
        if t is None:
            raise ValueError('Either ``filename`` or ``t`` should be specified')
        found = ImaParamDatabase.createDatabase().get_filename(t)
        return ImaParamFileFactory.createFactory().getImaParamFile(found, gunzip=gunzip)

    try:
        return ImaParamFileFactory.createFactory().getImaParamFile(filename, gunzip=gunzip)
    except IOError:
        log = logging.getLogger('getImaParamFile')
        log.warning('No file found %s.' % filename)
        # Second attempt: interpret the name relative to the database folder.
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        filename = os.path.join(base, 'IMAPARAM', filename)
        log.warning('Try %s now.' % filename)
        return ImaParamFileFactory.createFactory().getImaParamFile(filename, gunzip=gunzip)
def getImaParam(t0, t1):
    '''Return the :class:`ImaParam` object for a time range.

    All files the database reports for ``t0`` to ``t1`` are read and
    concatenated; the result is then trimmed to the range.

    :param t0: Time to start
    :param t1: Time to end
    :raises irfpy.util.exception.IrfpyException: If the database is empty.
    '''
    database = ImaParamDatabase.createDatabase()
    if len(database) == 0:
        raise _ex.IrfpyException('Data file not found.  Check the setup (e.g., .irfpyrc, or the data file)')

    merged = ImaParam()
    for path in database.get_filenames(t0, t1):
        param_file = ImaParamFileFactory.createFactory().getImaParamFile(path)
        merged.append(param_file.getImaParam())
    merged.trim(t0, t1)
    return merged
def getImaExtra(t0, t1):
    '''Get the IMA extra data in the form of an :class:`ImaExtra` object.

    All files the database reports for ``t0`` to ``t1`` are read and
    concatenated; the result is then trimmed to the range.

    :param t0: Time to start
    :param t1: Time to end
    :return: IMA extra object
    :rtype: :class:`ImaExtra`
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    >>> imaExtra = getImaExtra(datetime.datetime(2012, 10, 5, 6), datetime.datetime(2012, 10, 6, 8))
    >>> t = imaExtra.obstime
    >>> print(len(t))
    167

    >>> hp = imaExtra.getHpSpec()
    >>> print(hp.shape)
    (16, 96, 16, 167)
    >>> print(hp.max())
    494.02887

    >>> op = imaExtra.getHeavySpec()
    >>> print(op.shape)
    (16, 96, 16, 167)
    >>> print(op.max())
    5126.1104
    '''
    database = ImaExtraDatabase.createDatabase()
    if len(database) == 0:
        raise _ex.IrfpyException('Data file not found.  Check the setup (e.g., .irfpyrc, or the data file)')

    merged = ImaExtra()
    for path in database.get_filenames(t0, t1):
        extra_file = ImaExtraFileFactory.createFactory().getImaExtraFile(path)
        merged.append(extra_file.getImaExtra())
    merged.trim(t0, t1)
    return merged
class iterparam:
    """ Iterate the IMA parameter file.

    Iterated is a tuple ``(t, dat)`` where ``t`` is the time of
    the observation start, and ``dat`` is a dictionary that contains
    the data.  The keys of ``dat`` are :attr:`ImaParam.data_keys`.
    The ``dat`` is *not* a :class:`ImaParam` object, but rather
    one record of the :attr:`ImaParam.data` data.

    :param t0: Time to start.
        Iterated data's time is always later than or equal to t0.
    :param t1: Time to stop.
        Iterated data's time is always earlier than or equal to t1.
    :returns: ``(t, dat)``.  ``t`` is time, and ``dat`` is a dictionary
        containing the IMAPARAM data.
        Its keys are :attr:`ImaParam.data_keys`.
    :rtype: (``datetime.datetime``, ``dict``)
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    If you want to iterate data from 2010-11-07T05:15 for 10 minutes,
    you can do as follows:

    >>> t0 = datetime.datetime(2010, 11, 7, 5, 15)
    >>> t1 = datetime.datetime(2010, 11, 7, 5, 25)
    >>> for t, mom in iterparam(t0, t1):
    ...     print(t, mom['Density_P'])
    2010-11-07 05:17:53.821000 0.61079055
    2010-11-07 05:21:05.789000 0.39960635
    2010-11-07 05:24:17.851000 0.21043392
    """

    #: Key of the data
    data_keys = ImaParam.data_keys

    _logger = logging.getLogger(__name__ + '.iterparam')

    def __init__(self, t0, t1):
        self.t0 = t0
        self.t1 = t1

        db = ImaParamDatabase.createDatabase()   # Database of IMA param files.

        if len(db) == 0:
            raise _ex.IrfpyException('Data file not found.  Check the setup (e.g., .irfpyrc, or the data file)')

        # Copy: the list is consumed destructively (pop) while iterating and
        # must not alias whatever the database returned.
        self.filename_list = list(db.get_filenames(self.t0, self.t1))

        self.current_file_contents = None
        self.current_file_position = -1
        self.current_obstime = self.t0
        # Until the first record has been returned the lower bound (t0) is
        # inclusive; afterwards records must be strictly newer than the last
        # returned one, so that nothing is returned twice.
        self._has_yielded = False

    def __iter__(self):
        return self

    def __next__(self):
        while len(self.filename_list) > 0:
            self._logger.debug('Remained files = {}'.format(len(self.filename_list)))
            self._logger.debug('The target file = {}'.format(self.filename_list[0]))

            # If current file contents are not loaded
            if self.current_file_contents is None:
                self._logger.debug('No file contents have been loaded. Start to load')
                self.current_file_contents = getImaParamFile(filename=self.filename_list[0]).getImaParam()
                self.current_file_position = -1
                # NOTE(review): a failure to load the file is not handled here.
                self._logger.debug('File contents read finished.')

            # Advance by one record, and return it if it is in range.
            self._logger.debug('Progress 1 data')
            self.current_file_position += 1

            try:
                self._logger.debug('Get the time the loaded contents')
                t = self.current_file_contents.obstime[self.current_file_position]
                self._logger.debug('Finish time list: Result {}'.format(t))

                if self._has_yielded:
                    too_early = t <= self.current_obstime
                else:
                    too_early = t < self.t0
                if too_early:
                    self._logger.debug('The obtainted time is not appropriate. Skip this')
                    continue
                if t > self.t1:
                    self._logger.debug('The obtainted time is past the required range.  Stop iteration')
                    raise StopIteration()

                self._logger.debug('Start loading the IMA param file.')
                dat = self.current_file_contents
                self._logger.debug('Finished.')

                self._logger.debug('Preparation of the returned data')
                dat2 = {}
                for k in self.data_keys:
                    dat2[k] = dat.data[k][self.current_file_position]
                self._logger.debug('Finish preparation')

                self.current_obstime = t
                self._has_yielded = True
                return t, dat2

            except IndexError:
                # The position ran past the end of the current file.
                self._logger.debug('Whole the data is returned. Go next file')
                self.filename_list.pop(0)    # Remove the current file
                self.current_file_contents = None
                self.current_file_position = -1

        raise StopIteration()
class iter_imaextra:
    """ Iterate the IMA extra data.

    Iterate the IMA extra data, as a tuple of ``(time, data)``.
    Here data is an object of :class:`ImaExtra` trimmed to +/- 1 second
    around ``time``.

    :param t0: Time to start.  Iterated times are later than or equal to t0.
    :param t1: Time to stop.  Iterated times are earlier than or equal to t1.
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    >>> t0 = datetime.datetime(2010, 11, 7, 5, 15)
    >>> t1 = datetime.datetime(2010, 11, 7, 5, 25)
    >>> for t, d in iter_imaextra(t0, t1):
    ...     print(t, d.getHeavySpec().shape, d.getHpSpec().max())
    2010-11-07 05:17:53.821000 (16, 96, 16, 1) 42.640816
    2010-11-07 05:21:05.789000 (16, 96, 16, 1) 63.204727
    2010-11-07 05:24:17.852000 (16, 96, 16, 1) 57.680725
    """

    data_keys = ImaExtra.data_keys

    _logger = logging.getLogger(__name__ + '.iter_imaextra')

    def __init__(self, t0, t1):
        self.t0 = t0
        self.t1 = t1

        db = ImaExtraDatabase.createDatabase()   # Database of IMA extra files.

        if len(db) == 0:
            raise _ex.IrfpyException('Data file not found.  Check the setup (e.g., .irfpyrc, or the data file)')

        # Copy: the list is consumed destructively (pop) while iterating and
        # must not alias whatever the database returned.
        self.filename_list = list(db.get_filenames(self.t0, self.t1))

        self.current_file_contents = None
        self.current_file_position = -1
        self.current_obstime = self.t0
        # Until the first record has been returned the lower bound (t0) is
        # inclusive; afterwards records must be strictly newer than the last
        # returned one, so that nothing is returned twice.
        self._has_yielded = False

    def __iter__(self):
        return self

    def __next__(self):
        import copy as _copy

        one_second = datetime.timedelta(seconds=1)

        while len(self.filename_list) > 0:
            self._logger.debug('Remained files = {}'.format(len(self.filename_list)))
            self._logger.debug('The target file = {}'.format(self.filename_list[0]))

            # If current file contents are not loaded
            if self.current_file_contents is None:
                self._logger.debug('No file contents have been loaded. Start to load')
                self.current_file_contents = getImaExtraFile(filename=self.filename_list[0]).getImaExtra()
                self.current_file_position = -1
                # NOTE(review): a failure to load the file is not handled here.
                self._logger.debug('File contents read finished.')

            # Advance by one record, and return it if it is in range.
            self._logger.debug('Progress 1 data')
            self.current_file_position += 1

            try:
                self._logger.debug('Get the time the loaded contents')
                t = self.current_file_contents.obstime[self.current_file_position]
                self._logger.debug('Finish time list: Result {}'.format(t))

                if self._has_yielded:
                    too_early = t <= self.current_obstime
                else:
                    too_early = t < self.t0
                if too_early:
                    self._logger.debug('The obtainted time is not appropriate. Skip this')
                    continue
                if t > self.t1:
                    self._logger.debug('The obtainted time is past the required range.  Stop iteration')
                    raise StopIteration()

                self._logger.debug('Start loading the IMA extra file.')
                dat = self.current_file_contents
                # trim() works in place, so operate on a private copy and keep
                # the loaded file contents intact for the following records.
                dat2 = _copy.deepcopy(dat)
                dat2.trim(t - one_second, t + one_second)
                self._logger.debug('Finished.')

                self.current_obstime = t
                self._has_yielded = True
                return t, dat2

            except IndexError:
                # The position ran past the end of the current file.
                self._logger.debug('Whole the data is returned. Go next file')
                self.filename_list.pop(0)    # Remove the current file
                self.current_file_contents = None
                self.current_file_position = -1

        raise StopIteration()