# Source code for irfpy.mima.imaextra
'''IMA extra data file access
See :ref:`ima_extra` to start using the IMA extra data.
This module provides the access to the MEX/IMA extra dataset.
The database location should be specified by [mima] section imaddncbase entry
of .irfpyrc.
>>> mimaddncpath = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
>>> mimaextrapath = os.path.join(mimaddncpath, 'IMAEXTRA')
>>> mimaparampath = os.path.join(mimaddncpath, 'IMAPARAM')
The simplest way of getting the file is to specify the file name to
:func:`getImaExtraFile`.
>>> fe = getImaExtraFile('imaextra20040240408.nc.gz')
>>> fp = getImaParamFile('imaparam20070121932.nc.gz')
You may also use the time directly.
>>> fe = getImaExtraFile(t=datetime.datetime(2004, 1, 25, 5, 0))
>>> fp = getImaParamFile(t=datetime.datetime(2007, 1, 13, 20, 0))
*More on module*
Each IMAEXTRA file corresponds to :class:`ImaExtraFile`.
It can be instantiated manually, but users are encouraged to use
:class:`ImaExtraFileFactory`, which provides cached access to
:class:`ImaExtraFile` objects.
>>> fact = ImaExtraFileFactory.createFactory()
>>> iex = fact.getImaExtraFile(os.path.join(mimaextrapath, 'imaextra20040240408.nc.gz'))
>>> print(len(iex.var['Time']))
34
>>> fact2 = ImaParamFileFactory.createFactory()
>>> ipr = fact2.getImaParamFile(os.path.join(mimaparampath, 'imaparam20070121932.nc.gz'))
>>> print(len(ipr.var['Time']))
38
The dataset includes many files (and thus many :class:`ImaExtraFile` objects).
The :class:`ImaExtraDatabase` provides a simple database of the dataset.
This class has functionality converting the time to filename
(:meth:`get_filename`).
>>> db = ImaExtraDatabase.createDatabase()
>>> fn = db.get_filename(datetime.datetime(2004, 1, 25, 5, 0))
>>> print(os.path.basename(fn)) # The filename
imaextra20040240408.nc.gz
>>> db2 = ImaParamDatabase.createDatabase()
>>> fn = db2.get_filename(datetime.datetime(2007, 1, 13, 20, 0))
>>> print(os.path.basename(fn))
imaparam20070121932.nc.gz
'''
import os
import datetime
import logging
import gzip
import tempfile
import numpy as np
import logging as _logging
import irfpy.imacommon.imaextra2
from irfpy.imacommon.imaextra2 import ImaDdNcFileFactory, ImaDdNcDatabase
import irfpy.util.irfpyrc
import irfpy.util.timeseriesdb
import irfpy.util.ringcache
from irfpy.util import exception as _ex
from irfpy.util import datacenter as _dc
_logger = _logging.getLogger(__name__)
class DataCenterImaParam(_dc.BaseDataCenter):
    """ Data center serving the IMA parameter (IMAPARAM) data.

    Every data element handed out by this data center is an
    :class:`ImaParam` object.

    >>> from irfpy.mima import imaextra
    >>> dc = imaextra.DataCenterImaParam()
    >>> import datetime
    >>> t0 = datetime.datetime(2011, 6, 18, 6, 30)
    >>> tobs, iparam = dc.nearest(t0)
    >>> print(tobs)
    2011-06-18 06:30:00.236000
    >>> print(iparam.ndat)
    1
    >>> print(iparam.data['Density_P'])
    [1.1828649]
    """

    def __init__(self):
        _dc.BaseDataCenter.__init__(self)

    def search_files(self):
        """ Collect the IMAPARAM data files below the configured database.

        The base folder is read from the ``imaddncbase`` entry of the
        ``[mima]`` section of ``.irfpyrc``; its ``IMAPARAM`` sub-folder is
        walked recursively.

        :return: Paths of all the files named ``imaparam*.nc.gz``.
        :rtype: ``list`` of ``str``
        """
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        root = os.path.join(base, 'IMAPARAM')

        found = []
        for folder, _subfolders, names in os.walk(root):
            found.extend(
                os.path.join(folder, name)
                for name in names
                if name.startswith('imaparam') and name.endswith('.nc.gz')
            )
        return found

    def approximate_starttime(self, filename):
        """ Guess the start time of a file from its name.

        The eleven characters following the 8-character prefix of the base
        name are parsed as ``YYYYDDDhhmm``.

        :param filename: File name (any leading folder is ignored).
        :return: The time parsed from the file name.
        :rtype: ``datetime.datetime``
        """
        stamp = os.path.basename(filename)[8:19]
        year = int(stamp[0:4])
        day_offset = int(stamp[4:7])
        hour = int(stamp[7:9])
        minute = int(stamp[9:11])
        # The day field is added as an offset to January 1st of the year.
        january_first = datetime.datetime(year, 1, 1, hour, minute)
        return january_first + datetime.timedelta(days=day_offset)

    def read_file(self, filename):
        """ Read a file and split its contents into one object per time.

        A file that raises ``OSError`` on opening is reported with a warning
        and treated as containing no data.

        :param filename: File to read.
        :return: ``(times, data)``.  ``times`` is the observation time list
            of the file, and ``data`` is a list of :class:`ImaParam`, each
            trimmed to one minute either side of the respective time.
        """
        try:
            param_file = ImaParamFileFactory.createFactory().getImaParamFile(filename)
        except OSError:
            _logger.warning('The file {} looks corrupted. Ignoring the file and continue.'.format(filename))
            return [], []

        whole = param_file.getImaParam()
        margin = datetime.timedelta(minutes=1)

        pieces = []
        for obstime in whole.obstime:
            # A fresh object is obtained for every time, since trim() works in place.
            piece = param_file.getImaParam()
            piece.trim(obstime - margin, obstime + margin)
            pieces.append(piece)
        return (whole.obstime, pieces)
def isdb():
    """ Tell whether the IMA extra/parameter database folder is available.

    :return: ``True`` if the ``imaddncbase`` entry of the ``[mima]`` section
        of ``.irfpyrc`` is set and names an existing directory.
        ``False`` otherwise.
    :rtype: ``bool``
    """
    base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
    return base is not None and os.path.isdir(base)
class ImaExtra(irfpy.imacommon.imaextra2.ImaExtraCommon):
    """ MEX/IMA extra data for a specific time range.

    This class is for the IMA extra data for a specific time range.
    It is generally produced by :func:`getImaExtra` or by the
    :func:`iter_imaextra` iterator.
    See the respective functions for how to get the data.

    Once produced, the following methods will give you mass-separated
    count spectra.

    - :meth:`getHpSpec` for proton spectra
    - :meth:`getHeavySpec` for heavy spectra
    - :meth:`getRestRm` for rest matrix
    - :meth:`getHGspec` for proton ghost
    - :meth:`getobstime` for observation time
    """

    # Names of the variables stored in the ``data`` dictionary.
    data_keys = ['AngTableN', 'ETableN', 'Noise', 'HGhostSpec', 'FracO2', 'HeavySpec', 'HpSpec', 'Pacc', 'RestRm', 'Time']

    def __init__(self):
        irfpy.imacommon.imaextra2.ImaExtraCommon.__init__(self, self.data_keys)

    def __add__(self, other):
        """ Return a new :class:`ImaExtra` with ``self`` and ``other`` appended.

        Neither operand is modified.
        """
        new = ImaExtra()
        new.append(self)
        new.append(other)
        return new

    def getHGspec(self):
        """ Return the proton ghost spectra. Counts. Order is AEPT (irfpy standard)

        :return: Proton ghost spectra, in the shape of (A16, E96, P16, Time). Counts.
        """
        spec = self.data['HGhostSpec']   # It is (T, P, E, A) order.
        # Reverse the axis order to obtain (A, E, P, T).
        return np.transpose(spec, (3, 2, 1, 0))

    def __str__(self):
        # min()/max() raise ValueError on an empty sequence, so an object
        # without any data gets a shorter representation.
        if len(self.obstime) == 0:
            return "<MEX/IMA extra: With {} data>".format(self.ndat)
        return "<MEX/IMA extra: With {} data from {} to {}>".format(
            self.ndat, min(self.obstime), max(self.obstime))
class ImaExtraFile(irfpy.imacommon.imaextra2.ImaExtraFileCommon):
    ''' A single IMA extra data file of MEX.

    Usually you want to obtain the instance through a file factory
    (:class:`ImaDdNcFileFactory`), so that the caching is effective.

    The member ``dim`` contains the dimension information, and ``var``
    the variables.
    '''

    @classmethod
    def get_sample_filename(cls):
        ''' Return the path of the sample file shipped with the package.

        Example (not executed as a doctest)::

            fn = ImaExtraFile.get_sample_filename()
            im = ImaExtraFile(fn)
            print(len(im.dim['TimeLength']))     # 17
            print(len(im.dim['Time']))           # 17
            t0 = im.var['Time'][0]               # t0 will be masked array.
            print(t0.compressed().data)          # 2009136192402367

        The time of the observations is available via the
        :meth:`ImaExtraFile.getobstime` method::

            t = im.getobstime()
            print(len(t))                        # 17
            print(t[0])                          # 2009-05-17 19:24:02.367000

        :return: Path to ``sample/imaextra20091361924.nc.gz`` resolved
            relative to this module.
        '''
        from pkg_resources import resource_filename
        relative_path = os.path.join('sample', 'imaextra20091361924.nc.gz')
        return resource_filename(__name__, relative_path)

    def __init__(self, filename, gunzip=True):
        ''' Open the file and read the data.

        :param filename: File to open.
        :keyword gunzip: Handed over to the parent class unchanged.
        '''
        irfpy.imacommon.imaextra2.ImaExtraFileCommon.__init__(self, filename, gunzip=gunzip)

    def getImaExtra(self):
        ''' Build the :class:`ImaExtra` object from the contents of the file.

        Each variable is copied out of ``var``, so the returned object does
        not share arrays with this file object.

        :return: Data
        :rtype: :class:`ImaExtra`
        '''
        extra = ImaExtra()
        extra.obstime = self.getobstime()
        extra.ndat = len(extra.obstime)
        # Snapshot of the keys: the values are replaced while looping.
        for key in tuple(extra.data.keys()):
            extra.data[key] = np.array(self.var[key], copy=True)
        extra.trim_none()
        return extra
class ImaParam(irfpy.imacommon.imaextra2.ImaParamCommon):
    ''' Container of the IMA parameter data for MEX.

    Examples (not executed as doctests).  Two data sets can be added::

        t0 = datetime.datetime(2010, 7, 11, 16, 12)
        file0 = getImaParamFile(t=t0)
        ip0 = file0.getImaParam()
        print(ip0.ndat)                          # 32

        t1 = datetime.datetime(2010, 7, 13, 10, 25)
        file1 = getImaParamFile(t=t1)
        ip1 = file1.getImaParam()
        print(ip1.ndat)                          # 38

        ip01 = ip0 + ip1
        print(ip01.data['Velocity_P'].shape)     # (70, 3)

        ip10 = ip1 + ip0
        print(ip01.data['Velocity_O'].shape)     # (70, 3)

        print((ip01.data['Temperature_P'] == ip10.data['Temperature_P']).all())   # True
        print(ip01.obstime == ip10.obstime)      # True

    If you specify the same data set for adding, the duplication check
    works::

        ip11 = ip1 + ip1    # If you add two identical dataset
        print(ip11.data['ScanFlag'].shape)       # (38,), the same as before.
        print(ip1.obstime == ip11.obstime)       # True

    Trimming to a time range::

        t0 = datetime.datetime(2010, 7, 11, 16, 12)
        file0 = getImaParamFile(t=t0)
        ip0 = file0.getImaParam()
        print(ip0.ndat)                          # 32
        t1 = datetime.datetime(2010, 7, 11, 17)
        t2 = datetime.datetime(2010, 7, 11, 17, 15)
        ip0.trim(t1, t2)
        print(ip0.ndat)                          # 5
    '''

    # Names of the variables stored in the ``data`` dictionary.
    data_keys = [
        'ScanFlag',
        'Quality_P', 'Density_P', 'Velocity_P', 'Temperature_P', 'EnergyThermal_P',
        'Quality_O', 'Density_O', 'Velocity_O', 'Temperature_O', 'EnergyThermal_O',
        'Quality_O2', 'Density_O2', 'Velocity_O2', 'Temperature_O2', 'EnergyThermal_O2',
    ]

    def __init__(self):
        irfpy.imacommon.imaextra2.ImaParamCommon.__init__(self, self.data_keys)

    def __add__(self, other):
        ''' Return a new :class:`ImaParam` with ``self`` and ``other`` appended.

        Neither operand is modified.
        '''
        merged = ImaParam()
        for operand in (self, other):
            merged.append(operand)
        return merged
class ImaParamFile(irfpy.imacommon.imaextra2.ImaParamFileCommon):
    ''' A single IMA parameter data file of MEX.

    Usually you want to obtain the instance through a file factory
    (:class:`ImaDdNcFileFactory`), so that the caching is effective.

    The member ``dim`` contains the dimension information, and ``var``
    the variables.
    '''

    def __init__(self, filename, gunzip=True):
        ''' Open the file and read the data.

        :param filename: File to open.
        :keyword gunzip: Handed over to the parent class unchanged.
        '''
        irfpy.imacommon.imaextra2.ImaParamFileCommon.__init__(self, filename, gunzip=gunzip)

    def getImaParam(self):
        ''' Build the :class:`ImaParam` object from the contents of the file.

        A variable missing in the file is reported with a warning and
        substituted by a one-dimensional array of NaN with one element per
        observation time.

        :return: Data
        :rtype: :class:`ImaParam`
        '''
        param = ImaParam()
        param.obstime = self.getobstime()
        param.ndat = len(param.obstime)
        # Snapshot of the keys: the values are replaced while looping.
        for key in tuple(param.data.keys()):
            try:
                param.data[key] = np.array(self.var[key], copy=True)
            except KeyError:
                _logger.warning("The database does not contain informationon {} at {}. NaN inserted.".format(key, self.filename))
                param.data[key] = np.full([param.ndat], np.nan)
        return param
class ImaExtraFileFactory(ImaDdNcFileFactory):
    ''' Factory producing :class:`ImaExtraFile` objects.

    Use :meth:`createFactory` to get the shared ("master") instance.
    '''

    # The shared instance, created at the first call of createFactory().
    __singleton_instance = None

    def __init__(self, *args, **kwds):
        ImaDdNcFileFactory.__init__(self, ImaExtraFile, *args, **kwds)

    @classmethod
    def createFactory(cls):
        ''' Return the "master" factory.

        The factory is instantiated at the first call, and the very same
        object is returned afterwards.
        '''
        master = cls.__singleton_instance
        if master is None:
            master = ImaExtraFileFactory()
            cls.__singleton_instance = master
        return master

    def getImaExtraFile(self, filename, gunzip=True):
        ''' Return the file object for ``filename``.

        A thin wrapper of ``getDdNcFile`` of the parent class.
        '''
        return self.getDdNcFile(filename, gunzip=gunzip)
class ImaParamFileFactory(ImaDdNcFileFactory):
    ''' Factory producing :class:`ImaParamFile` objects.

    Use :meth:`createFactory` to get the shared ("master") instance.
    '''

    # The shared instance, created at the first call of createFactory().
    __singleton_instance = None

    def __init__(self, *args, **kwds):
        ImaDdNcFileFactory.__init__(self, ImaParamFile, *args, **kwds)

    @classmethod
    def createFactory(cls):
        ''' Return the "master" factory.

        The factory is instantiated at the first call, and the very same
        object is returned afterwards.
        '''
        master = cls.__singleton_instance
        if master is None:
            master = ImaParamFileFactory()
            cls.__singleton_instance = master
        return master

    def getImaParamFile(self, filename, gunzip=True):
        ''' Return the file object for ``filename``.

        A thin wrapper of ``getDdNcFile`` of the parent class.
        '''
        return self.getDdNcFile(filename, gunzip=gunzip)
class ImaExtraDatabase(ImaDdNcDatabase):
    ''' Database of the IMA extra data files.

    Example (not executed as a doctest)::

        db = ImaExtraDatabase.createDatabase()
        t = datetime.datetime(2004, 1, 25, 5, 0)
        print(os.path.basename(db.get_filename(t)))   # imaextra20040240408.nc.gz
    '''

    # The shared instance, managed by createDatabase().
    __singleton_instance = None

    def __init__(self, database=None):
        ''' Set up the database.

        :keyword database: Folder holding the data files.  If ``None``, the
            ``IMAEXTRA`` folder below the ``imaddncbase`` entry of the
            ``[mima]`` section of ``.irfpyrc`` is used.
        '''
        if database is None:
            base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
            database = os.path.join(base, 'IMAEXTRA')
        ImaDdNcDatabase.__init__(self, database, 'imaextra', ImaExtraFileFactory())

    @classmethod
    def createDatabase(cls, refresh=False):
        ''' Create the database.

        :keyword refresh: If true, a new database is instantiated even if
            one has been created already.
        :return: The shared database object.
        '''
        if refresh or cls.__singleton_instance is None:
            cls.__singleton_instance = ImaExtraDatabase()
        return cls.__singleton_instance
class ImaParamDatabase(ImaDdNcDatabase):
    ''' Database of the IMA parameter data files.
    '''

    # The shared instance, managed by createDatabase().
    __singleton_instance = None

    def __init__(self, database=None):
        ''' Set up the database.

        :keyword database: Folder holding the data files.  If ``None``, the
            ``IMAPARAM`` folder below the ``imaddncbase`` entry of the
            ``[mima]`` section of ``.irfpyrc`` is used.
        '''
        if database is None:
            base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
            database = os.path.join(base, 'IMAPARAM')
        ImaDdNcDatabase.__init__(self, database, 'imaparam', ImaParamFileFactory())

    @classmethod
    def createDatabase(cls, refresh=False):
        ''' Create the database.

        :keyword refresh: If true, a new database is instantiated even if
            one has been created already.
        :return: The shared database object.
        '''
        if refresh or cls.__singleton_instance is None:
            cls.__singleton_instance = ImaParamDatabase()
        return cls.__singleton_instance
def getImaExtraFile(filename=None, t=None, gunzip=True):
    ''' Easy accessor to an :class:`ImaExtraFile` object.

    The object is obtained via the cache-enabled master factory
    (:class:`ImaExtraFileFactory`).

    :keyword filename: File name.  It takes precedence over ``t``.  If
        opening it raises ``IOError``, the name is tried once more relative
        to the ``IMAEXTRA`` folder of the configured database.
    :keyword t: Time.  Used only if ``filename`` is ``None``; the file is
        looked up in :class:`ImaExtraDatabase`.
    :keyword gunzip: Handed over to the factory unchanged.
    :raises ValueError: If neither ``filename`` nor ``t`` is given.
    '''
    if filename is None and t is None:
        raise ValueError('Either ``filename`` or ``t`` should be specified')

    if filename is None:
        found = ImaExtraDatabase.createDatabase().get_filename(t)
        return ImaExtraFileFactory.createFactory().getImaExtraFile(found, gunzip=gunzip)

    try:
        return ImaExtraFileFactory.createFactory().getImaExtraFile(filename, gunzip=gunzip)
    except IOError:
        log = logging.getLogger('getImaExtraFile')
        log.warning('No file found %s.' % filename)
        # Second attempt: interpret the name relative to the database folder.
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        fallback = os.path.join(base, 'IMAEXTRA', filename)
        log.warning('Try %s now.' % fallback)
        return ImaExtraFileFactory.createFactory().getImaExtraFile(fallback, gunzip=gunzip)
def getImaParamFile(filename=None, t=None, gunzip=True):
    ''' Easy accessor to an :class:`ImaParamFile` object.

    The object is obtained via the cache-enabled master factory
    (:class:`ImaParamFileFactory`).

    :keyword filename: File name.  It takes precedence over ``t``.  If
        opening it raises ``IOError``, the name is tried once more relative
        to the ``IMAPARAM`` folder of the configured database.
    :keyword t: Time.  Used only if ``filename`` is ``None``; the file is
        looked up in :class:`ImaParamDatabase`.
    :keyword gunzip: Handed over to the factory unchanged.
    :raises ValueError: If neither ``filename`` nor ``t`` is given.
    '''
    if filename is None and t is None:
        raise ValueError('Either ``filename`` or ``t`` should be specified')

    if filename is None:
        found = ImaParamDatabase.createDatabase().get_filename(t)
        return ImaParamFileFactory.createFactory().getImaParamFile(found, gunzip=gunzip)

    try:
        return ImaParamFileFactory.createFactory().getImaParamFile(filename, gunzip=gunzip)
    except IOError:
        log = logging.getLogger('getImaParamFile')
        log.warning('No file found %s.' % filename)
        # Second attempt: interpret the name relative to the database folder.
        base = irfpy.util.irfpyrc.Rc().get('mima', 'imaddncbase')
        fallback = os.path.join(base, 'IMAPARAM', filename)
        log.warning('Try %s now.' % fallback)
        return ImaParamFileFactory.createFactory().getImaParamFile(fallback, gunzip=gunzip)
def getImaParam(t0, t1):
    ''' Return the :class:`ImaParam` object for the given time range.

    The files the database lists for the range are read and appended one
    after another, and the result is trimmed to ``t0`` -- ``t1``.

    :param t0: Time to start
    :param t1: Time to end
    :return: IMA parameter object
    :rtype: :class:`ImaParam`
    :raises irfpy.util.exception.IrfpyException: If the database is empty.
    '''
    database = ImaParamDatabase.createDatabase()
    if not len(database):
        raise _ex.IrfpyException('Data file not found. Check the setup (e.g., .irfpyrc, or the data file)')

    merged = ImaParam()
    for path in database.get_filenames(t0, t1):
        param_file = ImaParamFileFactory.createFactory().getImaParamFile(path)
        merged.append(param_file.getImaParam())
    merged.trim(t0, t1)
    return merged
def getImaExtra(t0, t1):
    ''' Return the :class:`ImaExtra` object for the given time range.

    The files the database lists for the range are read and appended one
    after another, and the result is trimmed to ``t0`` -- ``t1``.

    :param t0: Time to start
    :param t1: Time to end
    :return: IMA extra object
    :rtype: :class:`ImaExtra`
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    >>> imaExtra = getImaExtra(datetime.datetime(2012, 10, 5, 6), datetime.datetime(2012, 10, 6, 8))
    >>> t = imaExtra.obstime
    >>> print(len(t))
    167
    >>> hp = imaExtra.getHpSpec()
    >>> print(hp.shape)
    (16, 96, 16, 167)
    >>> print(hp.max())
    494.02887
    >>> op = imaExtra.getHeavySpec()
    >>> print(op.shape)
    (16, 96, 16, 167)
    >>> print(op.max())
    5126.1104
    '''
    database = ImaExtraDatabase.createDatabase()
    if not len(database):
        raise _ex.IrfpyException('Data file not found. Check the setup (e.g., .irfpyrc, or the data file)')

    merged = ImaExtra()
    for path in database.get_filenames(t0, t1):
        extra_file = ImaExtraFileFactory.createFactory().getImaExtraFile(path)
        merged.append(extra_file.getImaExtra())
    merged.trim(t0, t1)
    return merged
class iterparam:
    """ Iterate the IMA parameter file.

    Each iteration gives a tuple ``(t, dat)`` where ``t`` is the time of the
    observation start, and ``dat`` is a dictionary that contains the data.
    The key of ``dat`` is :attr:`ImaParam.data_keys`.  The ``dat`` is *not*
    a :class:`ImaParam` object, but rather the element of
    :attr:`ImaParam.data` for that single time.

    :param t0: Time to start.  Only the data with the time strictly later
        than ``t0`` are iterated.
    :param t1: Time to stop.  Iterated data's time is always earlier than or
        equal to ``t1``.
    :returns: An iterator of ``(t, dat)``.  ``t`` is time, and ``dat`` is a
        dictionary containing the IMAPARAM data.
        Its key is :attr:`ImaParam.data_keys`.
    :rtype: iterator of (``datetime.datetime``, ``dict``)
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    If you want to iterate data from 2010-11-07T05:15 for 10 minutes,
    you can do as follows:

    >>> t0 = datetime.datetime(2010, 11, 7, 5, 15)
    >>> t1 = datetime.datetime(2010, 11, 7, 5, 25)
    >>> for t, mom in iterparam(t0, t1):
    ...     print(t, mom['Density_P'])
    2010-11-07 05:17:53.821000 0.61079055
    2010-11-07 05:21:05.789000 0.39960635
    2010-11-07 05:24:17.851000 0.21043392
    """

    #: Key of the data
    data_keys = ImaParam.data_keys

    _logger = logging.getLogger(__name__ + '.iterparam')

    def __init__(self, t0, t1):
        self.t0 = t0
        self.t1 = t1

        db = ImaParamDatabase.createDatabase()
        if len(db) == 0:
            raise _ex.IrfpyException('Data file not found. Check the setup (e.g., .irfpyrc, or the data file)')

        # Own copy of the file names: __next__ consumes the list with pop(0),
        # which must not alter the list object handed out by the database.
        self.filename_list = list(db.get_filenames(self.t0, self.t1))

        self.current_file_contents = None      # ImaParam of the file being iterated
        self.current_file_position = -1        # Index of the last visited datum in that file
        self.current_obstime = self.t0         # Time of the last returned datum (t0 before the first)

    def __iter__(self):
        return self

    def __next__(self):
        """ Return the next ``(t, dat)``.

        :raises StopIteration: If a datum later than ``t1`` is met, or if
            all the files have been consumed.
        """
        log = self._logger
        while self.filename_list:
            log.debug('Remained files = {}'.format(len(self.filename_list)))
            log.debug('The target file = {}'.format(self.filename_list[0]))

            # If current file contents are not loaded
            if self.current_file_contents is None:
                log.debug('No file contents have been loaded. Start to load')
                # NOTE: a failure of loading is not handled here; the exception propagates.
                self.current_file_contents = getImaParamFile(filename=self.filename_list[0]).getImaParam()
                self.current_file_position = -1
                log.debug('File contents read finished.')

            # You proceed 1 data, and return.
            log.debug('Progress 1 data')
            self.current_file_position += 1

            try:
                log.debug('Get the time the loaded contents')
                t = self.current_file_contents.obstime[self.current_file_position]
                log.debug('Finish time list: Result {}'.format(t))

                # Not later than the previously returned time (or t0): skip.
                if t <= self.current_obstime:
                    log.debug('The obtainted time is not appropriate. Skip this')
                    continue

                if t > self.t1:
                    log.debug('The obtainted time is past the required range. Stop iteration')
                    raise StopIteration()

                log.debug('Start loading the IMA param file.')
                dat = self.current_file_contents
                log.debug('Finished.')

                log.debug('Preparation of the returned data')
                dat2 = {k: dat.data[k][self.current_file_position] for k in self.data_keys}
                log.debug('Finish preparation')

                self.current_obstime = t
                return t, dat2

            except IndexError:
                # The position ran past the end of the file: proceed to the next file.
                log.debug('Whole the data is returned. Go next file')
                self.filename_list.pop(0)   # Remove the current file
                self.current_file_contents = None
                self.current_file_position = -1

        raise StopIteration()
class iter_imaextra:
    """ Iterate the IMA extra data.

    Iterate the IMA extra data, as a tuple of ``(time, data)``.
    Here data is an object of :class:`ImaExtra`, which is a deep copy of the
    contents of the file trimmed to one second either side of ``time``.

    :param t0: Time to start.  Only the data with the time strictly later
        than ``t0`` are iterated.
    :param t1: Time to stop.  Iterated data's time is always earlier than or
        equal to ``t1``.
    :raises irfpy.util.exception.IrfpyException: If the database is empty.

    >>> t0 = datetime.datetime(2010, 11, 7, 5, 15)
    >>> t1 = datetime.datetime(2010, 11, 7, 5, 25)
    >>> for t, d in iter_imaextra(t0, t1):
    ...     print(t, d.getHeavySpec().shape, d.getHpSpec().max())
    2010-11-07 05:17:53.821000 (16, 96, 16, 1) 42.640816
    2010-11-07 05:21:05.789000 (16, 96, 16, 1) 63.204727
    2010-11-07 05:24:17.852000 (16, 96, 16, 1) 57.680725
    """

    #: Key of the data
    data_keys = ImaExtra.data_keys

    _logger = logging.getLogger(__name__ + '.iter_imaextra')

    def __init__(self, t0, t1):
        self.t0 = t0
        self.t1 = t1

        db = ImaExtraDatabase.createDatabase()
        if len(db) == 0:
            raise _ex.IrfpyException('Data file not found. Check the setup (e.g., .irfpyrc, or the data file)')

        # Own copy of the file names: __next__ consumes the list with pop(0),
        # which must not alter the list object handed out by the database.
        self.filename_list = list(db.get_filenames(self.t0, self.t1))

        self.current_file_contents = None      # ImaExtra of the file being iterated
        self.current_file_position = -1        # Index of the last visited datum in that file
        self.current_obstime = self.t0         # Time of the last returned datum (t0 before the first)

    def __iter__(self):
        return self

    def __next__(self):
        """ Return the next ``(t, data)``.

        :raises StopIteration: If a datum later than ``t1`` is met, or if
            all the files have been consumed.
        """
        # Imported once per call, instead of at every turn of the loop.
        import copy as _copy

        log = self._logger
        margin = datetime.timedelta(seconds=1)

        while self.filename_list:
            log.debug('Remained files = {}'.format(len(self.filename_list)))
            log.debug('The target file = {}'.format(self.filename_list[0]))

            # If current file contents are not loaded
            if self.current_file_contents is None:
                log.debug('No file contents have been loaded. Start to load')
                # NOTE: a failure of loading is not handled here; the exception propagates.
                self.current_file_contents = getImaExtraFile(filename=self.filename_list[0]).getImaExtra()
                self.current_file_position = -1
                log.debug('File contents read finished.')

            # You proceed 1 data, and return.
            log.debug('Progress 1 data')
            self.current_file_position += 1

            try:
                log.debug('Get the time the loaded contents')
                t = self.current_file_contents.obstime[self.current_file_position]
                log.debug('Finish time list: Result {}'.format(t))

                # Not later than the previously returned time (or t0): skip.
                if t <= self.current_obstime:
                    log.debug('The obtainted time is not appropriate. Skip this')
                    continue

                if t > self.t1:
                    log.debug('The obtainted time is past the required range. Stop iteration')
                    raise StopIteration()

                log.debug('Start loading the IMA extra file.')
                # trim() works in place, so it is applied to a copy to keep
                # the loaded contents intact for the following iterations.
                dat2 = _copy.deepcopy(self.current_file_contents)
                dat2.trim(t - margin, t + margin)
                log.debug('Finished.')

                self.current_obstime = t
                return t, dat2

            except IndexError:
                # The position ran past the end of the file: proceed to the next file.
                log.debug('Whole the data is returned. Go next file')
                self.filename_list.pop(0)   # Remove the current file
                self.current_file_contents = None
                self.current_file_position = -1

        raise StopIteration()