# Source code for irfpy.util.tdict

''' This module provides an implementation of time series of data

.. codeauthor:: Yoshifumi Futaana

It provides an implementation of time series data handling.
This is similar to a dictionary with a key of ``datetime.datetime`` object,
but provides a rather dedicated interface.

.. autosummary::

    TimeDict

.. note::

    If you want to use the time-float pair, you may also consider using
    :class:`irfpy.util.timeseries.ScalarSeries` class.

    If you want to use the time-(x, y, z) pair, you may also consider using
    :class:`irfpy.util.timeseries.VectorSeries` class.

    If you want to use the time-string pair, you may also consider using
    :mod:`irfpy.util.timeseriesdb` module, which uses a similar
    algorithm to this module.
    The difference is that the :mod:`irfpy.util.timeseriesdb` module
    provides a "bi-directional" key, which means that both time->string and
    time<-string lookups are possible.

.. todo::

    Write a use case.
'''

import datetime
import bisect
import logging

from irfpy.util import exception as ex


class DataNotInDbError(ex.PyanaError):
    ''' Exception raised when a requested time is not covered by the database.

    :class:`TimeDict.get` raises this when the requested time is earlier
    than the first start time stored in the database.

    :param value: Object describing the failure.  It is stored as the
        ``value`` attribute and its ``repr`` is used as the string form.
    '''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)
class DuplicatedError(ex.PyanaError):
    ''' Exception raised when an already-registered start time is appended.

    :class:`TimeDict.append` raises this when the start time already exists
    in the database and ``dup='raise'`` (the default) is in effect.

    :param value: Object describing the failure.  It is stored as the
        ``value`` attribute and its ``repr`` is used as the string form.
    '''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)
class TimeDict(object):
    ''' Implementation of the timeseries database.

    Data are stored in a plain dictionary (``dbdict``) keyed by the start
    time.  A sorted list of the keys (``dbdict_sortedkey``) is cached and
    rebuilt lazily; every modification of the dictionary flushes the cache.
    '''
    logger = logging.getLogger(__name__ + '.TimeDict')

    def __init__(self):
        self.dbdict = {}
        self._flush_sortedkeys()

    def _flush_sortedkeys(self):
        ''' For consistency, remove the cached sorted keys.
        '''
        self.dbdict_sortedkey = None

    def _make_sortedkeys(self):
        ''' Generate the cached sorted keys if they are not available.

        For performance the sorted key list is generated only when
        the cache is *None*.  If you want to re-generate the sorted keys,
        first flush with :meth:`_flush_sortedkeys`.
        '''
        if self.dbdict_sortedkey is None:
            self.dbdict_sortedkey = sorted(self.dbdict.keys())

    def append(self, starttime, data, dup='raise'):
        ''' Append the data into the database together with the start time.

        :param starttime: Start time (:class:`datetime.datetime` object)
        :param data: Data
        :keyword dup: If the start time is duplicated, what action is taken?
            *'raise'* will raise the exception (default).
            *'ignore'* will ignore the given data, and old data is kept.
            *'update'* will update the data, and old data is disregarded.
        :raises DuplicatedError: If ``starttime`` already exists and
            ``dup`` is *'raise'*.

        The ``dup`` keyword is only inspected when ``starttime`` already
        exists in the database.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        '''
        if starttime in self.dbdict:
            if dup == 'raise':
                raise DuplicatedError('Time %s exists in DB.' % (starttime))
            elif dup == 'ignore':
                return
            elif dup == 'update':
                pass
            else:
                # NOTE(review): the other errors of this module derive from
                # ``ex.PyanaError``; confirm ``ex.IrfpyError`` is intended.
                raise ex.IrfpyError('No option ``%s`` for ``dup`` keyword' % dup)

        self.dbdict[starttime] = data

        # If you added an entry, the stored sorted keys are flushed.
        self._flush_sortedkeys()

    def getobstime(self):
        ''' Return the list of observation time in the database.

        :returns: Sorted list of the start times.  The returned list is the
            internally cached object, so it should not be modified in place.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.getobstime())
        [datetime.datetime(2010, 3, 10, 15, 0), datetime.datetime(2010, 3, 11, 15, 0), datetime.datetime(2010, 3, 12, 15, 0)]
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey

    def keys(self):
        ''' Return the list of observation time in the database.

        :return: List of the observation time.

        This is just an alias to :meth:`getobstime`.
        '''
        return self.getobstime()

    def t0(self):
        ''' Return the start time of the first data.

        :raises IndexError: If the database is empty.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.t0())
        2010-03-10 15:00:00
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey[0]

    def t1(self):
        ''' Return the start time of the last data

        Note that this is the *start* time of the last data.
        The *end* time of the last data is not defined
        (so that the user should handle this in their script).

        :raises IndexError: If the database is empty.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.t1())
        2010-03-12 15:00:00
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey[-1]

    def get(self, t):
        ''' Return the data of time ``t`` and the start time.

        The entry whose start time is the latest one not later than ``t``
        is returned.  If the specified time is before the DB start time
        (or the DB is empty), :class:`DataNotInDbError` is raised.

        :param t: Time (``datetime.datetime``)
        :returns: (t0, data)
        :raises DataNotInDbError: If ``t`` is before the first start time.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), ["a", 30, "b", 70])
        >>> td.append(datetime.datetime(2010, 3, 11, 15), ["a", 20, "b", 90])
        >>> td.append(datetime.datetime(2010, 3, 12, 15), ["a", 30, "b", 70])
        >>> td.append(datetime.datetime(2010, 3, 13, 15), ["a", 20, "b", 90, "c", 50])

        >>> t0 = datetime.datetime(2010, 3, 11, 23, 0)
        >>> print(td.get(t0))
        (datetime.datetime(2010, 3, 11, 15, 0), ['a', 20, 'b', 90])

        >>> t1 = datetime.datetime(2010, 3, 13, 0)
        >>> print(td.get(t1))
        (datetime.datetime(2010, 3, 12, 15, 0), ['a', 30, 'b', 70])

        2004-01-05T00:00:00 is before the database.

        >>> t2 = datetime.datetime(2004, 1, 5, 0)
        >>> print(td.get(t2))    # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        tdict.DataNotInDbError: 'Time 2004-01-05 00:00:00 is before the DB time.'

        2030-01-01T00:00:00 is, in a common sense, not included in this
        data base, but it returns the last data.
        Thus, it is a user's responsibility to validate the end time of the data.

        >>> t3 = datetime.datetime(2030, 1, 1)
        >>> print(td.get(t3))
        (datetime.datetime(2010, 3, 13, 15, 0), ['a', 20, 'b', 90, 'c', 50])
        '''
        self._make_sortedkeys()
        keys = self.dbdict_sortedkey

        # Number of start times that are <= t; zero means t precedes them all.
        idx = bisect.bisect(keys, t)
        if idx == 0:
            raise DataNotInDbError("Time %s is before the DB time." % t)

        key = keys[idx - 1]
        return (key, self.dbdict[key])

    def getbetween(self, tmin=None, tmax=None, strict=False):
        ''' Return the tuple of the start time tuple and the data tuple.

        :keyword tmin: Start. (``datetime.datetime``)  If *None*,
            the first start time in the database is used.
        :keyword tmax: End. (``datetime.datetime``) Inclusive.  If *None*,
            the last start time in the database is used.
        :keyword strict: If *True*, the strict mode is used.
            By default *False*.
            See :meth:`clipped` function for what is the strict mode.
        :returns: ((ti), (di)) where (ti) is the tuple of time and
            (di) is the tuple of data.
        :raises ValueError: If ``tmin`` is later than ``tmax``.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), "A")
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")

        >>> print(db.getbetween(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7)))
        ((datetime.datetime(2004, 1, 5, 18, 30), datetime.datetime(2004, 1, 6, 0, 30), datetime.datetime(2004, 1, 6, 9, 30), datetime.datetime(2004, 1, 6, 18, 30)), ('AB', 'ABC', 'D', 'DE'))

        >>> print(db.getbetween(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7), strict=True))
        ((datetime.datetime(2004, 1, 6, 0, 30), datetime.datetime(2004, 1, 6, 9, 30), datetime.datetime(2004, 1, 6, 18, 30)), ('ABC', 'D', 'DE'))

        >>> print(db.getbetween(datetime.datetime(2004, 1, 1), datetime.datetime(2004, 1, 5, 18, 30)))
        ((datetime.datetime(2004, 1, 5, 15, 30), datetime.datetime(2004, 1, 5, 18, 30)), ('A', 'AB'))

        >>> t_all, d_all = db.getbetween()
        >>> print(len(t_all), len(d_all))
        8 8

        An empty database has no default bounds; empty tuples are returned.

        >>> print(TimeDict().getbetween())
        ((), ())
        '''
        # The default bounds come from t0()/t1(), which cannot be evaluated
        # for an empty database.  There is nothing to return in that case.
        if len(self) == 0 and (tmin is None or tmax is None):
            return ((), ())

        if tmin is None:
            tmin = self.t0()
        if tmax is None:
            tmax = self.t1()

        clipped_dict = self.clipped(tmin, tmax, strict=strict)

        tlist = tuple(clipped_dict.getobstime())
        dlist = tuple(clipped_dict.dbdict[t] for t in tlist)

        return (tlist, dlist)

    def clipped(self, tstart, tend, strict=False):
        ''' Return the clipped object.

        :param tstart: Start. (``datetime.datetime``)
        :param tend: End. (``datetime.datetime``) Inclusive.
        :param strict: Enable/Disable strict mode.  By default, *False*.
        :returns: :class:`TimeDict` object that is clipped by tstart and tend.
        :raises ValueError: If ``tstart`` is later than ``tend``.

        A newly generated, clipped :class:`TimeDict` is returned.

        *Strict mode*

        The strict mode considers the time in the original TimeDict as a point.
        Entries whose time satisfies ``tstart <= time <= tend`` are returned.
        The default (non-strict) mode considers the time in the original
        TimeDict as a start time of the interval.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), "A")
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")

        Let's try to clip the data.

        >>> db_clipped = db.clipped(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7))
        >>> print(len(db_clipped))
        4
        >>> print(db_clipped[0])
        (datetime.datetime(2004, 1, 5, 18, 30), 'AB')
        >>> print(db_clipped[-1])
        (datetime.datetime(2004, 1, 6, 18, 30), 'DE')

        Note that the first element is the data in Jan 5th, not Jan 6th.
        This feature is because the data of Jan 6th 0:00:00 may be contained
        in the dataset starting from Jan 5th 18:30.

        In the strict mode both ends are inclusive.

        >>> db_edge = db.clipped(datetime.datetime(2004, 1, 6, 9, 30), datetime.datetime(2004, 1, 6, 18, 30), strict=True)
        >>> print(len(db_edge))
        2

        >>> db_clipped2 = db.clipped(datetime.datetime(2012, 10, 5), datetime.datetime(2012, 11, 5), strict=True)
        >>> print(len(db_clipped2))
        0
        >>> db_clipped2 = db.clipped(datetime.datetime(2012, 10, 5), datetime.datetime(2012, 11, 5), strict=False)
        >>> print(len(db_clipped2))
        1
        >>> db_clipped2 = db.clipped(datetime.datetime(1912, 10, 5), datetime.datetime(1912, 11, 5), strict=True)
        >>> print(len(db_clipped2))
        0
        >>> db_clipped2 = db.clipped(datetime.datetime(1912, 10, 5), datetime.datetime(1912, 11, 5), strict=False)
        >>> print(len(db_clipped2))
        0
        '''
        if tstart > tend:
            raise ValueError('t0 must be earlier than t1.')

        # Use the class of the instance so that subclasses clip to themselves.
        db = self.__class__()

        if len(self) == 0:
            return db

        self._make_sortedkeys()
        sorted_time = self.dbdict_sortedkey

        # If tend is before the start of the original data, a zero-sized db
        # is returned (regardless of the strict mode).
        if tend < sorted_time[0]:
            return db

        if strict:
            # Point semantics: first entry with time >= tstart.  bisect_left
            # keeps an entry located exactly at tstart, matching the
            # inclusive treatment of tend.
            idx = bisect.bisect_left(sorted_time, tstart)
        else:
            # Interval semantics: rewind to the entry whose interval contains
            # tstart; if tstart precedes all entries, start from the first.
            idx = max(bisect.bisect_right(sorted_time, tstart) - 1, 0)

        # Last entry with time <= tend.
        idx2 = bisect.bisect_right(sorted_time, tend) - 1

        for t in sorted_time[idx:idx2 + 1]:
            db.append(t, self.dbdict[t])

        return db

    def __len__(self):
        ''' Return the number of entries of the data base.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), 50)
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), 25.9)
        >>> print(len(db))
        2
        '''
        return len(self.dbdict)

    def __getitem__(self, index):
        ''' Return the entry (or entries) at the position in the time order.

        :param index: Integer position or slice, applied to the sorted list
            of the start times.
        :returns: ``(time, data)`` for an integer position,
            or a list of ``(time, data)`` for a slice.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")
        >>> print(len(db))
        7
        >>> print(db[0])
        (datetime.datetime(2004, 1, 5, 18, 30), 'AB')
        >>> print(db[3:-2])
        [(datetime.datetime(2004, 1, 6, 18, 30), 'DE'), (datetime.datetime(2004, 1, 7, 3, 30), 'DEF')]
        '''
        tlist = self.getobstime()

        # Dispatch on the index type rather than on the key type, so that
        # the lookup does not depend on the keys being datetime instances.
        if isinstance(index, slice):
            return [(t, self.dbdict[t]) for t in tlist[index]]

        t = tlist[index]
        return (t, self.dbdict[t])

    def clear(self):
        ''' Remove all the entries from the database.
        '''
        self.dbdict = {}
        # NOTE(review): ``invdict`` is not used anywhere else in this class;
        # it is kept here only so that existing behavior is unchanged.
        self.invdict = {}
        self._flush_sortedkeys()

    def __add__(self, other):
        ''' Return a new object holding the entries of both operands.

        Entries are appended with the default ``dup='raise'``, so a start
        time present in both operands raises :class:`DuplicatedError`.
        '''
        v = self.__class__()

        for t, d in self[:]:
            v.append(t, d)

        for t, d in other[:]:
            v.append(t, d)

        return v

    def __str__(self):
        tl = self.getobstime()
        if len(tl) == 0:
            return '<%s:len=0>' % self.__class__.__name__
        return '<%s:len=%d from %s to %s>' % (
            self.__class__.__name__, len(self), tl[0], tl[-1]
        )
import unittest
import doctest


def doctests():
    ''' Return the test suite running the doctests of this module.

    :returns: :class:`unittest.TestSuite` that wraps the suite created by
        ``doctest.DocTestSuite()`` called without arguments.
    '''
    return unittest.TestSuite((
        doctest.DocTestSuite(),
    ))


if __name__ == '__main__':
    unittest.main(defaultTest='doctests')