''' This module provides an implementation of time series of data
.. codeauthor:: Yoshifumi Futaana
It provides an implementation of time series data handling.
This is similar to a dictionary with a key of ``datetime.datetime`` object,
but provides a rather dedicated interface.
.. autosummary::
TimeDict
.. note::
If you want to use the time-float pair, you may also consider using
:class:`irfpy.util.timeseries.ScalarSeries` class.
If you want to use the time-(x, y, z) pair, you may also consider using
:class:`irfpy.util.timeseries.VectorSeries` class.
If you want to use the time-string pair, you may also consider using
:mod:`irfpy.util.timeseriesdb` module, which uses similar
algorithm to this module.
The difference is that the :mod:`irfpy.util.timeseriesdb` module
provides a "bi-directional" key, which means that both time->string and
string->time look-ups are possible.
.. todo::
Write a use case.
'''
import datetime
import bisect
import logging
from irfpy.util import exception as ex
class DataNotInDbError(ex.PyanaError):
    ''' Exception raised when a requested time is not covered by the data base.

    In this module it is raised by :meth:`TimeDict.get` when the requested
    time is earlier than the first entry.

    :param value: Payload describing the problem (typically a message string).
        It is stored as ``value`` and its ``repr`` is used as the string form.
    '''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)
class DuplicatedError(ex.PyanaError):
    ''' Exception raised when an entry with the same start time already exists.

    In this module it is raised by :meth:`TimeDict.append` when the start time
    is already registered and ``dup='raise'`` is in effect.

    :param value: Payload describing the problem (typically a message string).
        It is stored as ``value`` and its ``repr`` is used as the string form.
    '''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)
class TimeDict(object):
    ''' Implementation of the timeseries database.

    Entries are kept in a plain dictionary (``dbdict``) that maps a start
    time to the associated data.  A sorted list of the start times
    (``dbdict_sortedkey``) is generated lazily and thrown away every time
    an entry is appended, so that look-ups can be done by bisection.
    '''
    logger = logging.getLogger(__name__ + '.TimeDict')

    def __init__(self):
        self.dbdict = {}
        self._flush_sortedkeys()

    def _flush_sortedkeys(self):
        ''' For consistency, remove sorted key.

        The cache is set to *None*; :meth:`_make_sortedkeys` rebuilds it on
        the next access.
        '''
        self.dbdict_sortedkey = None

    def _make_sortedkeys(self):
        ''' For performance sorted key is generated.

        The sorted key list is generated only when it is *None*.
        If you want to re-generate the sorted keys,
        first flush with :meth:`_flush_sortedkeys`.
        '''
        if self.dbdict_sortedkey is None:
            self.dbdict_sortedkey = sorted(self.dbdict.keys())

    def append(self, starttime, data, dup='raise'):
        ''' Append the data into the database together with the start time.

        :param starttime: Start time (:class:`datetime.datetime` object)
        :param data: Data
        :keyword dup: If the start time is duplicated, what action is taken?
            *'raise'* will raise the exception (default).
            *'ignore'* will ignore the given data, and old data is kept.
            *'update'* will update the data, and old data is disregarded.
        :raises DuplicatedError: If ``starttime`` already exists and
            ``dup`` is *'raise'*.

        The ``dup`` keyword is only examined when ``starttime`` already
        exists in the database.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        '''
        if starttime in self.dbdict:
            if dup == 'raise':
                raise DuplicatedError('Time %s exists in DB.' % (starttime))
            elif dup == 'ignore':
                return
            elif dup == 'update':
                pass
            else:
                raise ex.IrfpyError('No option ``%s`` for ``dup`` keyword' % dup)

        self.dbdict[starttime] = data

        # If you added an entry, the stored sorted keys are flushed.
        self._flush_sortedkeys()

    def getobstime(self):
        '''Return the list of observation time in the database.

        :return: Sorted list of the start times.  The returned list is the
            internal cache itself, so it should not be modified in place.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.getobstime())
        [datetime.datetime(2010, 3, 10, 15, 0), datetime.datetime(2010, 3, 11, 15, 0), datetime.datetime(2010, 3, 12, 15, 0)]
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey

    def keys(self):
        ''' Return the list of observation time in the database.

        :return: List of the observation time.

        This is just an alias to :meth:`getobstime`.
        '''
        return self.getobstime()

    def t0(self):
        ''' Return the start time of the first data.

        An empty database raises ``IndexError``.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.t0())
        2010-03-10 15:00:00
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey[0]

    def t1(self):
        ''' Return the start time of the last data

        Note that this is the *start* time of the last data.
        The *end* time of the last data is not defined (so that the user
        should handle this in their script).

        An empty database raises ``IndexError``.

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), {"a": 30, "b": 70})
        >>> td.append(datetime.datetime(2010, 3, 11, 15), {"a": 20, "b": 90})
        >>> td.append(datetime.datetime(2010, 3, 12, 15), {"a": 30, "b": 70})
        >>> print(td.t1())
        2010-03-12 15:00:00
        '''
        self._make_sortedkeys()
        return self.dbdict_sortedkey[-1]

    def get(self, t):
        ''' Return the data of time ``t`` and the start time.

        The entry whose start time is the latest one not later than ``t``
        is returned.
        If the specified time is before the DB start time,
        :class:`DataNotInDbError` is raised.

        :param t: Time (``datetime.datetime``)
        :returns: (t0, data)
        :raises DataNotInDbError: If ``t`` is before the first start time
            (this includes the case of an empty database).

        >>> td = TimeDict()
        >>> td.append(datetime.datetime(2010, 3, 10, 15), ["a", 30, "b", 70])
        >>> td.append(datetime.datetime(2010, 3, 11, 15), ["a", 20, "b", 90])
        >>> td.append(datetime.datetime(2010, 3, 12, 15), ["a", 30, "b", 70])
        >>> td.append(datetime.datetime(2010, 3, 13, 15), ["a", 20, "b", 90, "c", 50])

        >>> t0 = datetime.datetime(2010, 3, 11, 23, 0)
        >>> print(td.get(t0))
        (datetime.datetime(2010, 3, 11, 15, 0), ['a', 20, 'b', 90])

        >>> t1 = datetime.datetime(2010, 3, 13, 0)
        >>> print(td.get(t1))
        (datetime.datetime(2010, 3, 12, 15, 0), ['a', 30, 'b', 70])

        2004-01-05T00:00:00 is before the database.

        >>> t2 = datetime.datetime(2004, 1, 5, 0)
        >>> print(td.get(t2))    # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
            ...
        tdict.DataNotInDbError: 'Time 2004-01-05 00:00:00 is before the DB time.'

        2030-01-01T00:00:00 is, in a common sense, not included
        in this data base, but it returns the last data.
        Thus, it is a user's responsibility to validate the
        end time of the data.

        >>> t3 = datetime.datetime(2030, 1, 1)
        >>> print(td.get(t3))
        (datetime.datetime(2010, 3, 13, 15, 0), ['a', 20, 'b', 90, 'c', 50])
        '''
        self._make_sortedkeys()
        keys = self.dbdict_sortedkey

        # bisect() gives the insertion point to the right of any key equal
        # to ``t``; the entry just before it is the one that covers ``t``.
        idx = bisect.bisect(keys, t)
        if idx == 0:
            raise DataNotInDbError("Time %s is before the DB time." % t)

        key = keys[idx - 1]
        return (key, self.dbdict[key])

    def getbetween(self, tmin=None, tmax=None, strict=False):
        ''' Return the tuple the start time tuple and data tuple.

        :keyword tmin: Start. (``datetime.datetime``)  If *None*, the
            selection is open toward the beginning of the database.
        :keyword tmax: End. (``datetime.datetime``) Inclusive.  If *None*,
            the selection is open toward the end of the database.
        :keyword strict: If *True*, the strict mode is used. By default
            *False*. See :meth:`clipped` function for what is the strict mode.
        :returns: ((ti), (di)) where (ti) is the tuple of time
            and (di) is the tuple of data.
        :raises ValueError: If both ``tmin`` and ``tmax`` are given
            and ``tmin`` is later than ``tmax``.

        If a bound is omitted and the database is empty, a pair of empty
        tuples is returned.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), "A")
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")

        >>> print(db.getbetween(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7)))
        ((datetime.datetime(2004, 1, 5, 18, 30), datetime.datetime(2004, 1, 6, 0, 30), datetime.datetime(2004, 1, 6, 9, 30), datetime.datetime(2004, 1, 6, 18, 30)), ('AB', 'ABC', 'D', 'DE'))

        >>> print(db.getbetween(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7), strict=True))
        ((datetime.datetime(2004, 1, 6, 0, 30), datetime.datetime(2004, 1, 6, 9, 30), datetime.datetime(2004, 1, 6, 18, 30)), ('ABC', 'D', 'DE'))

        >>> print(db.getbetween(datetime.datetime(2004, 1, 1), datetime.datetime(2004, 1, 5, 18, 30)))
        ((datetime.datetime(2004, 1, 5, 15, 30), datetime.datetime(2004, 1, 5, 18, 30)), ('A', 'AB'))

        >>> t_all, d_all = db.getbetween()
        >>> print(len(t_all), len(d_all))
        8 8

        >>> t_all, d_all = db.getbetween(strict=True)
        >>> print(len(t_all), len(d_all))
        8 8
        '''
        if tmin is None or tmax is None:
            # Nothing to derive the missing bound from: t0()/t1() would
            # fail with IndexError on an empty database.
            if len(self) == 0:
                return ((), ())
            # Clamp the derived bound so that a single user-supplied bound
            # lying outside the data range never produces tmin > tmax.
            if tmin is None:
                tmin = self.t0() if tmax is None else min(self.t0(), tmax)
            if tmax is None:
                tmax = max(self.t1(), tmin)

        clipped_dict = self.clipped(tmin, tmax, strict=strict)

        tlist = tuple(clipped_dict.getobstime())
        dlist = tuple(clipped_dict.dbdict[t] for t in tlist)

        return (tlist, dlist)

    def clipped(self, tstart, tend, strict=False):
        ''' Return the clipped object.

        :param tstart: Start. (``datetime.datetime``)
        :param tend: End. (``datetime.datetime``) Inclusive.
        :param strict: Enable/Disable strict mode. By default, *False*.
        :returns: :class:`TimeDict` object that is clipped by tstart and tend.
        :raises ValueError: If ``tstart`` is later than ``tend``.

        A newly generated, clipped :class:`TimeDict` is returned.

        *Strict mode*

        The strict mode considers the time in the original TimeDict as a point.
        Entries whose time is within ``tstart`` and ``tend`` (both inclusive)
        are selected.
        The default (non-strict) mode considers the time in the original
        TimeDict as a start time of the interval.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), "A")
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")

        Let's try to clip the data.

        >>> db_clipped = db.clipped(datetime.datetime(2004, 1, 6), datetime.datetime(2004, 1, 7))
        >>> print(len(db_clipped))
        4
        >>> print(db_clipped[0])
        (datetime.datetime(2004, 1, 5, 18, 30), 'AB')
        >>> print(db_clipped[-1])
        (datetime.datetime(2004, 1, 6, 18, 30), 'DE')

        Note that the first element is the data in Jan 5th, not Jan 6th.
        This feature is because the data of Jan 6th 0:00:00 may be contained
        in the dataset starting from Jan 5th 18:30.

        >>> db_clipped2 = db.clipped(datetime.datetime(2012, 10, 5), datetime.datetime(2012, 11, 5), strict=True)
        >>> print(len(db_clipped2))
        0

        >>> db_clipped2 = db.clipped(datetime.datetime(2012, 10, 5), datetime.datetime(2012, 11, 5), strict=False)
        >>> print(len(db_clipped2))
        1

        >>> db_clipped2 = db.clipped(datetime.datetime(1912, 10, 5), datetime.datetime(1912, 11, 5), strict=True)
        >>> print(len(db_clipped2))
        0

        >>> db_clipped2 = db.clipped(datetime.datetime(1912, 10, 5), datetime.datetime(1912, 11, 5), strict=False)
        >>> print(len(db_clipped2))
        0

        In the strict mode an entry exactly at ``tstart`` is included.

        >>> db_clipped3 = db.clipped(datetime.datetime(2004, 1, 5, 15, 30), datetime.datetime(2004, 1, 5, 18, 30), strict=True)
        >>> print(len(db_clipped3))
        2
        '''
        if tstart > tend:
            raise ValueError('t0 must be earlier than t1.')

        # Use the same class as self so that subclasses are clipped into
        # instances of their own type.
        db = self.__class__()

        if len(self) == 0:
            return db

        self._make_sortedkeys()
        sorted_time = self.dbdict_sortedkey

        # If tend is before the start of the original data, a zero-sized db
        # is returned (regardless of the strict mode).
        if tend < sorted_time[0]:
            return db

        if strict:
            # Entries are points: take the first entry at or after tstart.
            # bisect_left keeps an entry that is exactly at tstart.
            first = bisect.bisect_left(sorted_time, tstart)
        else:
            # Entries are interval starts: rewind to the entry that covers
            # tstart; if tstart precedes all entries, start from the first.
            first = max(bisect.bisect_right(sorted_time, tstart) - 1, 0)

        # tend is inclusive in both modes.
        stop = bisect.bisect_right(sorted_time, tend)

        for t in sorted_time[first:stop]:
            db.append(t, self.dbdict[t])

        return db

    def __len__(self):
        ''' Return the number of entries of the data base.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 15, 30), 50)
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), 25.9)
        >>> print(len(db))
        2
        '''
        return len(self.dbdict)

    def __getitem__(self, index):
        ''' Return the entry (or entries) at the given position in time order.

        :param index: Integer index or slice into the time-sorted entries.
        :returns: A ``(time, data)`` tuple for an integer index, or a list
            of such tuples for a slice.

        >>> db = TimeDict()
        >>> db.append(datetime.datetime(2004, 1, 5, 18, 30), "AB")
        >>> db.append(datetime.datetime(2004, 1, 6, 0, 30), "ABC")
        >>> db.append(datetime.datetime(2004, 1, 6, 9, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 6, 18, 30), "DE")
        >>> db.append(datetime.datetime(2004, 1, 7, 3, 30), "DEF")
        >>> db.append(datetime.datetime(2004, 1, 12, 11, 30), "D")
        >>> db.append(datetime.datetime(2004, 1, 12, 21, 30), "GH")
        >>> print(len(db))
        7
        >>> print(db[0])
        (datetime.datetime(2004, 1, 5, 18, 30), 'AB')
        >>> print(db[3:-2])
        [(datetime.datetime(2004, 1, 6, 18, 30), 'DE'), (datetime.datetime(2004, 1, 7, 3, 30), 'DEF')]
        '''
        sorted_time = self.getobstime()

        # Decide on the kind of index rather than on the type of the key,
        # so that keys other than datetime.datetime work as well.
        if isinstance(index, slice):
            return [(t, self.dbdict[t]) for t in sorted_time[index]]

        t = sorted_time[index]
        return (t, self.dbdict[t])

    def clear(self):
        ''' Remove all the entries from the data base.
        '''
        self.dbdict = {}
        # NOTE(review): ``invdict`` is not used anywhere else in this class;
        # it is kept only for backward compatibility -- confirm whether any
        # subclass or caller relies on it.
        self.invdict = {}
        self._flush_sortedkeys()

    def __add__(self, other):
        ''' Return a new object holding the entries of ``self`` and ``other``.

        The result is an instance of the class of ``self``.  Entries are
        appended with the default duplication policy of :meth:`append`,
        so a start time that appears in both operands raises
        :class:`DuplicatedError`.
        '''
        merged = self.__class__()
        for source in (self, other):
            for entry in source[:]:
                merged.append(entry[0], entry[1])
        return merged

    def __str__(self):
        tl = self.getobstime()
        name = self.__class__.__name__
        if len(tl) == 0:
            return '<%s:len=0>' % name
        return '<%s:len=%d from %s to %s>' % (name, len(self), tl[0], tl[-1])
import unittest
import doctest
def doctests():
    ''' Return a test suite that runs the doctests of this module.

    :returns: :class:`unittest.TestSuite` wrapping the suite built by
        ``doctest.DocTestSuite()``.
    '''
    return unittest.TestSuite((
        doctest.DocTestSuite(),
    ))
if __name__ == '__main__':
    # When executed as a script, run the suite returned by ``doctests``.
    unittest.main(defaultTest='doctests')