# Source code for irfpy.util.intermfile

""" Intermediate file handling.

.. codeauthor:: Futaana

It handles the intermediate file.
Here intermediate file means any data file that stores the results
of any calculation or processing.

Example use cases:

    - You read a huge data set but need only a part of it in the next processing.
      Therefore, the needed part should be saved into a separate file on the
      local hard disk.
    - (More will come)


Sample usage:

1. Using :func:`intermfile` function:

.. code-block:: python

    from irfpy.util.intermfile import intermfile

    def my_function(a, b, c=0, d=0):
        return (a + b, c * d)

    dat = intermfile('datafile.pickle', my_function, args=[5, 10], kwds={'c': 3, 'd': 7})


2. Using decorator, :func:`interm`:

.. code-block:: python

    from irfpy.util.intermfile import interm

    @interm('datafile.pickle')
    def my_function(a, b, c=0, d=0):
        return (a + b, c * d)

    my_function(5, 10, c=3, d=7)


In either case, the call to my_function always returns (15, 21).
The value is obtained from running my_function() if 'datafile.pickle' is not found,
or obtained from the intermediate file 'datafile.pickle' otherwise.

Which to choose? Function (option 1) is flexible but less intuitive.  Decorator (option 2) is easy but less flexible.


.. note::

    - Good point of using :func:`interm` decorator is the simple syntax for users.
      The call is trivial.
      However, the decorator is less flexible, because the meta data (intermediate filename,
      version, or expiration time) should be defined at compile of function, not runtime.

    - Good point of using :func:`intermfile` function is flexibility.
      User can determine the intermediate file name at runtime, indicating that the user can
      dump multiple intermediate files from the same function (e.g. using different parameters).
      The weak point is that the syntax is less trivial, and the function's arguments or
      keywords should be provided in a non-standard way.


.. note::

    Developer note:

    It is inspired by :mod:`irfpy.util.filepairv`.

    Difference is that

        - This module is for more generic purpose.
        - :mod:`irfpy.util.filepairv` is only for reading and caching file.

    Which to recommend to user? As of 2016-11-15, the :mod:`irfpy.util.filepairv` is more tested and
    used so that it is more stable.  This module may replace the
    :mod:`irfpy.util.filepairv` module in the future.
"""
import os
import pickle
import gzip
import bz2
import time
import logging
_logger = logging.getLogger(__name__)

from collections import namedtuple

import numpy as np

def _data_from_function(processing_function, args=None, kwds=None):
    """ Call the processing function and return whatever it returns.

    :param processing_function: Callable to be executed.
    :keyword args: Positional arguments for the callable.  ``None`` means no positional argument.
    :keyword kwds: Keyword arguments for the callable.  ``None`` means no keyword argument.
    :returns: The return value of ``processing_function(*args, **kwds)``.
    """
    positional = [] if args is None else args
    keywords = {} if kwds is None else kwds
    return processing_function(*positional, **keywords)


# Record holding the meta data of an intermediate file.  ``_pickle_to`` dumps it
# as the first pickle of the file, and ``_IntermFileReader`` loads it first.
IntermFileMetaData = namedtuple('IntermFileMetaData', ('version', 'creation_time'))
IntermFileMetaData.__doc__ = """ Class to store the intermediate file's meta data.

:param version: The version number
:param creation_time: Creation time
"""


class _IntermFileReader:
    """ Represent reading intermediate file.

    An intermediate file consists of two consecutive pickles: the meta data
    followed by the data itself.  The constructor reads only the meta data;
    the data is loaded at the first call of :meth:`dat`.

    The file is closed when the data has been loaded, or when :meth:`close`
    is called.  The reader may also be used as a context manager.

    .. warning::

        The file is read with :mod:`pickle`.  Only open files that you trust.

    :param intermediate_filename: Name of the intermediate file.  Names ending with
        ``.gz`` or ``.bz2`` are opened via :mod:`gzip` or :mod:`bz2`, respectively.
    """
    def __init__(self, intermediate_filename):
        if intermediate_filename.endswith('.gz'):
            fp = gzip.open(intermediate_filename, 'rb')
        elif intermediate_filename.endswith('.bz2'):
            fp = bz2.open(intermediate_filename, 'rb')
        else:
            fp = open(intermediate_filename, 'rb')

        try:
            self._meta = pickle.load(fp)
        except BaseException:
            # No instance is handed to the caller in this case, so nobody else
            # could close the file handle.
            fp.close()
            raise

        self._dat = None       # Data is not read in this stage. You should call dat() method.
        self._loaded = False   # Explicit flag, because the stored data itself may be None.
        self._fp = fp

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """ Close the underlying file.  Closing more than once is harmless.
        """
        self._fp.close()

    def dat(self):
        """ Return the data stored in the file.

        The data is read from the file at the first call and cached; the file
        is closed after that read attempt whether it succeeded or not.
        """
        if not self._loaded:
            try:
                self._dat = pickle.load(self._fp)
            finally:
                self._fp.close()
            self._loaded = True
        return self._dat

    def version(self):
        """ Return the version stored in the meta data.
        """
        return self._meta.version

    def is_expired(self, expire):
        """ Return True if the file is older than ``expire`` seconds.

        The age is the current time minus the creation time stored in the meta data.
        """
        now = time.time()
        pickle_mtime = self._meta.creation_time
        return (now - pickle_mtime > expire)


def _pickle_to(intermediate_filename, meta, dat, compresslevel=9):
    """ Dump the meta data and the data into the intermediate file.

    Two consecutive pickles are written: ``meta`` first, then ``dat``.

    :param intermediate_filename: Name of the file to write.  Names ending with ``.gz`` or
        ``.bz2`` are written via :mod:`gzip` or :mod:`bz2`, respectively.
    :param meta: Meta data object to be pickled first.
    :param dat: Data object to be pickled second.
    :keyword compresslevel: Compression level.  Used only for ``.gz`` and ``.bz2`` files.
    """
    if intermediate_filename.endswith('.gz'):
        fp = gzip.open(intermediate_filename, 'wb', compresslevel=compresslevel)
    elif intermediate_filename.endswith('.bz2'):
        fp = bz2.open(intermediate_filename, 'wb', compresslevel=compresslevel)
    else:
        fp = open(intermediate_filename, 'wb')

    # The ``with`` block closes the file even if pickling raises.
    with fp:
        pickle.dump(meta, fp)
        pickle.dump(dat, fp)


def intermfile(intermediate_filename, processing_function, args=None, kwds=None,
               version=0, refresh=False, expire=np.inf, compresslevel=9):
    """ Process the data, and save to the given file, or read from the file.

    :param intermediate_filename: Intermediate file name.
    :param processing_function: A function that should be called.
    :keyword args: Argument to be given to processing function.
    :type args: None or ``list``
    :keyword kwds: Keyword to be given to processing function.
    :type kwds: None or ``dict``
    :keyword version: Version number of the intermediate data file.  If the version number
        in the data file is different from the given version, reprocessed.
    :keyword refresh: If set to *True*, reprocessed.
    :keyword expire: After the given value (in seconds), the intermediate file will be invalid.
    :keyword compresslevel: Gzip / Bzip2 compression level.
        It is only valid if the intermediate file name ends with ".gz" or ".bz2".
    :returns: The resulting object, either the object read from the ``intermediate_filename`` or
        the results of ``processing_function``

    A failure in reading or writing the intermediate file is logged as a warning
    and is not raised; the data is then obtained from ``processing_function``.
    An exception raised by ``processing_function`` itself is propagated.

    .. warning::

        The intermediate file is read with :mod:`pickle`.  Only use files that you trust.

    The call

    .. code-block:: python

        from irfpy.util.intermfile import intermfile
        dat = intermfile('datafile.pickle', my_function)

    is roughly equivalent to the following (in addition, the meta data, namely the version
    and the creation time, is stored in front of the data):

    .. code-block:: python

        import pickle
        if os.path.exists('datafile.pickle'):
            with open('datafile.pickle', 'rb') as fp:
                dat = pickle.load(fp)
        else:
            dat = my_function()
            with open('datafile.pickle', 'wb') as fp:
                pickle.dump(dat, fp)
    """
    ### Data is read from pickle file.
    if (not refresh) and os.path.exists(intermediate_filename):
        _logger.info('... Loading from the intermediate file: {}'.format(intermediate_filename))
        freader = None
        try:
            freader = _IntermFileReader(intermediate_filename)    # Metadata is read
            if freader.version() == version and (not freader.is_expired(expire)):
                _logger.info('... Intermediate file is valid. Read from the file ``{}``.'.format(intermediate_filename))
                dat = freader.dat()
                _logger.info('... Done.')
                return dat     # If successfully loaded, the data will be returned.
            else:
                _logger.info('... Intermediate file ``{}`` has been expired.'.format(intermediate_filename))
                _logger.info('... version in file = {} version given = {}'.format(freader.version(), version))
                _logger.info('... expiration is {}'.format(freader.is_expired(expire)))
        except KeyboardInterrupt:
            _logger.warning('User stopped the reading file.')
            raise
        except Exception as e:
            _logger.warning('')
            _logger.warning('Read from intermediate file ``{}`` failed.'.format(intermediate_filename))
            _logger.warning('Reason: {}'.format(str(e)))
            _logger.warning('Please do not worry. Re-processing will be soon initiated')
            _logger.warning('')
        finally:
            # The same file may be rewritten below, so the read handle must not stay open.
            # Closing a file that is already closed is harmless.
            if freader is not None:
                freader._fp.close()

    ### Data is read from original function
    _logger.info('... (Re-)processing data')
    dat = _data_from_function(processing_function, args, kwds)
    meta = IntermFileMetaData(version=version, creation_time=time.time())

    try:
        _logger.info('... Intermediate file ``{}`` is producing.'.format(intermediate_filename))
        _pickle_to(intermediate_filename, meta, dat, compresslevel=compresslevel)
        _logger.info('... Done')
    except Exception as e:
        _logger.warning('')
        _logger.warning('Write to intermediate file ``{}`` failed.'.format(intermediate_filename))
        _logger.warning('Reason: {}'.format(str(e)))
        _logger.warning('Please do not worry. The intermediate file was not produced, but the data is returned')
        _logger.warning('')
        # Remove a possibly incomplete file, so that it is not picked up at the next call.
        if os.path.exists(intermediate_filename):
            try:
                os.remove(intermediate_filename)
            except OSError as remove_error:
                _logger.warning('Removing incomplete intermediate file ``{}`` failed: {}'.format(
                    intermediate_filename, str(remove_error)))

    return dat
def interm(filename, version=0, expire=np.inf, compresslevel=9):
    """ Decorator version of intermediate file.

    :param filename: The name of the intermediate file.
    :keyword version: Specify the version of the program.
        If the given version is different from the version in the intermediate file (``filename``),
        the function is re-run.
    :keyword expire: Expiration time in seconds.
    :keyword compresslevel: For .gz or .bz2, compression level is settled.
    :returns: A decorator.  Each call of the decorated function is forwarded to
        :func:`intermfile` with ``filename`` as the intermediate file.

    The :func:`interm` is the decorator version of :func:`intermfile`.

    Assume you have a function ``user_function``, returning some of the data.

    .. code-block:: python

        def user_function(a, b, c=0, d=0):
            return (a + b, c * d)

    This function can be decorated such as

    .. code-block:: python

        from irfpy.util.intermfile import interm

        @interm('user_function.dat', version=1, expire=86400)
        def user_function(a, b, c=0, d=0):
            return (a + b, c * d)

    Then, the ``user_function()`` returns the tuple, but in addition, dumps the pickle file
    of the returned value to ``user_function.dat`` for the first call.

    .. code-block:: python

        > user_function(1, 5, c=10, d=40)    # => (6, 400)

    .. code-block:: sh

        % ls
        ... user_function.dat ...

    The second call of the ``user_function`` then relies on the pickle file.

    .. code-block:: python

        > user_function(1, 5, c=10, d=40)    # => (6, 400)

    Indeed, this tuple is read from the pickle file.
    It means that changing the argument/keyword will not reflect the results
    as long as ``user_function.dat`` file exists.

    .. code-block:: python

        > user_function(2, 7, c=1, d=4)    # => (6, 400)

    You may expect to get ``(9, 4)`` for this call, however, it is not.
    It is because, again, the data is read from the intermediate file.
    """
    def _interm(func):
        import functools

        @functools.wraps(func)
        def wrapper(*args, **kwds):
            # Reading, validating and (re-)producing the intermediate file is the job
            # of intermfile(); the decorator only fixes the meta data at decoration time.
            return intermfile(filename, func, args=args, kwds=kwds,
                              version=version, expire=expire, compresslevel=compresslevel)
        return wrapper
    return _interm