apps131026_ltsvtools.ltsv

A command-line tool for LTSV (Labeled Tab-Separated Values) files

''' A command-line tool for LTSV (Labeled Tab-Separated Values) files.
'''
import sys
from collections import OrderedDict

from optparse import OptionParser

import re

class Ltsv:
    r''' A simple LTSV (Labeled Tab-Separated Values) parser.

    Each input line is split on tabs into ``label:value`` fields and stored
    as an ``OrderedDict`` in ``self.d`` (one entry per successfully parsed
    line, in input order).

    >>> from io import StringIO
    >>> s = StringIO('a:100\tb:200\na:120\tb:220')
    >>> [dict(rec) for rec in Ltsv(s).d]
    [{'a': '100', 'b': '200'}, {'a': '120', 'b': '220'}]
    '''

    def __init__(self, fp):
        ''' Parse every line of the iterable ``fp`` immediately.

        ``fp`` is only iterated here; it is not kept, so the caller may
        close it as soon as the constructor returns.
        '''
        self.d = self.parse(fp)
        # Lazily computed, ordered list of all keys (see allkeys()).
        self._allkey = None

    @staticmethod
    def parse(fp):
        ''' Turn an iterable of text lines into a list of OrderedDicts.

        A line containing a field without a ``:`` separator (this includes
        blank lines) cannot be converted into key/value pairs and is
        skipped as a whole.
        '''
        records = []

        for line in fp:
            fields = [field.split(':', 1)
                      for field in line.strip().split('\t')]
            try:
                records.append(OrderedDict(fields))
            except ValueError:
                # At least one field had no ':'; best-effort: drop the line.
                continue

        return records

    def allkeys(self):
        ''' Obtain all the keys, in order of first appearance.

        The result is computed once and cached.  A fresh list is returned
        on every call so that callers may modify it without corrupting
        the cache.
        '''
        if self._allkey is None:
            # An ordered mapping gives O(1) membership tests while still
            # conserving the order in which keys were first seen.
            seen = OrderedDict()
            for record in self.d:
                for key in record:
                    seen.setdefault(key, None)
            self._allkey = list(seen)

        return list(self._allkey)

    def allkey_count(self):
        ''' Count the occurrence of each key.

        Returns an OrderedDict mapping every key (ordered as in allkeys())
        to the number of parsed lines that contain it.
        '''
        count = OrderedDict((key, 0) for key in self.allkeys())
        for record in self.d:
            for key in record:
                count[key] += 1

        return count

    def printf(self, keys=None, nolabel=False):
        ''' Print the parsed data as LTSV on standard output.

        keys    -- the keys to print, in that order; all keys when None.
        nolabel -- when true, print only the values, without ``key:``.

        A key missing from a line is printed with the value ``--N/A--``.
        '''
        if keys is None:
            keys = self.allkeys()

        for record in self.d:
            cells = []
            for key in keys:
                val = record.get(key, '--N/A--')
                if nolabel:
                    cells.append('%s' % val)
                else:
                    cells.append('%s:%s' % (key, val))
            print('\t'.join(cells))


def mainloop(lt, include_regexp_list, exclude_regexp_list, nolabel=False):
    ''' Select keys by regular expression and print the matching columns.

    lt                  -- object providing allkeys() and
                           printf(keys=..., nolabel=...).
    include_regexp_list -- patterns; a key is selected when re.search()
                           matches at least one of them.  None selects
                           every key.
    exclude_regexp_list -- patterns; a selected key is dropped when
                           re.search() matches at least one of them.
                           None or an empty list excludes nothing.
    nolabel             -- passed through to lt.printf().

    The selected keys keep the order returned by lt.allkeys() and each
    key appears at most once.
    '''
    allkeys = lt.allkeys()

    if include_regexp_list is None:
        # Select everything.  Work on a copy so the list owned by ``lt``
        # is never modified.
        inckeys = list(allkeys)
    else:
        # any() stops at the first matching pattern, so a key matching
        # several patterns is still selected only once.
        inckeys = [key for key in allkeys
                   if any(re.search(pattern, key)
                          for pattern in include_regexp_list)]

    if exclude_regexp_list:
        # Build a new list instead of removing items from a list that is
        # being iterated, which would silently skip keys.
        inckeys = [key for key in inckeys
                   if not any(re.search(pattern, key)
                              for pattern in exclude_regexp_list)]

    lt.printf(keys=inckeys, nolabel=nolabel)

def main():
    '''Main script.

    Parse the command-line options, read LTSV data from the file named by
    the first positional argument (or from standard input when none is
    given) and then either list the keys (-k), list the keys with their
    occurrence counts (-c), or print the columns selected by the
    -e / -x patterns.
    '''

    usage = "%prog [options] [ltscfile]"

    parser = OptionParser(usage)

    parser.add_option('-k', '--show-keys',
            action='store_true', dest='showkeys',
            default=False,
            help='Show all the keys')

    parser.add_option('-c', '--show-key-counts',
            action='store_true', dest='showkeycounts',
            default=False,
            help='Show all the keys with statistics')

    parser.add_option('-e', '--regexp',
            action='append', dest='regexp', default=None,
            help='Use PATTERN as the key pattern to include')

    # default=None rather than a shared mutable list; mainloop() treats
    # None as "no exclusion".
    parser.add_option('-x', '--exclude',
            action='append', dest='ex_regexp', default=None,
            help='Use PATTERN as the key pattern to exclude')

    parser.add_option('-n', '--no-label',
            action='store_true', dest='nolabel', default=False,
            help='Do not print label.')


#    parser.add_option('-v', '--invert-match',
#            action='store_true', dest='invert',
#            default=False,
#            help='Invert the match')


    options, args = parser.parse_args()

    if args:
        # Ltsv reads the whole input in its constructor, so the file can
        # be closed as soon as the object has been built.
        with open(args[0]) as fp:
            lt = Ltsv(fp)
    else:
        # Standard input is not ours to close.
        lt = Ltsv(sys.stdin)

    if options.showkeys:
        for key in lt.allkeys():
            print(key)
    elif options.showkeycounts:
        for key, cnt in lt.allkey_count().items():
            print(key, cnt)
    else:
        mainloop(lt,
                 include_regexp_list=options.regexp,
                 exclude_regexp_list=options.ex_regexp,
                 nolabel=options.nolabel)
    

# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()