apps131026_ltsvtools.ltsv
¶
A command-line tool for LTSV
''' A command-line tool for LTSV (Labeled Tab-Separated Values) files.
'''
import sys
from collections import OrderedDict
from optparse import OptionParser
import re
class Ltsv:
    r''' A simple parser for LTSV (Labeled Tab-Separated Values) data.

    Every input line is split on tabs into ``label:value`` fields and
    stored as one ``OrderedDict`` in ``self.d``.  A line containing a field
    without a colon (a blank line included) is skipped as a whole.

    >>> from io import StringIO
    >>> fp = StringIO('a:100\tb:200\na:120\tb:220')
    >>> [dict(row) for row in Ltsv(fp).d]
    [{'a': '100', 'b': '200'}, {'a': '120', 'b': '220'}]
    '''

    def __init__(self, fp):
        ''' Parse all of ``fp`` immediately.

        fp -- any iterable of text lines, e.g. an open file or sys.stdin.
              It is fully consumed here and not kept afterwards.
        '''
        self.d = self.parse(fp)
        # Cache for allkeys(); filled lazily on the first call.
        self._allkey = None

    @staticmethod
    def parse(fp):
        ''' Return the records read from ``fp`` as a list of OrderedDicts.

        Surrounding whitespace is stripped from each line before it is
        split on tabs; each field is split on its first colon only, so a
        value may itself contain colons.
        '''
        records = []
        for line in fp:
            fields = [field.split(':', 1)
                      for field in line.strip().split('\t')]
            try:
                records.append(OrderedDict(fields))
            except ValueError:
                # A field without ':' splits into a single-element list,
                # which OrderedDict rejects.  Parsing is best-effort: the
                # whole line is dropped and parsing continues.
                continue
        return records

    def allkeys(self):
        ''' Obtain all the keys, in order of first appearance.

        The result is computed once and cached.  A fresh list is returned
        on every call so that callers may modify it without corrupting
        the cache.
        '''
        if self._allkey is None:
            # An ordered mapping de-duplicates while keeping insertion
            # order, without a linear list search for every key.
            seen = OrderedDict()
            for record in self.d:
                for key in record:
                    seen[key] = None
            self._allkey = list(seen)
        return list(self._allkey)

    def allkey_count(self):
        ''' Count the occurrence of each key.

        Returns an OrderedDict mapping every key (ordered as in
        allkeys()) to the number of records that contain it.
        '''
        count = OrderedDict((key, 0) for key in self.allkeys())
        for record in self.d:
            for key in record:
                count[key] += 1
        return count

    def printf(self, keys=None, nolabel=False):
        ''' Print the records as LTSV to standard output.

        keys    -- the labels to print, in column order; None means all
                   keys as returned by allkeys().
        nolabel -- when true, print only the values (plain TSV) instead
                   of ``label:value`` pairs.

        A record lacking one of the requested keys shows '--N/A--' in
        that column.
        '''
        if keys is None:
            keys = self.allkeys()
        for record in self.d:
            cells = []
            for key in keys:
                val = record.get(key, '--N/A--')
                if nolabel:
                    cells.append('%s' % (val,))
                else:
                    cells.append('%s:%s' % (key, val))
            print('\t'.join(cells))
def mainloop(lt, include_regexp_list, exclude_regexp_list, nolabel=False):
    ''' Select keys by regular expression and print the matching columns.

    lt                  -- object providing allkeys() and
                           printf(keys=..., nolabel=...).
    include_regexp_list -- patterns; a key is kept when re.search() finds
                           at least one of them in it.  None keeps every
                           key.
    exclude_regexp_list -- patterns; a kept key is dropped again when any
                           of them is found in it.  None or an empty list
                           excludes nothing.
    nolabel             -- passed through to lt.printf().

    Each selected key appears once, in the order given by lt.allkeys().
    '''
    # Work on a private copy so the list owned by ``lt`` is never changed.
    all_keys = list(lt.allkeys())

    if include_regexp_list is None:  # No filter: choose everything.
        selected = all_keys
    else:
        # any() stops at the first matching pattern, so a key that matches
        # several patterns is still listed only once.
        selected = [key for key in all_keys
                    if any(re.search(pattern, key) is not None
                           for pattern in include_regexp_list)]

    if exclude_regexp_list:
        # Build a new list rather than removing while iterating, which
        # would skip the element following each removed key.
        selected = [key for key in selected
                    if not any(re.search(pattern, key) is not None
                               for pattern in exclude_regexp_list)]

    lt.printf(keys=selected, nolabel=nolabel)
def main():
    '''Main script: parse the command line, read the input and print.

    The input is the first positional argument when given, otherwise
    standard input.  Depending on the options, prints the list of keys
    (-k), the keys with their occurrence counts (-c), or the records
    restricted to the keys selected with -e / -x.
    '''
    usage = "%prog [options] [ltscfile]"
    parser = OptionParser(usage)
    parser.add_option('-k', '--show-keys',
                      action='store_true', dest='showkeys',
                      default=False,
                      help='Show all the keys')
    parser.add_option('-c', '--show-key-counts',
                      action='store_true', dest='showkeycounts',
                      default=False,
                      help='Show all the keys with statistics')
    # default=None (rather than []) lets mainloop tell "no -e given, keep
    # everything" apart from an explicit list of patterns.
    parser.add_option('-e', '--regexp',
                      action='append', dest='regexp', default=None,
                      help='Use PATTERN as the key pattern to include')
    parser.add_option('-x', '--exclude',
                      action='append', dest='ex_regexp', default=[],
                      help='Use PATTERN as the key pattern to exclude')
    parser.add_option('-n', '--no-label',
                      action='store_true', dest='nolabel', default=False,
                      help='Do not print label.')
    # Not implemented yet:
    # parser.add_option('-v', '--invert-match',
    #                   action='store_true', dest='invert',
    #                   default=False,
    #                   help='Invert the match')
    options, args = parser.parse_args()

    if args:
        # Ltsv reads its whole input in the constructor, so the file can
        # be closed as soon as the object exists.
        with open(args[0]) as fp:
            lt = Ltsv(fp)
    else:
        lt = Ltsv(sys.stdin)

    if options.showkeys:
        for key in lt.allkeys():
            print(key)
    elif options.showkeycounts:
        for key, cnt in lt.allkey_count().items():
            print(key, cnt)
    else:
        mainloop(lt,
                 include_regexp_list=options.regexp,
                 exclude_regexp_list=options.ex_regexp,
                 nolabel=options.nolabel)
# Script entry point: run the command-line interface only when this file
# is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()