#!/usr/bin/env  python

import os
import sys

# main function
def main():
	"""Applies a merge spec to a TSV file.

The TSV file stem and the column merge spec are read from the
console; an empty reply selects the default shown in the prompt.
The input file is 'STEM-tsv.txt' and the output is 'STEM-hnsu.txt',
both located in the working directory. When this module is run as a
script the working directory is the script's own directory (which is
also made the current directory); otherwise it is the current
directory.
As well as permuting the columns, this script strips enclosing
quotes from any entries that have them (e.g. because they were
added when the data was saved as TSV from a spreadsheet).
"""

	if __name__ == "__main__":
		# os.path copes with a bare "script.py" (no directory part, which
		# used to end in os.chdir('') and an OSError) and with path
		# separators other than "/".
		workdir = os.path.dirname( os.path.abspath( sys.argv[ 0 ] ) )
		os.chdir( workdir )
	else:
		workdir = os.getcwd()
	print( '{0} {1}'.format( "dir: ", workdir ) )

	print( "Transforming 'STEM-tsv.txt' to 'STEM-hnsu.txt':" )
	tsvstem = _promptWithDefault(
		'TSV file stem? (default is {0})', 'LSF_IdGlossHns' )
	mgstr = _promptWithDefault(
		'Column merge spec.? (default is {0})', '1,0:1:3' )

	mgspec = makeMergeSpec( mgstr )

	inp = os.path.join( workdir, "{0}-tsv.txt".format( tsvstem ) )
	outp = os.path.join( workdir, "{0}-hnsu.txt".format( tsvstem ) )

	# Single-argument print( ... ) behaves the same as a statement
	# (Python 2) and as a function call (Python 3).
	print( '{0} {1}'.format( 'input:      ', inp ) )
	print( '{0} {1}'.format( 'output:     ', outp ) )
	print( '{0} {1}'.format( 'merge spec: ', mgspec ) )

	mergeCols( inp, outp, mgspec )

def _promptWithDefault( question, dflt ):
	"""Asks a question on the console, returning the stripped reply.

question is a format string whose {0} placeholder receives dflt;
dflt is returned when the reply is empty (or stdin is at EOF).
"""
	sys.stdout.write( question.format( dflt ) + ' ' )
	# Make sure the prompt is visible before blocking on the reply.
	sys.stdout.flush()
	reply = sys.stdin.readline().strip()
	return reply or dflt

#----------------

def test_merge():
	"""Prints a sample merge spec and the result of applying it.

The output is meant to be compared by eye with the expected values
listed at the end of the function.
"""
	spec = makeMergeSpec( '1,3,0:3:8' )
	print( spec )
	names = [ "fred", "joe", "bert", "harry", "reg", "pete" ]
	result = mergedFields( spec, names )
	print( result )
	#  Expected output:
	#    ([1, 3, 0], 3, 8)
	#    ['bert', 'reg', 'pete', 'joe_harry_fred', '', '']

#----------------

def makeMergeSpec( mgstr ):
	"""Parses a merge spec. string into a (columns, target, minimum) tuple.

The string has three colon-separated parts:
(i)   a comma-separated list of the (input) column numbers to be
      merged, in merge order;
(ii)  the (output) column number at which the merged value is to be
      placed;
(iii) the minimum expected number of input columns; shorter input
      lines are padded with empty strings up to this count.

For example '1,3,0:3:8' decodes to ([1, 3, 0], 3, 8).
Any parts after the third are ignored.
"""
	parts = mgstr.split( ':' )
	columns = []
	for token in parts[ 0 ].split( ',' ):
		columns.append( int( token ) )
	target = int( parts[ 1 ] )
	minimum = int( parts[ 2 ] )
	return ( columns, target, minimum )

def mergedFields( mgspec, fields ):
	"""Applies a merge spec to a field list, returning a new list.

mgspec is a (columns, target, minimum) tuple as produced by
makeMergeSpec(). The fields are first padded with empty strings up
to 'minimum' entries. The values of the listed columns are then
joined with '_' in the listed order, the listed columns are removed,
and the joined value is inserted at index 'target' of what remains
(at the end if 'target' is beyond it).

The fields argument itself is never modified. An IndexError is
raised if a listed column does not exist after padding.
"""

	mgmap, mgout, mginmin = mgspec
	# Work on a copy: padding must not grow the caller's list.
	padded = list( fields )
	shortfall = mginmin - len( padded )
	if 0 < shortfall:
		padded.extend( shortfall * [ '' ] )
	merged = "_".join( [ padded[ col ] for col in mgmap ] )
	dropped = set( mgmap )
	result = [ fld for i, fld in enumerate( padded ) if i not in dropped ]
	# Insert the merged value itself rather than picking it via
	# "cond and merged or other", which chose the wrong field whenever
	# the merged value was the empty string.
	result.insert( mgout, merged )
	return result

def cleanField( fld ):
	"""Returns fld without its enclosing quotes, if it has any.

A field counts as quoted when it is at least two characters long and
both its first and last characters are the same quote character,
either double (") or single ('). Anything else is returned unchanged.
"""

	if len( fld ) < 2:
		return fld
	first, last = fld[ 0 ], fld[ -1 ]
	if first == last and first in ( '"', "'" ):
		return fld[ 1 : -1 ]
	return fld

def mergeLine( mgspec, ln ):
	"""Returns the TSV line ln with the merge spec mgspec applied.

Trailing whitespace is removed from the line, it is split on tabs,
each field has any enclosing quotes stripped, the merge is applied,
and the result is re-joined with tabs and ended with a newline.
"""

	rawfields = ln.rstrip().split( '\t' )
	cleaned = [ cleanField( raw ) for raw in rawfields ]
	outfields = mergedFields( mgspec, cleaned )
	return '\t'.join( outfields ) + '\n'

def mergeCols( inpath, outpath, mgspec ):
	"""Applies a merge spec to a TSV file, giving a new output file.

Every line of the file at inpath is passed through mergeLine() and
written to the file at outpath, except that blank lines (a lone
newline) are copied through unchanged. The number of lines processed
is printed when the whole file has been written.

Both files are closed even if reading, merging or writing fails.
"""

	n = 0
	# Nested "with" blocks (rather than one combined statement) keep this
	# valid on Python 2.6 while still guaranteeing both files are closed.
	with open( inpath, 'r' ) as inf:
		with open( outpath, 'w' ) as outf:
			for ln in inf:
				# A blank line has no fields to merge; keep it as it is.
				oln = ln if ln == '\n' else mergeLine( mgspec, ln )
				outf.write( oln )
				n += 1
	print( '{0} {1}'.format( 'Line count: ', n ) )


#----------------
# Script entry point: run the interactive merge only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
	main()
#----------------


#---
