#!/usr/bin/env  python

import os
import sys

# main function
def main():
	"""Apply a column permutation spec to a TSV file, interactively.

	The stem for the input and output file names, and the column
	permutation spec, are read from the console; an empty reply selects
	the default shown in the prompt.  'STEM-tsv.txt' is read and
	'STEM-hnsu.txt' is written, both in the working directory chosen
	below.  As well as permuting the columns, this script strips
	enclosing quotes from any entries that have them (e.g. because they
	were added when the data was saved as TSV from a spreadsheet).

	If the spec cannot be turned into a map, makeMap() reports why and
	no file is read or written.
	"""

	#test_makeMap( [ '3,1,,5,,,', '3,2,,8,,', '2,1,0', '2,1,1', '2', ''] )
	#return

	if __name__ == "__main__":
		# Run as a script: work in the directory holding the script.
		# os.path handles the case where argv[0] has no directory part
		# (e.g. "python script.py"), for which splitting on "/" yields ''
		# and os.chdir( '' ) fails.
		workdir = os.path.dirname( os.path.abspath( sys.argv[0] ) )
		os.chdir( workdir )
	else:
		workdir = os.getcwd()
	print( 'dir:  {0}'.format( workdir ) )

	dflt = 'LSF_IdGlossHns'
	print( "Transforming 'STEM-tsv.txt' to 'STEM-hnsu.txt':" )
	# Prompts have no newline, so write and flush them explicitly to be
	# sure they are visible before blocking on stdin.
	sys.stdout.write( 'TSV file stem? (default is {0}) '.format( dflt ) )
	sys.stdout.flush()
	tsvstem = sys.stdin.readline().strip() or dflt

	inp = os.path.join( workdir, '{0}-tsv.txt'.format( tsvstem ) )
	outp = os.path.join( workdir, '{0}-hnsu.txt'.format( tsvstem ) )

	dfltmap = '2,1'
	sys.stdout.write(
		'Column permutation spec.? (default is {0})'.format( dfltmap ) )
	sys.stdout.flush()
	mapstr = sys.stdin.readline().strip() or dfltmap
	themap = makeMap( mapstr )
	if themap is None:
		# makeMap() has already printed the reason; there is nothing
		# sensible to apply, so stop before touching any file.
		return

	print( 'input:     {0}'.format( inp ) )
	print( 'output:    {0}'.format( outp ) )
	print( 'col. map:  {0}'.format( themap ) )

	mapFileCols( inp, outp, themap )

#----------------

def test_makeMap( mstrs ):
	"""Print the result of makeMap() for each map spec string in mstrs.

	This is a manual check aid: one 'SPEC --> MAP' line is printed per
	spec string, and nothing is asserted or returned.
	"""

	for mapstr in mstrs:
		# A single pre-formatted argument keeps the output identical
		# whether print is a statement (Python 2) or a function (Python 3).
		print( '{0} --> {1}'.format( mapstr, makeMap( mapstr ) ) )

#----------------

def makeMap( mapstr ):
	"""Create a column permutation map from a map spec string.

	The map is represented as a list, effectively mapping each new column
	index to the corresponding column index in the input file.

	The map string consists of a comma-separated list of original column
	numbers.  Some entries in this list may be empty (or blank): such
	entries are filled in order with the original column numbers whose
	indices are not explicitly mentioned.  For example, the spec
	'2,0,,1,' is interpreted as an abbreviation for '2,0,3,1,4' -- with
	the implication that the input has at least 5 columns.  The map is
	also extended so that it covers the largest column number mentioned,
	e.g. '2,1' yields [2, 1, 0].  Negative entries are treated like
	empty ones.

	Returns the map as a list of ints ([] for an empty spec), or None --
	after printing a diagnostic -- when the spec mentions a column more
	than once and so cannot be completed to a permutation.  A non-numeric
	entry raises ValueError (from int()).
	"""

	if mapstr == '':
		return [ ]

	# bmap: backwards map, new column no. --> original column no.;
	# -1 marks an entry still to be filled in.
	bmap = [ ]
	for entry in mapstr.split( ',' ):
		entry = entry.strip()
		bmap.append( int( entry ) if entry else -1 )

	# The largest column number mentioned fixes the minimum map length.
	shortfall = max( bmap ) + 1 - len( bmap )
	if shortfall > 0:
		bmap.extend( shortfall * [ -1 ] )
	N = len( bmap )

	# ffree: old column nos. not appearing in bmap;
	# bfree: new column nos. for which there is no explicit (old) col. no.
	used = set( bmap )
	ffree = [ i for i in range( N ) if i not in used ]
	bfree = [ i for i in range( N ) if bmap[ i ] < 0 ]

	if len( ffree ) != len( bfree ):
		# The counts differ exactly when some column is named twice.
		print( 'fwd map unused indices:  {0}'.format( ffree ) )
		print( 'col map unused indices:  {0}'.format( bfree ) )
		print( 'Incompatible unused index set lengths; no map created.' )
		return None

	for j, i in zip( bfree, ffree ):
		bmap[ j ] = i
	return bmap

def getField( c, cmap, flds, doclean=True ):
	"""Pick the input field that belongs in output column c.

	cmap translates c to an input column number; an output column beyond
	the end of cmap maps to the input column with the same number.  If
	the input column is beyond the end of flds the result is the empty
	string.  Unless doclean is false, the chosen field is passed through
	cleanField() before being returned.
	"""

	source = cmap[ c ] if c < len( cmap ) else c
	value = flds[ source ] if source < len( flds ) else ''
	return cleanField( value ) if doclean else value

def cleanField( fld ):
	"""Return fld without its enclosing quotes, if it has a matching pair.

	Only a field of at least two characters that both starts and ends
	with the same quote character (double or single) is changed; any
	other field is returned as it is.
	"""

	if len( fld ) < 2:
		return fld
	first, last = fld[ 0 ], fld[ -1 ]
	if first == last and first in ( '"', "'" ):
		return fld[ 1 : -1 ]
	return fld

def mapLine( ln, cmap ):
	"""Apply a column map to one TSV data line.

	ln is split into fields at tab characters and the fields are
	rearranged according to cmap by getField().  The output has as many
	fields as the longer of cmap and the input line, so a short line is
	padded with empty fields and a long line keeps its extra columns.
	Returns the new line, tab-joined and terminated by a newline.
	"""

	# Remove only the line terminator.  A bare rstrip() would also remove
	# trailing tabs and spaces, silently dropping trailing empty columns
	# and trimming the last field.
	flds = ln.rstrip( '\r\n' ).split( '\t' )
	NFO = max( len( cmap ), len( flds ) )
	newfields = [ getField( i, cmap, flds ) for i in range( NFO ) ]
	return '\t'.join( newfields ) + '\n'

def mapFileCols( inpath, outpath, cmap ):
	"""Apply a column permutation map to every line of a TSV file.

	Reads inpath line by line and writes the result to outpath (which is
	created or overwritten).  A line consisting of just a newline is
	copied unchanged; every other line is transformed by mapLine().  The
	number of lines processed is printed at the end.

	Raises TypeError if cmap is None, before any file is opened.
	"""

	if cmap is None:
		# Fail before the output file is created or truncated.
		raise TypeError( 'mapFileCols: no column map given' )

	n = 0
	# Nested with-blocks close both files even if a line fails to convert.
	with open( inpath, 'r' ) as inf:
		with open( outpath, 'w' ) as outf:
			for ln in inf:
				outf.write( ln if ln == '\n' else mapLine( ln, cmap ) )
				n += 1
	print( 'Line count:  {0}'.format( n ) )


#----------------
# Run the interactive conversion only when this file is executed as a
# script; importing it as a module just defines the functions above.
if __name__ == "__main__":
	main()
#----------------


#---
