/*	2010-10-29
 */
package sigmldoc2signdocs;


import java.io.IOException;
import java.io.File;
import java.io.BufferedReader;
import java.io.PrintStream;


import static util.HNSUtils.getStdBufReader;
import static util.HNSUtils.getBufReader;
import static util.HNSUtils.readLines;
import static util.HNSUtils.signsFromSiGMLDoc;
import static util.HNSUtils.signDocsFromSigns;
import static util.HNSUtils.glossesFromSigns;
import static util.HNSUtils.numStr;
import static util.HNSUtils.indentXML;
import static util.HNSUtils.filePrintStream;
import static util.HNSUtils.toStdOut;
import static util.HNSUtils.toStream;

import static util.ToSafeASCIIMap.getToSafeASCIIMap;


/** Main class for the SiGML document-to-signs splitter. */
public class Main {

/** Splits a SiGML document into separate SiGML documents, one per sign.
 * If the document comes from standard input then the results are printed
 * to standard output; if it comes from a named file then the results
 * are output to individual files in a new subdirectory of the original
 * file's parent directory, the new directory's name ending with "-signs".
 * <strong>NB</strong> This is <strong>not</strong> proper XML processing,
 * just simple text processing, intended primarily for use with
 * machine-generated input files.  It can probably be fooled by signs
 * in XML comments, and such like -- and if fooled will be silent about it.
 * The source SiGML document is assumed to be UTF-8, and is taken from
 * standard input, unless an argument is provided, defining a file
 * system path for it.
 * The results, whether in multiple files or on standard output, are
 * provided as UTF-8.
 *
 * @param args optional; if present, {@code args[0]} is the path of the
 *        SiGML document to split, otherwise standard input is read.
 * @throws IOException if the input cannot be read, the results
 *         directory cannot be created, or a sign file cannot be written.
 */
	public static void main(String[] args) throws IOException {

		final String PATH =
			args == null || args.length == 0 ? null : args[0];

		// Read the whole document up front, and release the reader even
		// if reading fails.  Closing is harmless if readLines() has
		// already closed the reader itself.
		String[] sigml;
		BufferedReader srdr =
			PATH == null ? getStdBufReader() : getBufReader(PATH);
		try {
			sigml = readLines(srdr);
		}
		finally {
			srdr.close();
		}

		String[][] signs = signsFromSiGMLDoc(sigml);
		String[][] sdocs = signDocsFromSigns(signs);
		String[] glosses = glossesFromSigns(signs);

		if (PATH == null) {
			// Results to standard output, each sign document preceded
			// by a separator line giving its index and gloss.
			for (int i=0; i!=sdocs.length; ++i) {
				System.out.println(
					"--------  "+i+": "+glosses[i]+"  --------");
				toStdOut(indentXML(sdocs[i]));
			}
		}
		else {
			// Results to a new sibling subdirectory.  A null result
			// means the directory already exists (already reported on
			// standard error), in which case nothing is written.
			File rdir = resultDir(PATH);
			if (rdir != null) {
				final int N = sdocs.length;
				for (int j=0; j!=N; ++j) {
					// Name is something like 00j_GLOSS.sigml .
					final String SIGN_FILE_NAME =
						numStr(j, N)+"_"+cleanForFS(glosses[j])+".sigml";
					// NB Default encoding on Mac OS X is not UTF-8,
					// so we cannot directly construct the print stream
					// from the File.
					PrintStream sfstrm =
						filePrintStream(rdir, SIGN_FILE_NAME);
					try {
						toStream(indentXML(sdocs[j]), sfstrm);
						// A PrintStream never throws on a failed write:
						// it only records the failure, so ask explicitly
						// rather than leave a silently truncated file.
						if (sfstrm.checkError()) {
							throw new IOException(
								"Error writing sign file "+SIGN_FILE_NAME+
								" in directory "+rdir);
						}
					}
					finally {
						// Release the file even if the write failed.
						sfstrm.close();
					}
				}
			}
		}
	}

	/** Creates the results directory for the given SiGML file: a new
	 * sibling of that file whose name is the file's name, less any
	 * extension, with "-signs" appended.
	 *
	 * @param SIGML_PATH file system path of the source SiGML document.
	 * @return the newly created directory, or {@code null} if a file or
	 *         directory of that name already exists (in which case a
	 *         message is printed on standard error).
	 * @throws IOException if the path cannot be made canonical, or if
	 *         the directory cannot be created.
	 */
	private static File resultDir(final String SIGML_PATH)
	throws IOException {

		File spf = new File(SIGML_PATH).getCanonicalFile();
		File dir = spf.getParentFile();

		// Assume the file is foo.sigml or just plain foo: in either case
		// the new sibling subdirectory will be foo-signs/.
		// A dot in first position (e.g. ".sigml") is treated as part of
		// the name rather than as an extension separator, so that the
		// directory is never named just "-signs".
		String sname = spf.getName();
		final int i = sname.lastIndexOf('.');
		String subdir = (i <= 0 ? sname : sname.substring(0, i))+"-signs";

		File subdirf = new File(dir, subdir);

		// Be conservative: bail out if the directory already exists;
		// otherwise, create it.
		if (subdirf.exists()) {
			System.err.println("Directory "+subdir+" already exists.");
			subdirf = null;
		}
		else if (!subdirf.mkdir()) {
			// mkdir() reports failure through its result, not by
			// throwing: fail here, with the directory named, rather
			// than later when the first sign file cannot be opened.
			throw new IOException("Could not create directory "+subdirf);
		}

		return subdirf;
	}

	/** Cleans the given name for use as part of a file name, by passing
	 * it through the map obtained from {@code getToSafeASCIIMap()}.
	 *
	 * @param name the name to be cleaned (here, a sign's gloss).
	 * @return the result of applying the map's {@code safe()} to the name.
	 */
	private static String cleanForFS(String name) {

		// Clean the given name for use by the file system.
		return getToSafeASCIIMap().safe(name);
	}
}
