/*	2010-10-25
 */
package util;


import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.PrintStream;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.PrintWriter;

import java.util.List;
import java.util.ArrayList;
import java.util.LinkedList;


import util.HamNameMap;
import util.HNSOctetUCSMap;


/** Static support methods for manual HamNoSys/SiGML text processing. */
public class HNSUtils {

/** Unix end-of-line. */
	public static final String				EOL = "\n";
/** SiGML tag name: {@code sigml}. */
	public static final String				SIGML = "sigml";
/** SiGML tag name: {@code hns_sign}. */
	public static final String				HNS_SIGN = "hns_sign";
/** SiGML tag name: {@code hamnosys_manual}. */
	public static final String				HNS_MANUAL = "hamnosys_manual";
/** SiGML tag name: {@code hamgestural_sign}. */
	public static final String				HAM_GEST_SIGN = "hamgestural_sign";
/** SiGML tag name: {@code sign_manual}. */
	public static final String				SIGN_MANUAL = "sign_manual";

/** SiGML attribute name: {@code gloss}. */
	public static final String				GLOSS = "gloss";

/** "Standard" Unicode encoding: {@code utf-8} -- but note this is not
 * actually standard on Mac OS X, MacRoman is.
 */
	public static final String				STD_ENCODING = "utf-8";
/** XML declaration line naming the standard encoding, as built by
 * {@link #XML_DECL(java.lang.String)} from {@link #STD_ENCODING}.
 */
	public static final String				STD_XML_DECL =
											XML_DECL(STD_ENCODING);


/** Builds one SiGML document per given sign element text
 * ({@code <hns_sign>} or {@code <hamgestural_sign>}).  Each sign's
 * lines are passed through {@code trim()}, wrapped in a {@code <sigml>}
 * element and preceded by the standard XML declaration line.
 *
 * @param signs the sign element texts, each a sequence of lines
 * @return the document texts, in the same order as the signs
 */
	public static String[][] signDocsFromSigns(String[][] signs) {

		final String[][] docs = new String[signs.length][];

		int d = 0;
		for (String[] sign : signs) {
			final String[] SIGML_EL = EL_WRAP(SIGML, trim(sign));
			docs[d++] = prefix(STD_XML_DECL, SIGML_EL);
		}

		return docs;
	}

/** Splits the given SiGML document text (a sequence of lines) into the
 * texts of its individual sign elements, that is, its
 * {@code <hns_sign>} and {@code <hamgestural_sign>} elements.
 *
 * @param sigml the lines of the SiGML document
 * @return one line sequence per sign element found
 */
	public static String[][] signsFromSiGMLDoc(String[] sigml) {

		return
			elSeqFromSimpleXML(
				new String[] { HNS_SIGN, HAM_GEST_SIGN }, sigml);
	}

/** Returns the lines of the SiGML document for the given sequence of
 * manual HamNoSys (Unicode) items.  Each item is taken to be a manual
 * HNS (Unicode) string, optionally followed by a tab character and a
 * gloss name (the item helpers used here split at tab characters).
 * The HNS strings are cleaned before conversion, and the result starts
 * with the standard XML declaration line.
 *
 * @param hnsui the HNS (Unicode) items
 * @return the lines of the generated SiGML text
 */
	public static String[] sigmlFromHNSUItems(String[] hnsui) {

		final int COUNT = hnsui.length;

		final String[] GLOSS_NAMES = glossesFromHNSItems(hnsui);
		final String[] CLEAN_HNSU = cleanHNSU(hnsFromHNSItems(hnsui));
		final String[][] NAME_VECS = hnsNameVecsFromHNSU(CLEAN_HNSU);

		// One <hns_sign> element per item.
		final String[][] signEls = new String[COUNT][];
		for (int s = 0; s < COUNT; s++) {
			signEls[s] = hnsSignFromHNSNameVec(NAME_VECS[s], GLOSS_NAMES[s]);
		}

		final String[] SIGML_EL = EL_WRAP(SIGML, merge(signEls));
		return prefix(STD_XML_DECL, SIGML_EL);
	}

/** Returns the lines of the SiGML document for the given sequence of
 * classic (8-bit) manual HamNoSys items.  Each item is taken to be a
 * string of classic manual HNS octet values, optionally followed by a
 * tab character and a gloss name (the item helpers used here split at
 * tab characters).  The HNS strings are cleaned before conversion, and
 * the result starts with the standard XML declaration line.
 *
 * @param hns8i the classic (8-bit) HNS items
 * @return the lines of the generated SiGML text
 */
	public static String[] sigmlFromHNS8Items(String[] hns8i) {

		final int COUNT = hns8i.length;

		final String[] GLOSS_NAMES = glossesFromHNSItems(hns8i);
		final String[] CLEAN_HNS8 = cleanHNS8(hnsFromHNSItems(hns8i));
		final String[][] NAME_VECS = hnsNameVecsFromHNS8(CLEAN_HNS8);

		// One <hns_sign> element per item.
		final String[][] signEls = new String[COUNT][];
		for (int s = 0; s < COUNT; s++) {
			signEls[s] = hnsSignFromHNSNameVec(NAME_VECS[s], GLOSS_NAMES[s]);
		}

		final String[] SIGML_EL = EL_WRAP(SIGML, merge(signEls));
		return prefix(STD_XML_DECL, SIGML_EL);
	}

/** Converts the given sequence of manual HamNoSys items from the
 * classic (8-bit) HNS encoding to HNS (Unicode), assuming that each
 * item starts with a string of classic manual HNS octet values.
 * The leading HNS string of each item is cleaned and converted, and the
 * item's original tail is then re-attached using {@code zip()}.
 *
 * @param hns8i the classic (8-bit) HNS items
 * @return the corresponding HNS (Unicode) items
 */
	public static String[] hnsuItemsFromHNS8Items(String[] hns8i) {

		final String[] HNSU =
			hnsuFromHNS8(cleanHNS8(hnsFromHNSItems(hns8i)));
		final String[] TAILS = tailsFromHNSItems(hns8i);

		return zip(HNSU, TAILS);
	}

/** Converts the given sequence of symbolic HamNoSys items to classic
 * (8-bit) HNS items: the leading symbolic HNS string of each item is
 * converted by {@link #hns8FromSymHNSItems(java.lang.String[])} and the
 * item's original tail is then re-attached using {@code zip()}.
 *
 * @param symhnsi the symbolic HNS items
 * @return the corresponding classic (8-bit) HNS items
 */
	public static String[] hns8ItemsFromSymHNSItems(String[] symhnsi) {

		final String[] HNS8 = hns8FromSymHNSItems(symhnsi);
		final String[] TAILS = tailsFromHNSItems(symhnsi);

		return zip(HNS8, TAILS);
	}

/** Returns the classic (8-bit) manual HamNoSys strings corresponding
 * to the given sequence of symbolic HNS items, assuming that each item
 * starts with a symbolic HNS string (comma-separated symbol names).
 *
 * @param symhnsi the symbolic HNS items
 * @return one classic (8-bit) HNS string per item
 */
	public static String[] hns8FromSymHNSItems(String[] symhnsi) {

		final String[] SYM_HNS = hnsFromHNSItems(symhnsi);
		final String[][] NAME_VECS = hnsNameVecsFromSymHNS(SYM_HNS);

		return hns8FromHNSNameVecs(NAME_VECS);
	}


/** Converts the given sequence of HNS (Unicode) items to symbolic HNS
 * items: the leading HNS (Unicode) string of each item is converted by
 * {@link #symHNSFromHNSUItems(java.lang.String[])} and the item's
 * original tail is then re-attached using {@code zip()}.
 *
 * @param hnsui the HNS (Unicode) items
 * @return the corresponding symbolic HNS items
 */
	public static String[] symHNSItemsFromHNSUItems(String[] hnsui) {

		final String[] SYM_HNS = symHNSFromHNSUItems(hnsui);
		final String[] TAILS = tailsFromHNSItems(hnsui);

		return zip(SYM_HNS, TAILS);
	}

/** Converts the given sequence of symbolic HNS items to HNS (Unicode)
 * items: the leading symbolic HNS string of each item is converted by
 * {@link #hnsuFromSymHNSItems(java.lang.String[])} and the item's
 * original tail is then re-attached using {@code zip()}.
 *
 * @param symhnsi the symbolic HNS items
 * @return the corresponding HNS (Unicode) items
 */
	public static String[] hnsuItemsFromSymHNSItems(String[] symhnsi) {

		final String[] HNSU = hnsuFromSymHNSItems(symhnsi);
		final String[] TAILS = tailsFromHNSItems(symhnsi);

		return zip(HNSU, TAILS);
	}

/** Returns the sequence of symbolic (manual) HamNoSys strings
 * corresponding to the given sequence of HNS (Unicode) items.
 * Each item is assumed to start with a manual HNS (Unicode) string;
 * that string is cleaned, and the corresponding result string consists
 * of the matching HNS symbol names separated by commas.
 *
 * @param hnsui the HNS (Unicode) items
 * @return one symbolic HNS string per item
 */
	public static String[] symHNSFromHNSUItems(String[] hnsui) {

		final String[] CLEAN_HNSU = cleanHNSU(hnsFromHNSItems(hnsui));
		final String[][] NAME_VECS = hnsNameVecsFromHNSU(CLEAN_HNSU);

		return symHNSFromHNSNameVecs(NAME_VECS);
	}

/** Returns the manual HamNoSys (Unicode) strings corresponding to the
 * given sequence of symbolic HNS items, assuming that each item starts
 * with a symbolic HNS string (comma-separated symbol names).
 *
 * @param symhnsi the symbolic HNS items
 * @return one HNS (Unicode) string per item
 */
	public static String[] hnsuFromSymHNSItems(String[] symhnsi) {

		final String[] SYM_HNS = hnsFromHNSItems(symhnsi);
		final String[][] NAME_VECS = hnsNameVecsFromSymHNS(SYM_HNS);

		return hnsuFromHNSNameVecs(NAME_VECS);
	}

/** Converts the given symbolic (manual) HamNoSys strings to sequences
 * of HamNoSys symbol names, taking the comma character as the
 * separator between adjacent names in each string.
 *
 * @param symhns the symbolic HNS strings
 * @return one (cleaned) symbol name sequence per string
 */
	public static String[][] hnsNameVecsFromSymHNS(String[] symhns) {

		final String COMMA = ",";

		return hnsNameVecsFromSymHNS(symhns, COMMA);
	}

/** Converts the given symbolic (manual) HNS strings to sequences of
 * HamNoSys symbol names, splitting each string at occurrences of the
 * given separator.  The separator is handed to
 * {@link String#split(String)}, so it is interpreted as a regular
 * expression.  An empty input string gives an empty name sequence.
 * The resulting sequences are passed through
 * {@link #cleanHamNameVecs(java.lang.String[][])} before being returned.
 *
 * @param symhns the symbolic HNS strings
 * @param sep the separator (a regular expression)
 * @return one (cleaned) symbol name sequence per string
 */
	public static String[][] hnsNameVecsFromSymHNS(String[] symhns, String sep) {

		final String[][] namevecs = new String[symhns.length][];

		for (int k = 0; k < namevecs.length; k++) {
			final String SYM = symhns[k];
			if (SYM.isEmpty()) {
				// split() would give a single empty name here.
				namevecs[k] = new String[0];
			}
			else {
				namevecs[k] = SYM.split(sep);
			}
		}

		return cleanHamNameVecs(namevecs);
	}

/** Converts the given sequences of (manual) HamNoSys symbol names to
 * symbolic HNS strings, with a comma between adjacent names in each
 * string.
 *
 * @param hnsnv the symbol name sequences
 * @return one symbolic HNS string per sequence
 */
	public static String[] symHNSFromHNSNameVecs(String[][] hnsnv) {

		final String COMMA = ",";

		return symHNSFromHNSNameVecs(hnsnv, COMMA);
	}

/** Converts the given sequences of (manual) HamNoSys symbol names to
 * symbolic HNS strings, joining the names of each sequence with the
 * given separator string.
 *
 * @param hnsnv the symbol name sequences
 * @param sep the separator placed between adjacent names
 * @return one symbolic HNS string per sequence
 */
	public static String[] symHNSFromHNSNameVecs(String[][] hnsnv, String sep) {

		final String[] joined = new String[hnsnv.length];

		int k = 0;
		for (String[] names : hnsnv) {
			joined[k++] = join(names, sep);
		}

		return joined;
	}

/** Converts the given sequences of HamNoSys symbol names to the
 * corresponding manual HamNoSys (Unicode) strings, going via the
 * classic (8-bit) representation.
 *
 * @param hnsnv the symbol name sequences
 * @return one HNS (Unicode) string per sequence
 */
	public static String[] hnsuFromHNSNameVecs(String[][] hnsnv) {

		final String[] HNS8 = hns8FromHNSNameVecs(hnsnv);

		return hnsuFromHNS8(HNS8);
	}

/** Converts the given sequences of HamNoSys symbol names to the
 * corresponding classic (8-bit) manual HamNoSys strings, using the
 * standard {@code HamNameMap}.
 * So, conceptually, each string in the result contains a sequence
 * not of genuine (low-valued) Unicode character codes, but of classic
 * HamNoSys octet values, one {@code char} per symbol name.
 *
 * @param hnsnv the symbol name sequences
 * @return one classic (8-bit) HNS string per sequence
 */
	public static String[] hns8FromHNSNameVecs(String[][] hnsnv) {

		final HamNameMap NAME_MAP = HamNameMap.getStdHamNameMap();

		final String[] result = new String[hnsnv.length];

		int s = 0;
		for (String[] names : hnsnv) {
			// One char per name, holding that name's octet value.
			final StringBuilder octets = new StringBuilder(names.length);
			for (String name : names) {
				octets.append((char) NAME_MAP.octet(name));
			}
			result[s++] = octets.toString();
		}

		return result;
	}

	/** Converts the given manual HamNoSys (Unicode) strings to the
	 * corresponding sequences of HamNoSys symbol names, going via the
	 * classic (8-bit) representation.
	 *
	 * @param hnsu the HNS (Unicode) strings
	 * @return one symbol name sequence per string
	 */
	public static String[][] hnsNameVecsFromHNSU(String[] hnsu) {

		final String[] HNS8 = hns8FromHNSU(hnsu);

		return hnsNameVecsFromHNS8(HNS8);
	}

/** Converts the given classic (8-bit) manual HamNoSys strings to the
 * corresponding sequences of HamNoSys symbol names, using the standard
 * {@code HamNameMap}.
 * So, conceptually, each input string contains a sequence not of
 * genuine (low-valued) Unicode character codes, but of classic
 * HamNoSys octet values; each {@code char} gives one symbol name.
 *
 * @param hns8 the classic (8-bit) HNS strings
 * @return one symbol name sequence per string
 */
	public static String[][] hnsNameVecsFromHNS8(String[] hns8) {

		final HamNameMap NAME_MAP = HamNameMap.getStdHamNameMap();

		final String[][] namevecs = new String[hns8.length][];

		for (int s = 0; s < namevecs.length; s++) {
			final String OCTETS = hns8[s];
			final String[] names = new String[OCTETS.length()];
			for (int c = 0; c < names.length; c++) {
				names[c] = NAME_MAP.hamName(OCTETS.charAt(c));
			}
			namevecs[s] = names;
		}

		return namevecs;
	}

//	public static void main(String[] args) {
//		String[] symhtest = { "hamcircleo", "hamceeall", "hamfist" };
//		String[][] symhtestx = { symhtest };
//		String[] h8x = hns8FromHNSNameVecs(symhtestx);
//		String h8 = h8x[0];
//		final int N = symhtest.length;
//		byte[] h8b = new byte[N];
//		for (int i=0; i!=N; ++i) {
//			h8b[i] = (byte)h8.charAt(i);
//		}
//		HNSOctetUCSMap h8map = HNSOctetUCSMap.getHNSMapOctetAllUCS();
//		String hucs = h8map.ucs(h8b);
//		for (int j=0; j!=hucs.length(); ++j) {
//			System.out.println("at "+j+": "+
//				(String.format("%4x", (int)hucs.charAt(j))));
//		}
//	}

/** Converts the given manual HamNoSys (Unicode) strings to the
 * corresponding classic (8-bit) manual HamNoSys strings, using the
 * {@code HNSOctetUCSMap} obtained from {@code getHNSMapOctetAllUCS()}.
 * So, conceptually, each string in the result contains a sequence
 * not of genuine (low-valued) Unicode character codes, but of classic
 * HamNoSys octet values.
 *
 * @param hnsu the HNS (Unicode) strings
 * @return one classic (8-bit) HNS string per input string
 */
	public static String[] hns8FromHNSU(String[] hnsu) {

		final HNSOctetUCSMap OCTET_MAP =
			HNSOctetUCSMap.getHNSMapOctetAllUCS();

		final String[] converted = new String[hnsu.length];
		for (int k = 0; k < converted.length; k++) {
			converted[k] = OCTET_MAP.hns8(hnsu[k]);
		}

		return converted;
	}

/** Cleans the given set of classic (8-bit) manual HamNoSys strings by
 * passing each one, together with its index, to
 * {@code HNSOctetUCSMap.cleanHNS8()}.  As originally documented, cleaning
 * removes every character that is not a genuine HamNoSys symbol and
 * reports each removal on the standard error stream.
 * Note that the given array is updated in place and is itself returned.
 *
 * @param hns8 the classic (8-bit) HNS strings; overwritten with the
 *   cleaned strings
 * @return the same array, now holding the cleaned strings
 */
	public static String[] cleanHNS8(String[] hns8) {

		final HNSOctetUCSMap OCTET_MAP =
			HNSOctetUCSMap.getHNSMapOctetAllUCS();

		for (int k = 0; k < hns8.length; k++) {
			final String CLEANED = OCTET_MAP.cleanHNS8(hns8[k], k);
			hns8[k] = CLEANED;
		}

		return hns8;
	}

/** Cleans the given set of manual HamNoSys (Unicode) strings by passing
 * each one, together with its index, to
 * {@code HNSOctetUCSMap.cleanHNSU()}.  As originally documented, cleaning
 * removes every character that is not a genuine HamNoSys symbol and
 * reports each removal on the standard error stream.
 * Note that the given array is updated in place and is itself returned.
 *
 * @param hnsu the HNS (Unicode) strings; overwritten with the cleaned
 *   strings
 * @return the same array, now holding the cleaned strings
 */
	public static String[] cleanHNSU(String[] hnsu) {

		final HNSOctetUCSMap OCTET_MAP =
			HNSOctetUCSMap.getHNSMapOctetAllUCS();

		for (int k = 0; k < hnsu.length; k++) {
			final String CLEANED = OCTET_MAP.cleanHNSU(hnsu[k], k);
			hnsu[k] = CLEANED;
		}

		return hnsu;
	}

/** Cleans the given set of HamNoSys symbol name sequences by passing
 * each one, together with its index, to
 * {@code HamNameMap.cleanHamNameVec()}.  As originally documented,
 * cleaning removes invalid names and reports each removal on the
 * standard error stream.
 * Note that the given outer array is updated in place and is itself
 * returned.
 *
 * @param hamnvs the symbol name sequences; overwritten with the cleaned
 *   sequences
 * @return the same array, now holding the cleaned sequences
 */
	public static String[][] cleanHamNameVecs(String[][] hamnvs) {

		final HamNameMap NAME_MAP = HamNameMap.getStdHamNameMap();

		for (int k = 0; k < hamnvs.length; k++) {
			final String[] CLEANED = NAME_MAP.cleanHamNameVec(hamnvs[k], k);
			hamnvs[k] = CLEANED;
		}

		return hamnvs;
	}

/** Converts the given classic (8-bit) manual HamNoSys strings to the
 * corresponding HamNoSys (Unicode) strings, using the
 * {@code HNSOctetUCSMap} obtained from {@code getHNSMapOctetAllUCS()}.
 * So, conceptually, each input string contains a sequence not of
 * genuine (low-valued) Unicode character codes, but of classic
 * HamNoSys octet values.
 *
 * @param hns8 the classic (8-bit) HNS strings
 * @return one HNS (Unicode) string per input string
 */
	public static String[] hnsuFromHNS8(String[] hns8) {

		final HNSOctetUCSMap OCTET_MAP =
			HNSOctetUCSMap.getHNSMapOctetAllUCS();

		final String[] converted = new String[hns8.length];
		for (int k = 0; k < converted.length; k++) {
			converted[k] = OCTET_MAP.ucs(hns8[k]);
		}

		return converted;
	}

/** Extracts the manual HamNoSys strings from the given sequence of HNS
 * items, each item being assumed to start with such a string.
 * The HNS string of an item is the text before its first tab character
 * (or the whole item if it has no tab), with leading and trailing
 * whitespace trimmed.  An item that starts with a tab, or consists only
 * of tabs, yields the empty string.
 *
 * @param hnsitems the HNS items
 * @return one HNS string per item
 */
	public static String[] hnsFromHNSItems(String[] hnsitems) {

		final int N = hnsitems.length;
		String[] hns = new String[N];

		for (int i=0; i!=N; ++i) {

			// Locate the first tab directly rather than via split():
			// split() drops trailing empty fields, so it returns an
			// empty array for an item made up solely of tabs, and
			// indexing that array would fail.
			final String ITEM = hnsitems[i];
			final int I_TAB = ITEM.indexOf('\t');
			final String HEAD = (I_TAB < 0 ? ITEM : ITEM.substring(0, I_TAB));
			hns[i] = HEAD.trim();
		}

		return hns;
	}

/** Extracts the tails from the given sequence of HNS items, the tail
 * of an item being all text after the initial manual HamNoSys string,
 * that is, everything from the item's first tab character onwards
 * (including that tab).  An item without a tab has an empty tail.
 *
 * @param hnsitems the HNS items
 * @return one tail per item
 */
	public static String[] tailsFromHNSItems(String[] hnsitems) {

		final int N = hnsitems.length;
		String[] tails = new String[N];

		for (int i=0; i!=N; ++i) {

			// Locate the first tab directly rather than via split():
			// split() drops trailing empty fields, so it returns an
			// empty array for an item made up solely of tabs, and
			// indexing that array would fail.
			final String ITEM = hnsitems[i];
			final int I_TAB = ITEM.indexOf('\t');
			tails[i] = (I_TAB < 0 ? "" : ITEM.substring(I_TAB));
		}

		return tails;
	}

/** Extracts the gloss names from the given sequence of HNS items.
 * Each item is split at its tab characters, and the second field, if
 * there is one, is taken as the gloss name.  If item n has no second
 * field then a gloss name of the form {@code item_n}, as generated by
 * {@link #glossForItem(int, int)}, is used instead.
 *
 * @param hnsitems the HNS items
 * @return one gloss name per item
 */
	public static String[] glossesFromHNSItems(String[] hnsitems) {

		final String[] glosses = new String[hnsitems.length];

		for (int k = 0; k < glosses.length; k++) {

			final String[] FIELDS = hnsitems[k].split("[\\t]");
			if (FIELDS.length < 2) {
				// No explicit gloss: generate one from the item index.
				glosses[k] = glossForItem(k, glosses.length);
			}
			else {
				glosses[k] = FIELDS[1];
			}
		}

		return glosses;
	}

/** Returns the lines of the {@code <hns_sign>} SiGML element for
 * the given sequence of manual HamNoSys symbol names.  The element
 * carries the given gloss name as its {@code gloss} attribute and
 * contains a single {@code <hamnosys_manual>} element.
 *
 * @param hnsnv the HNS symbol names
 * @param gloss the gloss name (inserted without any escaping)
 * @return the lines of the {@code <hns_sign>} element
 */
	public static String[] hnsSignFromHNSNameVec(
		String[] hnsnv, String gloss) {

		final String[] MANUAL_EL = hnsManElFromHNSNameVec(hnsnv);

		return EL_WRAP(HNS_SIGN, GLOSS, gloss, MANUAL_EL);
	}

/** Returns the lines of the {@code <hamnosys_manual>} SiGML element for
 * the given sequence of manual HamNoSys symbol names: the start tag,
 * then one empty element per symbol name, then the end tag.
 *
 * @param hnsnv the HNS symbol names
 * @return the lines of the {@code <hamnosys_manual>} element
 */
	public static String[] hnsManElFromHNSNameVec(String[] hnsnv) {

		final String[] symbolEls = new String[hnsnv.length];

		int k = 0;
		for (String name : hnsnv) {
			symbolEls[k++] = EL_EMPTY(name);
		}

		return EL_WRAP(HNS_MANUAL, symbolEls);
	}

/** Returns a sequence of HNS (Unicode) items, one for each
 * {@code <hns_sign>} element in the given SiGML text (presented as a
 * sequence of lines) that contains a complete
 * {@code <hamnosys_manual>} element.  Each item consists of the manual
 * HNS (Unicode) string for the sign and its gloss name, separated by a
 * single tab {@code \t} character.
 * <p>
 * Signs lacking a {@code <hamnosys_manual>} element are left out
 * together with their glosses, so that every HNS string is always
 * paired with the gloss of its own sign.  Generated gloss names
 * ({@code item_n}) are still numbered by the sign's position among all
 * the {@code <hns_sign>} elements of the input.
 *
 * @param hslines the lines of the SiGML text
 * @return the HNS (Unicode) items
 */
	public static String[] hnsuItemsFromHSiGML(String[] hslines) {

		final String[][] ALL_SIGNS = elSeqFromSimpleXML(HNS_SIGN, hslines);
		final String[] ALL_GLOSSES = glossesFromSigns(ALL_SIGNS);
		final int N_ALL = ALL_SIGNS.length;

		// hnsMansFromHNSSigns() silently skips any sign without a
		// <hamnosys_manual> element, so drop those signs (and their
		// glosses) up front; otherwise the HNS strings and the glosses
		// could differ in number and be paired up wrongly.
		List<String[]> slist = new ArrayList<String[]>(N_ALL);
		List<String> glist = new ArrayList<String>(N_ALL);
		for (int i=0; i!=N_ALL; ++i) {
			if (singleElFromSimpleXML(HNS_MANUAL, ALL_SIGNS[i]) != null) {
				slist.add(ALL_SIGNS[i]);
				// Prefix each gloss with a tab separator, before
				// zipping HNS-u and gloss strings together.
				glist.add("\t"+ALL_GLOSSES[i]);
			}
		}

		final int N = slist.size();
		String[][] hsigns = slist.toArray(new String[N][]);
		String[] glosses = glist.toArray(new String[N]);

		String[][] hmans = hnsMansFromHNSSigns(hsigns);
		String[] hnsu = hnsuFromHNSMans(hmans);

		return zip(hnsu, glosses);
	}

/** Returns a sequence of manual HNS (Unicode) strings, one for each
 * of the given sequence of SiGML {@code <hamnosys_manual>} element
 * texts (each text being presented as a sequence of lines).
 * The symbol names found in each element are cleaned before being
 * converted.
 *
 * @param hmans the {@code <hamnosys_manual>} element texts
 * @return one HNS (Unicode) string per element text
 */
	public static String[] hnsuFromHNSMans(String[][] hmans) {

		final String[][] NAME_VECS = hnsNameVecsFromHNSMans(hmans);

		// (Do we really need to clean here?)
		final String[][] CLEAN_VECS = cleanHamNameVecs(NAME_VECS);

		return hnsuFromHNSNameVecs(CLEAN_VECS);
	}

/** Constructs and returns a sequence of HNS symbol names for each
 * of the given {@code <hamnosys_manual>} SiGML element texts.
 * Each text is assumed to consist of initial and final tag lines with
 * the HNS name elements, each an empty element on its own line, in
 * between.  Interior lines that are not empty elements are ignored,
 * and a text of fewer than two lines gives an empty name sequence.
 *
 * @param hmans the {@code <hamnosys_manual>} element texts
 * @return one symbol name sequence per element text
 */
	public static String[][] hnsNameVecsFromHNSMans(String[][] hmans) {

		final String[][] namevecs = new String[hmans.length][];

		for (int m = 0; m < namevecs.length; m++) {

			final String[] LINES = hmans[m];
			final List<String> names = new ArrayList<String>();

			// Visit the interior lines only, skipping the first and
			// last (tag) lines; there are none if the text has fewer
			// than three lines.
			for (int ln = 1; ln < LINES.length - 1; ln++) {
				final String NAME = elNameFromEmptyEl(LINES[ln].trim());
				// A null result means the line is something other
				// than an empty element -- ignore it.
				if (NAME == null) { continue; }
				names.add(NAME);
			}

			namevecs[m] = names.toArray(new String[names.size()]);
		}

		return namevecs;
	}

/** Extracts the gloss names from the given sequence of SiGML
 * {@code <hns_sign>} or {@code <hamgestural_sign>} texts, assuming
 * the gloss definition ({@code gloss="..."}) appears in the first line
 * of text.
 * A gloss name of the form {@code item_n} is provided if the n'th
 * sign fails to conform to this assumption: that is, if the sign has
 * no lines at all, if its first line has no gloss definition, or if
 * the gloss value is empty or unterminated.
 *
 * @param signs the sign element texts, each a sequence of lines
 * @return one gloss name per sign
 */
	public static String[] glossesFromSigns(String[][] signs) {

		final String GLOSS_START = "gloss=\"";
		final int GS_LEN = GLOSS_START.length();

		final int N = signs.length;

		String[] glosses = new String[N];
		for (int i=0; i!=N; ++i) {

			String gloss = null;

			// Look for an explicit gloss string in the <hns_sign ...>
			// tag, assumed to be in the initial line of the sign.
			// A sign without any lines simply gets the default gloss,
			// rather than causing an array index failure.
			final String[] SIGN = signs[i];
			if (SIGN != null && SIGN.length != 0 && SIGN[0] != null) {
				final String HS_TAG_LN = SIGN[0];
				int j = HS_TAG_LN.indexOf(GLOSS_START);
				if (0 <= j) {
					// g: start of the gloss value; gg: its closing quote.
					int g = (j + GS_LEN), gg = HS_TAG_LN.indexOf('"', g);
					if (g < gg) { gloss = HS_TAG_LN.substring(g, gg); }
				}
			}
			glosses[i] = glossForItem(gloss, i, N);
		}

		return glosses;
	}

/** Extracts the {@code <hamnosys_manual>} elements from the given
 * sequence of {@code <hns_sign>} SiGML elements, taking the first such
 * element of each sign.  A sign in which no complete
 * {@code <hamnosys_manual>} element is found contributes nothing, so
 * the result may be shorter than the input.
 * CAVEAT EMPTOR: The SiGML input is assumed
 * to be "simple", i.e. it has no troublesome things like nesting of
 * sign elements, nor element occurrences in comments, etc.
 *
 * @param hsigns the {@code <hns_sign>} element texts
 * @return the {@code <hamnosys_manual>} element texts found
 */
	public static String[][] hnsMansFromHNSSigns(String[][] hsigns) {

		final List<String[]> found = new ArrayList<String[]>(hsigns.length);

		for (int s = 0; s < hsigns.length; s++) {
			final String[] MAN = singleElFromSimpleXML(HNS_MANUAL, hsigns[s]);
			if (MAN == null) { continue; }
			found.add(MAN);
		}

		return found.toArray(new String[found.size()][]);
	}

/** Returns a generated (gloss) name of the form {@code item_ix},
 * where {@code ix} is the given index value as formatted by
 * {@code numStr(ix, max)} -- according to the original documentation,
 * in a field wide enough to accommodate the given maximum value.
 *
 * @param ix the item index
 * @param max the maximum index value
 * @return the generated gloss name
 */
	public static String glossForItem(int ix, int max) {

		final String NO_GLOSS = null;

		return glossForItem(NO_GLOSS, ix, max);
	}

/** Returns the given gloss name unchanged if it is non-null;
 * otherwise returns a generated gloss name of the form {@code item_ix},
 * where {@code ix} is the given index value as formatted by
 * {@code numStr(ix, max)} -- according to the original documentation,
 * in a field wide enough to accommodate the given maximum value.
 *
 * @param gstr the explicit gloss name, or {@code null} if there is none
 * @param ix the item index
 * @param max the maximum index value
 * @return the explicit or generated gloss name
 */
	public static String glossForItem(String gstr, int ix, int max) {

		if (gstr != null) { return gstr; }

		return "item_"+numStr(ix, max);
	}

/** Extracts from the given sequence of simple XML lines the subsegments
 * representing all occurrences of the given element, by delegating to
 * {@link #elSeqFromSimpleXML(java.lang.String[], java.lang.String[])}
 * with a one-element name list.
 * CAVEAT EMPTOR: "Simple XML"
 * means no troublesome things like nested occurrences of the required
 * element, nor occurrences in comments, etc.
 *
 * @param el the element name
 * @param xlines the XML lines
 * @return one line sequence per occurrence found
 */
	public static String[][] elSeqFromSimpleXML(String el, String[] xlines) {

		return elSeqFromSimpleXML(new String[] { el }, xlines);
	}

	/** Ad hoc check of
	 * {@link #elSeqFromSimpleXML(java.lang.String[], java.lang.String[])}:
	 * extracts the {@code that} and {@code stiff} elements from a small
	 * built-in sample and prints each one to standard output, with a
	 * rule line before each element and after the last.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {

		final String RULE = "----------------";

		final String[] SAMPLE = {
			"<thing>",
			"<this>",
			"<stuff>",
			"<this/>",
			"</stuff>",
			"<that/>",
			"</this>",
			"<stuff>",
			"  <stiff>",
			"    <stuff>",
			"    </stuff>",
			"  </stiff>",
			"</stuff>",
			"</thing>"
		};
		final String[] WANTED = { "that", "stiff" };

		for (String[] found : elSeqFromSimpleXML(WANTED, SAMPLE)) {
			System.out.println(RULE);
			for (int k = 0; k < found.length; k++) {
				System.out.println(found[k]);
			}
		}
		System.out.println(RULE);
	}

/** Extracts from the given sequence of simple XML lines the subsegments
 * representing all non-nested occurrences of elements in the given list.
 * An occurrence of a listed element inside another listed element is
 * not reported separately: its lines are simply part of the enclosing
 * occurrence.  Each returned subsegment holds the original (untrimmed)
 * lines, from the element's start tag line to its end tag line.
 * <p>
 * A line starts an element if, once trimmed, it begins with
 * {@code <name} for one of the listed names (a prefix test, see
 * {@code elBeginTest()}); such a line ending with {@code />} is a
 * complete empty element.  A line ends the innermost open element only
 * if, once trimmed, it is exactly that element's end tag.
 * If elements are still open at the end of the input, a message is
 * written to the standard error stream and they are not returned.
 * <p>
 * CAVEAT EMPTOR:
 * "Simple XML" means that there's at most one tag per line, and no
 * troublesome things like occurrences of elements embedded in
 * comments, etc.
 *
 * @param els the names of the elements of interest
 * @param xlines the XML lines
 * @return one line sequence per non-nested occurrence found, in order
 */
	public static String[][] elSeqFromSimpleXML(String[] els, String[] xlines) {

		ArrayList<String[]> elist = new ArrayList<String[]>();

		final String EMPTY_END = "/>";

		final int N = xlines.length;
		// Stack of the end tags of the currently open elements of
		// interest, innermost first.
		LinkedList<String> elstack = new LinkedList<String>();
		String elend = null; // cache top of elstack
		// Limits [elo, ehi) of the outermost element in progress;
		// ehi stays negative until that element is complete.
		int elo = -1;  int ehi = -1;
		for(int i=0; i!=N; ++i) {
			String xln = xlines[i].trim();
			String el = elBeginTest(els, xln);
			if (el != null) {
				if (xln.endsWith(EMPTY_END)) {
					// This line has a complete (empty) element of
					// interest.  If it's not nested, we need to
					// save it; otherwise we can ignore it.
					if (elstack.isEmpty()) { elo = i; ehi = i + 1; }
				}
				else {
					// We're at the start of a new element of interest.
					// If it's not nested, record its start index.
					// In any case push its details to the stack.
					if (elstack.isEmpty()) { elo = i; }
					elend = EL_END(el);
					elstack.push(elend);
				}
			}
			else if (elend != null && xln.equals(elend)) {
				// We're at the end of an element of interest.
				// Pop its details from the stack.
				// If it's not nested, save its limit index to flag
				// the fact that it must be saved.
				elstack.pop();
				if (!elstack.isEmpty()) { elend = elstack.peekFirst(); }
				else { elend = null;  ehi = i + 1; }
			}

			// If we've just completed a non-nested element of
			// interest then we need to save it.
			if (0 <= ehi) {
				final int NE = ehi - elo;
				String[] elns = new String[NE];
				for (int j=0; j!=NE; ++j) { elns[j] = xlines[elo+j]; }
				elist.add(elns);
				// Reset the limits, ready for the next element.
				elo = ehi = -1;
			}
		}

		// If we end up in a mess, then log that fact to stderr.
		if (!elstack.isEmpty()) {
			System.err.println(
				"HNSUtils.elSeqFromSimpleXML(): incomplete element: "+
				UN_EL_END(elstack.peekFirst()));
		}
		// (A commented-out earlier version of the scan, which did not
		// use a stack of open elements, used to be kept here.)

		// Turn the list of arrays into an array of arrays.
		final int N_EL = elist.size();
		String[][] evecs = new String[N_EL][];
		elist.toArray(evecs);

		return evecs;
	}

/** Determines which of the given XML element names matches the given
 * XML element tag line, and returns the first matching element name,
 * or {@code null} if there is no match.  A name matches if the line
 * starts with {@code <} followed by that name; this is a plain prefix
 * test, so a name also matches tags of longer names that it begins.
 *
 * @param els the candidate element names
 * @param xln the (trimmed) XML line
 * @return the first matching name, or {@code null}
 */
	private static String elBeginTest(String[] els, String xln) {

		for (String el : els) {
			if (xln.startsWith("<"+el)) { return el; }
		}

		return null;
	}

/** Extracts from the given sequence of simple XML lines the subsegment
 * representing the first occurrence of the given element.
 * The element starts at the first line which, once trimmed, begins with
 * {@code <name} (a plain prefix test).  If that line ends with
 * {@code />} it is the whole element; otherwise the element extends to
 * the next line which, once trimmed, is exactly the element's end tag.
 * "Simple XML" means no troublesome things like nested occurrences of
 * the required element, nor occurrences in comments, etc.
 *
 * @param el the element name
 * @param xlines the XML lines
 * @return the original (untrimmed) lines of the element, or
 *   {@code null} if the element is missing or is incomplete, that is,
 *   if its end tag line is not found
 */
	public static String[] singleElFromSimpleXML(String el, String[] xlines) {

		final String EMPTY_END = "/>";
		final String EL_START = "<"+el;
		// (Same text as EL_END(el) produces.)
		final String EL_END_TAG = "</"+el+">";

		final int N = xlines.length;

		// Linear search for the begin tag line of the first
		// occurrence of the given element.
		int elo = 0;
		while (elo != N && !xlines[elo].trim().startsWith(EL_START)) {
			++ elo;
		}
		// If we've hit the end of the text then the element is
		// missing and there's no more to be done.
		if (elo == N) { return null; }

		// Index of the element's final line.
		int elast = elo;
		if (! xlines[elo].trim().endsWith(EMPTY_END)) {
			// Linear search for the element end tag line, bounded by
			// the end of the text.  (The search bound used to be left
			// at the start tag index, so an element without an end tag
			// ran off the end of the array instead of being ignored.)
			++ elast;
			while (elast != N && !xlines[elast].trim().equals(EL_END_TAG)) {
				++ elast;
			}
			// The element is incomplete -- we just ignore it.
			if (elast == N) { return null; }
		}

		// We found a complete element: copy its lines.
		final int NE = elast + 1 - elo;
		String[] elns = new String[NE];
		System.arraycopy(xlines, elo, elns, 0, NE);

		return elns;
	}

/** Reads all remaining lines from the given buffered reader and
 * returns them in order.  The reader is not closed.
 *
 * @param brdr the reader to read from
 * @return the lines read, without their line terminators
 * @throws IOException if reading fails
 */
	public static String[] readLines(BufferedReader brdr)
	throws IOException {

		final List<String> lines = new ArrayList<String>();

		for (String ln = brdr.readLine(); ln != null; ln = brdr.readLine()) {
			lines.add(ln);
		}

		return lines.toArray(new String[lines.size()]);
	}

/** Returns a buffered reader, using the standard encoding (UTF-8), for
 * the given file within the given directory.  Failure to open the file
 * is handled as in {@link #getBufReader(java.io.File)}.
 *
 * @param dir the directory
 * @param file the name of the file within the directory
 * @return the reader
 */
	public static BufferedReader getBufReader(File dir, String file) {

		final File PATH = new File(dir, file);

		return getBufReader(PATH);
	}

/** Returns a buffered reader, using the standard encoding (UTF-8), for
 * the file with the given path name.  Failure to open the file is
 * handled as in {@link #getBufReader(java.io.File)}.
 *
 * @param path the path name of the file
 * @return the reader
 */
	public static BufferedReader getBufReader(String path) {

		final File PATH = new File(path);

		return getBufReader(PATH);
	}

/** Returns a buffered reader, using the standard encoding (UTF-8), for
 * the given file path.
 * NB: if the reader cannot be created, the exception is printed to the
 * standard error stream and the JVM is terminated with exit status 1.
 *
 * @param path the file to be read
 * @return the reader
 */
	public static BufferedReader getBufReader(File path) {

		try {
			final FileInputStream FILE_IN = new FileInputStream(path);
			return
				new BufferedReader(
					new InputStreamReader(FILE_IN, STD_ENCODING));
		}
		catch (IOException iox) {
			System.err.println(iox);
			System.exit(1);
		}

		// Reached only if System.exit() somehow returns.
		return null;
	}

/** Returns a buffered reader, using the standard encoding (UTF-8), for
 * the standard input.
 * NB: if the reader cannot be created, the exception is printed to the
 * standard error stream and the JVM is terminated with exit status 1.
 *
 * @return the reader
 */
	public static BufferedReader getStdBufReader() {

		try {
			final InputStreamReader STD_IN =
				new InputStreamReader(System.in, STD_ENCODING);
			return new BufferedReader(STD_IN);
		}
		catch (IOException iox) {
			System.err.println(iox);
			System.exit(1);
		}

		// Reached only if System.exit() somehow returns.
		return null;
	}

/** Extracts and returns the element name from the given string if it is
 * an empty XML element string, that is, if it starts with {@code <}
 * and ends with {@code />}; returns {@code null} otherwise.
 * The name is the text after the {@code <}, up to the first run of
 * whitespace or the first {@code />}, whichever comes first.
 *
 * @param eel the (trimmed) candidate empty element text
 * @return the element name, or {@code null}
 */
	public static String elNameFromEmptyEl(String eel) {

		if (!eel.startsWith("<") || !eel.endsWith("/>")) { return null; }

		// Crude but effective way of eliminating attribute definitions:
		// cut the tag short at its first run of whitespace.
		final String CUT = eel.replaceFirst("\\s+", "/>");

		return CUT.substring(1, CUT.indexOf("/>"));
	}

/** Returns XML lines in which the given element body is wrapped by an
 * element with the given type and a pair of attributes with the given
 * names and values: the start tag line, the body lines, then the end
 * tag line.
 *
 * @param eltype the element type
 * @param anm0 the first attribute's name
 * @param aval0 the first attribute's value
 * @param anm1 the second attribute's name
 * @param aval1 the second attribute's value
 * @param body the lines of the element body
 * @return the lines of the complete element
 */
	public static final String[] EL_WRAP(
		String eltype, String anm0, String aval0,
		String anm1, String aval1, String[] body) {

		return
			wrap(
				EL_START(eltype, anm0, aval0, anm1, aval1),
				body,
				EL_END(eltype));
	}

/** Returns XML lines in which the given element body is wrapped by an
 * element with the given type and an attribute with the given name
 * and value: the start tag line, the body lines, then the end tag line.
 *
 * @param eltype the element type
 * @param anm the attribute's name
 * @param aval the attribute's value
 * @param body the lines of the element body
 * @return the lines of the complete element
 */
	public static final String[] EL_WRAP(
		String eltype, String anm, String aval, String[] body) {

		final String START_TAG = EL_START(eltype, anm, aval);

		return wrap(START_TAG, body, EL_END(eltype));
	}

/** Returns XML lines in which the given element body is wrapped by an
 * element with the given type: the start tag line, the body lines,
 * then the end tag line.
 *
 * @param eltype the element type
 * @param body the lines of the element body
 * @return the lines of the complete element
 */
	public static final String[] EL_WRAP(String eltype, String[] body) {

		final String START_TAG = EL_START(eltype);

		return wrap(START_TAG, body, EL_END(eltype));
	}

/** Returns an XML (version 1.0) declaration string naming the given
 * encoding.
 *
 * @param enc the encoding name
 * @return the XML declaration
 */
	public static final String XML_DECL(String enc) {

		final StringBuilder decl =
			new StringBuilder("<?xml version=\"1.0\" encoding=\"");
		decl.append(enc).append("\"?>");

		return decl.toString();
	}

/** Returns an XML start tag, without attributes, for the given element
 * type.
 *
 * @param eltype the element type
 * @return the start tag
 */
	public static final String EL_START(String eltype) {

		final StringBuilder tag = new StringBuilder("<");
		tag.append(eltype).append(">");

		return tag.toString();
	}

/** Returns an XML start tag for the given element type with a pair
 * of attributes with the given names and values, in the given order.
 *
 * @param eltype the element type
 * @param anm0 the first attribute's name
 * @param aval0 the first attribute's value
 * @param anm1 the second attribute's name
 * @param aval1 the second attribute's value
 * @return the start tag
 */
	public static final String EL_START(
		String eltype, String anm0, String aval0,
		String anm1, String aval1) {

		final StringBuilder tag = new StringBuilder("<").append(eltype);
		tag.append(ATTR_DEF(anm0, aval0));
		tag.append(ATTR_DEF(anm1, aval1));

		return tag.append(">").toString();
	}

/** Returns an XML start tag for the given element type with an
 * attribute with the given name and value.
 *
 * @param eltype the element type
 * @param anm the attribute's name
 * @param aval the attribute's value
 * @return the start tag
 */
	public static final String EL_START(
		String eltype, String anm, String aval) {

		final StringBuilder tag = new StringBuilder("<").append(eltype);
		tag.append(ATTR_DEF(anm, aval));

		return tag.append(">").toString();
	}

/** Returns an XML end tag for the given element type.
 *
 * @param eltype the element type
 * @return the end tag
 */
	public static final String EL_END(String eltype) {

		final StringBuilder tag = new StringBuilder("</");
		tag.append(eltype).append(">");

		return tag.toString();
	}

/** Returns the element name from the given XML end tag, assumed to have
 * been created by {@link #EL_END(java.lang.String)}: the tag's first
 * two characters and its last character are stripped off.
 *
 * @param elendtag the end tag
 * @return the element name
 */
	public static final String UN_EL_END(String elendtag) {

		final int NAME_LO = 2;  // skip the leading "</"
		final int NAME_HI = elendtag.length() - 1;  // drop the final ">"

		return elendtag.substring(NAME_LO, NAME_HI);
	}

/** Returns an empty XML element tag, without attributes, for the given
 * element type; a space precedes the closing {@code />}.
 *
 * @param eltype the element type
 * @return the empty element tag
 */
	public static final String EL_EMPTY(String eltype) {

		final StringBuilder tag = new StringBuilder("<");
		tag.append(eltype).append(" />");

		return tag.toString();
	}

/** Returns an empty XML element tag for the given element type with an
 * attribute with the given name and value; a space precedes the
 * closing {@code />}.
 *
 * @param eltype the element type
 * @param anm the attribute's name
 * @param aval the attribute's value
 * @return the empty element tag
 */
	public static final String EL_EMPTY(String eltype, String anm, String aval) {

		final StringBuilder tag = new StringBuilder("<").append(eltype);
		tag.append(ATTR_DEF(anm, aval));

		return tag.append(" />").toString();
	}

/** Returns the text for a simple XML attribute definition with an
 * initial space, using the given name and value, the value being
 * enclosed in double quotes; "simple" here means that absolutely _no_
 * escaping is applied to the value.
 *
 * @param nm the attribute's name
 * @param val the attribute's value
 * @return the attribute definition text
 */
	public static final String ATTR_DEF(String nm, String val) {

		final StringBuilder def = new StringBuilder(" ");
		def.append(nm).append("=\"").append(val).append("\"");

		return def.toString();
	}

/** Prints the given line sequence to the given printer, one
 * {@code println()} call per line.  The printer is not flushed here.
 *
 * @param lines the lines to print
 * @param prntr the destination printer
 */
	public static void print(String[] lines, PrintWriter prntr) {

		for (int k = 0; k < lines.length; k++) {
			prntr.println(lines[k]);
		}
	}

/** Prints the given line sequence to standard output under the standard
 * (i.e. utf-8) encoding, by way of
 * {@link #toStream(java.lang.String[], java.io.PrintStream)}.
 *
 * @param lines the lines to print
 */
	public static void toStdOut(String[] lines) {

		final PrintStream STD_OUT = System.out;

		toStream(lines, STD_OUT);
	}

/** Prints the given line sequence to the given print stream under
 * the standard (i.e. utf-8) encoding.  The bytes of each line are
 * written directly to the stream, followed by a {@code println()};
 * the stream is flushed once at the end.  An I/O failure while writing
 * a line is reported on the standard error stream, and printing then
 * carries on.
 *
 * @param lines the lines to print
 * @param outs the destination stream
 */
	public static void toStream(String[] lines, PrintStream outs) {

		for (int k = 0; k < lines.length; k++) {
			try {
				outs.write(bytes(lines[k]));
			}
			catch (IOException iox) {
				System.err.println("toStream(): "+iox);
			}
			outs.println();
		}

		outs.flush();
	}

/** Returns the byte sequence for the given string under the standard,
 * that is, "utf-8", encoding.
 *
 * @param s the string to encode
 * @return the encoded bytes
 */
	public static byte[] bytes(String s) {

		final String ENCODING = STD_ENCODING;

		return bytes(s, ENCODING);
	}

/** Returns the byte sequence for the given string under the given
 * encoding.  If the encoding is not supported, the string is encoded
 * with the platform's default encoding instead, and a message is
 * written to the standard error stream.
 *
 * @param s the string to encode
 * @param enc the name of the encoding
 * @return the encoded bytes
 */
	public static byte[] bytes(String s, String enc) {

		try {
			return s.getBytes(enc);
		}
		catch (UnsupportedEncodingException uex) {
			// Fall back on the platform default encoding.
			final byte[] FALLBACK = s.getBytes();
			System.err.println("Bad encoding "+enc+": "+uex);
			return FALLBACK;
		}
	}

/** Returns a print stream, using the standard encoding (UTF-8), for
 * the given file within the directory with the given path name.
 *
 * @param dir the path name of the directory
 * @param fname the name of the file within the directory
 * @return the print stream
 * @throws IOException if the stream cannot be created
 */
	public static PrintStream filePrintStream(String dir, String fname)
	throws IOException {

		final String ENCODING = STD_ENCODING;

		return filePrintStream(dir, fname, ENCODING);
	}

/** Opens a print stream, using the given encoding, on the named file
 * within the directory with the given path name.  The directory path
 * is wrapped in a {@code File} and the work is handed to the
 * {@code File}-based overload; any {@code IOException} it raises is
 * passed on to the caller.
 */
	public static PrintStream filePrintStream(
		String dir, String fname, String enc)
	throws IOException {

		final File dirFile = new File(dir);
		return filePrintStream(dirFile, fname, enc);
	}

/** Opens a print stream, using the standard encoding (UTF-8), on the
 * named file within the given directory.  Delegates to the overload
 * taking an explicit encoding; any {@code IOException} it raises is
 * passed on to the caller.
 */
	public static PrintStream filePrintStream(File dir, String fname)
	throws IOException {

		final PrintStream ps = filePrintStream(dir, fname, STD_ENCODING);
		return ps;
	}

/** Opens a print stream, using the given encoding, on the named file
 * within the given directory.  The encoding is always passed
 * explicitly to the stream, since the platform default cannot be
 * relied on (on Mac OS X it is not UTF-8).  The returned stream is
 * open; closing it is left to the caller.
 *
 * @throws IOException if the {@code PrintStream} constructor fails, e.g.
 *         the file cannot be opened for writing or the encoding name
 *         is not supported.
 */
	public static PrintStream filePrintStream(
		File dir, String fname, String enc)
	throws IOException {

		return new PrintStream(new File(dir, fname), enc);
	}

/** Returns a new array of strings consisting of the given initial item,
 * then all the items of the given body array in order, then the given
 * final item.  The body array itself is left untouched.
 */
	public static String[] wrap(String beg, String[] body, String end) {

		final int bodyLen = body.length;
		final String[] wrapped = new String[bodyLen + 2];

		wrapped[0] = beg;
		System.arraycopy(body, 0, wrapped, 1, bodyLen);
		wrapped[bodyLen + 1] = end;

		return wrapped;
	}

/** Returns a new array of strings consisting of the given initial item
 * followed by all the items of the given body array in order.  The body
 * array itself is left untouched.
 */
	public static String[] prefix(String pfx, String[] body) {

		final int bodyLen = body.length;
		final String[] prefixed = new String[bodyLen + 1];

		prefixed[0] = pfx;
		System.arraycopy(body, 0, prefixed, 1, bodyLen);

		return prefixed;
	}

/** Lots (120) of spaces: a pre-built run from which indents of up to
 * 60 steps are taken as substrings.
 */
	private static final String				SPACES =
	"                                        "+
	"                                        "+
	"                                        ";  // (deeper indents are built on demand)

/** Returns an indent string with the given number of steps, at 2 spaces
 * per step.  A zero or negative step count gives the empty string.
 * Indents wider than the pre-built {@code SPACES} run are assembled on
 * demand, so there is no upper limit at 60 steps.
 *
 * @param in the number of indent steps.
 * @return a string of {@code 2*in} spaces, or the empty string if
 *         {@code in} is not positive.
 */
	public static final String	INDENT(int in) {

		// Nothing to indent by; also avoids a negative substring index.
		if (in <= 0) { return ""; }

		final int width = 2*in;
		final int chunk = SPACES.length();

		// Common case: the indent fits within the pre-built run.
		if (width <= chunk) { return SPACES.substring(0, width); }

		// Deep indent: repeat the whole run, then top up with a partial one.
		final StringBuilder buf = new StringBuilder(width);
		int remaining = width;
		while (remaining > chunk) {
			buf.append(SPACES);
			remaining -= chunk;
		}
		buf.append(SPACES, 0, remaining);
		return buf.toString();
	}

/** Adjusts the lines of the given XML line sequence by applying the
 * appropriate indentation to each one, returning the given (albeit now
 * modified) sequence itself.  Assumes the XML is all tags, one tag per
 * line.  Cheap 'n' cheerful: lines are classified purely by how their
 * trimmed text starts and ends.
 * <ul>
 * <li>An end tag is outdented one step, and does not deepen what follows.
 * <li>Comments, processing instructions and other {@code <!...>}
 *     declarations (e.g. a DOCTYPE) leave the depth unchanged.
 * <li>An empty-element tag leaves the depth unchanged.
 * <li>Any other line ending in {@code >} is taken to be a start tag and
 *     deepens the indentation of the following lines.
 * </ul>
 * The indent is prepended to each line as given, so any leading white
 * space already on a line is kept.  The depth never drops below zero,
 * so surplus end tags are simply left unindented.
 *
 * @param lines the XML lines, modified in place.
 * @return the same array, {@code lines}.
 */
	public static String[] indentXML(String[] lines)	{

		final int N = lines.length;
		int in = 0;

		for (int i=0; i!=N; ++i) {

			final String LN = lines[i].trim();
			final boolean isEndTag = LN.startsWith("</");

			// An end tag lines up with its start tag; the depth is not
			// allowed to go negative when end tags outnumber start tags.
			if (isEndTag && in > 0) { -- in; }

			lines[i] = INDENT(in)+lines[i];

			// Only a start tag makes the following lines deeper.
			final boolean isDeclOrComment = LN.startsWith("<!");
			final boolean isPI = LN.startsWith("<?");
			final boolean isClosedOnLine =
				LN.endsWith("/>") || LN.endsWith("?>") || LN.endsWith("-->");

			if (!isEndTag && !isDeclOrComment && !isPI
			&& !isClosedOnLine && LN.endsWith(">")) {
				++ in;
			}
		}

		return lines;
	}

/** Returns a new string array in which each item is the concatenation
 * of the corresponding items of the two given arrays.  The arrays are
 * assumed to be of the same length; the length of the first one
 * determines the length of the result.
 */
	public static String[] zip(String[] ss0, String[] ss1) {

		final String[] zipped = new String[ss0.length];

		int k = 0;
		while (k != zipped.length) {
			zipped[k] = ss0[k] + ss1[k];
			++k;
		}

		return zipped;
	}

/** Returns a single new string array holding all the items of the
 * given string arrays, taken in order: every item of the first array,
 * then every item of the second, and so on.
 */
	public static String[] merge(String[][] slists) {

		// First pass: find out how much room is needed.
		int total = 0;
		for (int i = 0; i != slists.length; ++i) {
			total += slists[i].length;
		}

		// Second pass: copy each array into its place.
		final String[] merged = new String[total];
		int at = 0;
		for (int i = 0; i != slists.length; ++i) {
			final String[] part = slists[i];
			System.arraycopy(part, 0, merged, at, part.length);
			at += part.length;
		}
		return merged;
	}

/** Joins the given sequence of lines into a single multi-line string,
 * using {@link #EOL} as a terminator, so that every line, including
 * the last, is followed by one EOL.
 */
	public static String joinLines(String[] lines) {

		final boolean eolAfterLastLine = true;
		return join(lines, EOL, eolAfterLastLine);
	}

/** Joins the given sequence of strings into a single string, with the
 * given separator string between each adjacent pair of items (and
 * nothing after the last item).
 */
	public static String join(String[] sseq, String sep) {

		final boolean sepAfterLastItem = false;
		return join(sseq, sep, sepAfterLastItem);
	}

/** Joins the given string sequence into a single string, putting the
 * given separator string between adjacent items.  If
 * {@code treat_sep_as_term} is set then the separator acts as a
 * terminator, that is, it is also appended after the final item --
 * provided there is one: an empty sequence always gives the empty
 * string.
 */
	public static String join(
		String[] sseq, String sep, boolean treat_sep_as_term) {

		final int count = sseq.length;
		final StringBuilder joined = new StringBuilder(count*20);

		for (int i = 0; i != count; ++i) {
			// Every item but the first is preceded by the separator.
			if (i != 0) { joined.append(sep); }
			joined.append(sseq[i]);
		}

		// Terminator after the last item, if wanted and if there is one.
		if (treat_sep_as_term && count != 0) { joined.append(sep); }

		return joined.toString();
	}

/** Joins the given list of lines into a single multi-line string,
 * using {@link #EOL} as a terminator, so that every line, including
 * the last, is followed by one EOL.
 */
	public static String joinLines(List<String> items) {

		final boolean eolAfterLastLine = true;
		return join(items, EOL, eolAfterLastLine);
	}

/** Joins the given list of strings into a single string, with the
 * given separator string between each adjacent pair of items (and
 * nothing after the last item).
 */
	public static String join(List<String> sseq, String sep) {

		final boolean sepAfterLastItem = false;
		return join(sseq, sep, sepAfterLastItem);
	}

/** Joins the given list of strings into a single string using the
 * given separator string, treating that separator as a terminator as
 * well if {@code treat_sep_as_term} is set.  The list is copied to an
 * array and the work is done by the array-based {@code join()}.
 */
	public static String join(
		List<String> sseq, String sep, boolean treat_sep_as_term) {

		final String[] items = sseq.toArray(new String[sseq.size()]);
		return join(items, sep, treat_sep_as_term);
	}

/** Replaces each string in the given sequence with its trimmed form
 * (as given by {@code String.trim()}).  The given array is modified in
 * place, and is itself returned.
 */
	public static String[] trim(String[] sseq) {
		int k = 0;
		while (k != sseq.length) {
			sseq[k] = sseq[k].trim();
			++k;
		}
		return sseq;
	}

/** Returns the decimal string for the given (non-negative) number with
 * enough leading zeros to make the string length match that required
 * for the given (non-negative) maximum value.  For example, with a
 * maximum of 100 the number 7 gives {@code "007"}.  The digits are
 * always the ASCII digits 0-9, whatever the default locale may be.
 *
 * @param num the number to be formatted.
 * @param max the largest value to be catered for; its digit count, as
 *        given by {@link #nDigits(int)}, is the minimum width of the
 *        result.
 * @return {@code num} in decimal, zero-padded on the left to that width.
 */
	public static String numStr(int num, int max) {

		final String fmt = "%0"+nDigits(max)+"d";

		// An explicit locale stops the formatter from substituting the
		// default locale's own digit characters.  (Fully qualified name,
		// since java.util.Locale is not among this file's imports.)
		return String.format(java.util.Locale.ROOT, fmt, num);
	}

/** Returns the number of (decimal) digits needed to print (non-negative) n.
 * Zero, like every other single-digit value, needs one digit.
 */
	public static int nDigits(int n) {

		int digits = 1;
		// One more digit for each further power of ten contained in n.
		for (int rest = n / 10; rest != 0; rest /= 10) {
			++digits;
		}
		return digits;
	}
}
