/*
 * HNSOctetUCSMap.java	2003-09-05
 *
 * Ralph Elliott  <mailto:re@cmp.uea.ac.uk>
 */
package util;


import java.util.Properties;
import java.util.Enumeration;

import java.util.HashMap;

import util.ResourceUtils;


/** Defines mappings in both directions between HNS 8-bit character
 * code values (octets) and HamNoSys-UCS characters, most of
 * which are in the UCS private use area.
 *
 * There are factory methods providing access to two lazily created
 * singleton instances: one which maps all UCS character values, and
 * one which includes only those mappings whose UCS value is 256 or
 * above.
 *
 * The singleton references are volatile and are created under the
 * class lock, so the factory methods may be called from any thread.
 */
public class HNSOctetUCSMap {


/** Token for unused entry in mapping file. */
	protected static final String
	HNS_UNUSED					= "UNUSED";
/** Path for standard HNS mapping file. */
	protected static final String
	HNS_OCTET_UCS_PROPS_FILE	= "util/HNSOctetToUnicode.txt";


/** The two "singleton" instances of this class, for the use of the
 * factory methods below.  They are volatile because the factory reads
 * them without holding the lock: without that, a thread on the fast
 * path could observe a map whose non-final fields are not yet
 * visible.
 */
	private static volatile HNSOctetUCSMap	MAP_OCTET_HIGH_UCS = null;
	private static volatile HNSOctetUCSMap	MAP_OCTET_ALL_UCS = null;

/** Factory method, returning the singleton map that includes
 * high UCS HamNoSys character values only.
 *
 * @return the shared map restricted to UCS values {@code >= 256}
 */
	public static final HNSOctetUCSMap getHNSMapOctetHighUCS()
	{
		return HNSOctetUCSMap.getHNSMapOctetUCS(false);
	}

/** Factory method, returning the singleton map that includes
 * all UCS HamNoSys character values.
 *
 * @return the shared map covering all UCS values
 */
	public static final HNSOctetUCSMap getHNSMapOctetAllUCS()
	{
		return HNSOctetUCSMap.getHNSMapOctetUCS(true);
	}

/** Factory method, returning one of two singleton instances of this
 * class, depending on the value of the given flag {@code DO_ALL_UCS}:
 * if this flag is false then only high ({@code >= 256}) character
 * values are included in the mapping, but if it is true then all
 * values are included, among them low ones such as UCS value 44
 * ({@code hamcomma}), for which the corresponding value in the old
 * 8-bit HNS code is the octet value 36.
 *
 * The requested instance is created on first use from the standard
 * mapping file; any exception thrown by the constructor propagates
 * to the caller and the instance is left uncreated.
 *
 * @param DO_ALL_UCS  true for the all-values map, false for the
 *                    high-values-only map
 * @return the corresponding shared instance, never {@code null}
 */
	public static final HNSOctetUCSMap getHNSMapOctetUCS(
		final boolean DO_ALL_UCS)
	{
		// Start with an unsynchronized (volatile) read, so as to make
		// the more routine case fast.
		HNSOctetUCSMap map =
			DO_ALL_UCS ? MAP_OCTET_ALL_UCS : MAP_OCTET_HIGH_UCS;

		if (map == null) {

			synchronized (HNSOctetUCSMap.class) {

				// Now we're synchronized, make sure we really still
				// need to do the work.
				map = DO_ALL_UCS ? MAP_OCTET_ALL_UCS : MAP_OCTET_HIGH_UCS;

				if (map == null) {

					// Create a new map of the required kind.
					map = new HNSOctetUCSMap(
							HNS_OCTET_UCS_PROPS_FILE, DO_ALL_UCS);

					// Publish the new map via the right global variable.
					if (DO_ALL_UCS) {
						MAP_OCTET_ALL_UCS = map;
					}
					else {
						MAP_OCTET_HIGH_UCS = map;
					}
				}
			}
		}

		// Return the local copy rather than re-reading the global.
		return map;
	}


/** "Null" octet value. */
	public final static int					O_NULL = 0;
/** Octet limit value. */
	public final static int					O_LIMIT = 256;
/** "Null" UCS value. */
	public final static char				U_NULL = (char)0x0FFFF;

/** "hamversion40" octet value. */
	public final static int					O_HAMVERSION40 = 20;
/** "hamspace" octet value. */
	public final static int					O_HAMSPACE = 32;


/** Map classic HNS octets to HNS-4 UCS codepoints. */
	protected char[]						o2u;
/** Map HNS-4 UCS codepoints to classic HNS octets. */
	protected HashMap<Character,Integer>	u2o;

/** "hamversion40" UCS value. */
	protected final char					U_HAMVERSION40;
/** "hamspace" UCS value. */
	protected final char					U_HAMSPACE;

/** Constructs a new map as defined by the given properties file.
 * Each property key is decoded as an octet value and the first token
 * of its value as the corresponding UCS value.
 *
 * @param propspath   path of the properties file, as understood by
 *                    {@code ResourceUtils.getProperties}
 * @param DO_ALL_UCS  if false, entries whose UCS value is below
 *                    {@link #O_LIMIT} are left unmapped
 * @throws IllegalArgumentException if the file defines no UCS value
 *         for the hamversion40 octet or for the hamspace octet, or
 *         (as {@link NumberFormatException}) if a key is not a
 *         decodable integer
 * @throws ArrayIndexOutOfBoundsException if a key decodes to a value
 *         outside the octet range
 */
	protected HNSOctetUCSMap(
		String propspath, final boolean DO_ALL_UCS) {

		// Every octet starts out unmapped.
		this.o2u = new char[O_LIMIT];
		for (int i=0; i!=O_LIMIT; ++i) {
			this.o2u[i] = U_NULL;
		}

		char uhamver = U_NULL;
		char uhamspace = U_NULL;

		Properties props = ResourceUtils.getProperties(propspath);
		Enumeration<?>	pen = props.propertyNames();
		while (pen.hasMoreElements()) {
			String ostr = (String)(pen.nextElement());
			final int OCT = Integer.decode(ostr).intValue();
			final int UCS = getFirstPropsInt(props, ostr);
			if (DO_ALL_UCS || O_LIMIT <= UCS) { this.o2u[OCT] = (char)UCS; }
			// The two reference values are recorded whether or not the
			// entry itself was admitted to the octet-to-UCS table.
			if (OCT == O_HAMVERSION40) { uhamver = (char)UCS; }
			if (OCT == O_HAMSPACE) { uhamspace = (char)UCS; }
		}

		// Build the reverse map from the forward table; if two octets
		// share a UCS value the higher octet wins.
		this.u2o = new HashMap<Character,Integer>(512, (float)0.5);
		for (int i=0; i!=O_LIMIT; ++i) {
			if (this.o2u[i] != U_NULL) {
				this.u2o.put(this.o2u[i], i);
			}
		}

		// Somewhat imprecise to use IAE here, but it's better than nothing.
		if (uhamver == U_NULL) {
			throw new IllegalArgumentException(
				"HNSOctetUCSMap: missing UCS hamversion40.");
		}
		if (uhamspace == U_NULL) {
			throw new IllegalArgumentException(
				"HNSOctetUCSMap: missing UCS hamspace.");
		}

		this.U_HAMVERSION40 = uhamver;
		this.U_HAMSPACE = uhamspace;
	}

/** Returns the HNS-4 UCS character for the given classic HNS octet
 * value.
 *
 * @param o  classic HNS octet value
 * @return the mapped UCS character, or {@link #U_NULL} if the octet
 *         is unmapped or lies outside the range [0..{@link #O_LIMIT})
 */
	public char ucs(int o) {

		// Deliberately tolerant: an out-of-range code yields the null
		// value instead of an array-index exception.
		return (0 <= o && o < O_LIMIT ? this.o2u[o] : U_NULL);
	}

/** Acts like {@link #ucs(int)}, unless the given code represents a
 * whitespace character, in which case the result is the "null" UCS
 * value, U+FFFF.  Here "whitespace" means any code above
 * {@link #O_NULL} and up to and including {@link #O_HAMSPACE}, other
 * than {@link #O_HAMVERSION40}.
 *
 * @param o  classic HNS octet value
 * @return the mapped UCS character, or {@link #U_NULL}
 */
	public char cleanUCS(int o) {
		return
			O_NULL < o  &&
			o <= O_HAMSPACE  &&
			o != O_HAMVERSION40  ? U_NULL : ucs(o);
	}

/** Returns the string of HNS-4 UCS characters corresponding to the
 * given sequence of classic HNS octet values.  Each byte is treated
 * as unsigned; unmapped octets yield {@link #U_NULL} characters.
 *
 * @param octets  classic HNS octet values
 * @return a string with one character per input octet
 */
	public String ucs(byte[] octets) {

		final int N = octets.length;
		StringBuilder ubuf = new StringBuilder(N);
		for (int i=0; i!=N; ++i) {
			ubuf.append(this.ucs(unsigned(octets[i])));
		}

		return ubuf.toString();
	}

/** Returns the string of HNS-4 UCS characters corresponding to the
 * given string of classic HNS octet values.  Unmapped or out-of-range
 * codes yield {@link #U_NULL} characters.
 *
 * @param ostr  string whose characters are classic HNS octet values
 * @return a string with one character per input character
 */
	public String ucs(String ostr) {

		final int N = ostr.length();
		StringBuilder ubuf = new StringBuilder(N);
		for (int i=0; i!=N; ++i) {
			ubuf.append(this.ucs(ostr.charAt(i)));
		}

		return ubuf.toString();
	}

/** Returns the same value as {@link #hns8(char)} (as an int). */
	public int octet(char u) { return this.hns8(u); }

/** If the given character value is an HNS-4 UCS character,
 * returns (as a character) the corresponding classic HNS octet,
 * otherwise the "null" HNS value, 0.
 *
 * @param u  candidate HNS-4 UCS character
 * @return the corresponding octet, or {@link #O_NULL}
 */
	public char hns8(char u) {

		Integer oo = this.u2o.get(u);
		return (oo == null ? O_NULL : (char)(oo.intValue()));
	}

/** Acts like {@link #hns8(char)}, unless the given character is a
 * whitespace character, in which case the result is the "null" HNS
 * value, 0.  Here "whitespace" means any character not above the
 * hamspace UCS value, other than the hamversion40 UCS value.
 *
 * @param u  candidate HNS-4 UCS character
 * @return the corresponding octet, or {@link #O_NULL}
 */
	public char cleanHNS8(char u) {
		return
			u <= U_HAMSPACE  &&
			u != U_HAMVERSION40 ? O_NULL : this.hns8(u);
	}

/** Returns (as a character-string) the sequence of classic HNS octets
 * corresponding to the given sequence of HNS-4 UCS characters.
 * Characters with no mapping yield the "null" HNS value, 0.
 *
 * @param ustr  string of HNS-4 UCS characters
 * @return a string with one octet-valued character per input character
 */
	public String hns8(String ustr) {

		final int N = ustr.length();
		StringBuilder obuf = new StringBuilder(N);
		for (int i=0; i!=N; ++i) {
			obuf.append(this.hns8(ustr.charAt(i)));
		}

		return obuf.toString();
	}

/** Returns a clean version of the given string, eliminating any invalid
 * or whitespace HNS-UCS characters, and outputs a message mentioning
 * the given index for each rejected character.
 *
 * @param hnsu  string of HNS-UCS characters
 * @param ix    index of the enclosing item, used only in messages
 * @return the given string itself if nothing was rejected, otherwise
 *         a new string containing only the accepted characters
 */
	public String cleanHNSU(String hnsu, int ix) {

		final int N = hnsu.length();
		StringBuilder good = new StringBuilder(N);

		// Single pass: keep the good characters, report the bad ones.
		for (int j=0; j!=N; ++j) {
			final char chu = hnsu.charAt(j);
			if (this.cleanHNS8(chu) != O_NULL) {
				good.append(chu);
			}
			else {
				final String CHU_HEX = String.format("%04X", (int)chu);
				invalidMsg(ix, j, "HNS-UCS character U+"+CHU_HEX);
			}
		}

		// Nothing rejected: hand back the caller's own string.
		return (good.length() == N ? hnsu : good.toString());
	}

/** Returns a clean version of the given string, eliminating any invalid
 * or whitespace HNS-8 characters, and outputs a message mentioning the
 * given index for each rejected character.
 *
 * @param hns8  string whose characters are classic HNS octet values
 * @param ix    index of the enclosing item, used only in messages
 * @return the given string itself if nothing was rejected, otherwise
 *         a new string containing only the accepted characters
 */
	public String cleanHNS8(String hns8, int ix) {

		final int N = hns8.length();
		StringBuilder good = new StringBuilder(N);

		// Single pass: keep the good characters, report the bad ones.
		for (int j=0; j!=N; ++j) {
			final char ch8 = hns8.charAt(j);
			if (this.cleanUCS(ch8) != U_NULL) {
				good.append(ch8);
			}
			else {
				invalidMsg(ix, j, "HNS-8 code "+(int)ch8);
			}
		}

		// Nothing rejected: hand back the caller's own string.
		return (good.length() == N ? hns8 : good.toString());
	}

/** Looks up the given string in the given properties set, and tries
 * to interpret the first token in the result as an integer value,
 * returning this value if successful, or the null UCS value
 * otherwise.  The token {@link #HNS_UNUSED} also gives the null UCS
 * value.
 *
 * @param props  properties set to search
 * @param istr   property key
 * @return the decoded integer, or {@link #U_NULL}
 */
	protected static int getFirstPropsInt(
		Properties props, String istr)
	{
		int val = U_NULL;
		String valstr = getFirstPropsToken(props, istr);
		if (valstr != null && !valstr.equals(HNS_UNUSED)) {
			try {
				val = Integer.decode(valstr);
			}
			catch (NumberFormatException ignored) {
				// Deliberate: an undecodable token is treated as
				// "no value", so val keeps the null UCS value.
			}
		}

		return val;
	}

/** Looks up the given string in the given properties set,
 * and returns the first token in the result if possible,
 * or returns {@code null} in any case of failure.  Tokens are
 * separated by runs of spaces and tabs.
 *
 * @param props  properties set to search
 * @param istr   property key
 * @return the first token of the property value, or {@code null} if
 *         the property is absent or its first token is empty
 */
	protected static String getFirstPropsToken(
			Properties props, String istr)
	{
		String pfirst = null;
		String pstr = props.getProperty(istr);
		if (pstr != null) {
			// Limit 2: only the first token is wanted, so the rest
			// of the value is left unsplit.
			String[] ptoks = pstr.split("[ \\t]+", 2);
			if (ptoks[0].length() != 0) {
				pfirst = ptoks[0];
			}
		}

		return pfirst;
	}

/** Returns the given byte value as an unsigned integer in the
 * range [0..256).
 */
	private static int unsigned(byte b) { return b & 0xFF; }

/** Output a message on the standard error stream, saying that within
 * the entry with the given first index the item at the second given
 * index is invalid.
 */
	private static void invalidMsg(int i, int j, String item) {

		final String MSG =
			"In item "+i+": rejected "+item+" at position "+j+".";
		System.err.println(MSG);
	}
}
