/*
 * ToSafeASCIIMap.java		2010-12-02
 */
package util;


import java.util.Properties;
import java.util.Enumeration;

import util.ResourceUtils;


/** Defines a mapping of UCS characters to "safe" characters, or short
 * strings of such characters, the safe characters all being in
 * the visible ASCII range.
 *
 * <p>The safe characters are the ASCII letters and digits together with
 * {@code '-'}, {@code '_'} and {@code '.'}; these are passed through
 * unchanged.  A character in the range U+00A0 to U+00FF is replaced by
 * the string held for it in {@link #A0_TABLE}, {@code '@'} is replaced
 * by {@code "at"}, and every other character is replaced by
 * {@link #DEFAULT_CHAR}.
 *
 * <p>A factory method provides access to the singleton instance.
 */
public class ToSafeASCIIMap {

/** Path for properties file giving the mapping for characters in the
 * range U+00A0 to U+00FF.
 */
	protected static final String
	A0_FF_MAP	= "util/mapA0-FFRangeToASCII.txt";

/** The singleton instance of this class, for the use of the
 * factory method below.  The field is {@code volatile} because the
 * factory method reads it without holding the class lock: without
 * {@code volatile} those unsynchronized reads would be data races.
 */
	private static volatile ToSafeASCIIMap	TO_SAFE_ASCII_MAP = null;

//	public static void main(String[] args) {
//		ToSafeASCIIMap CAMAP = getToSafeASCIIMap();
//		for (char ch=(char)32; ch!=256; ++ch) {
//			String cch = CAMAP.safe(ch);
//			String chrep =
//				((32 <= ch && ch < 128) || (A0 <= ch)) ? ""+ch : "";
//			System.out.printf("%4x (%3s) --> %s\n", (int)ch, chrep, cch);
//		}
//	}

/** Factory method, returning the singleton map object, which is
 * created from {@link #A0_FF_MAP} on the first call.
 *
 * @return the shared map instance, never {@code null}
 */
	public static final ToSafeASCIIMap getToSafeASCIIMap() {

		// Start with an unsynchronized check, so as to make the
		// more routine case fast.  The volatile field is read just
		// once into a local, and it is the local that is returned, so
		// the result cannot differ from the value that was tested.
		ToSafeASCIIMap map = TO_SAFE_ASCII_MAP;
		if (map == null) {

			synchronized (ToSafeASCIIMap.class) {

				// Now we're synchronized, make sure we really still
				// need to do the work.
				map = TO_SAFE_ASCII_MAP;
				if (map == null) {
					map = new ToSafeASCIIMap(A0_FF_MAP);
					TO_SAFE_ASCII_MAP = map;
				}
			}
		}

		return map;
	}

/** Notional base index for the A0 - FF table. */
	public final static int					A0 = 0x00A0;
/** Notional limit index for the A0 - FF table. */
	public final static int					T_LIM = 0x0100;
/** Actual A0 - FF table size. */
	public final static int					A0_TABLE_SIZE = (T_LIM - A0);
/** The string for the default character. */
	public final static String				DEFAULT_CHAR = "_";

/** The table giving the string for each character in the range A0 to FF.
 * The elements are strings rather than characters to allow for the case
 * where a character can naturally be represented by a multi-character
 * sequence, e.g. "ae" for "latin small letter a with diaeresis".
 *
 * The entry for character {@code ch} is at index {@code ch - A0}.
 * Only the reference is {@code final}: the array elements themselves
 * remain writable through this public field, so callers should treat
 * the table as read-only.
 */
	public final  String[]					A0_TABLE;

/** Constructs a new map as defined by the given properties file.
 * Each property name is decoded as an integer character code (using
 * {@link Integer#decode(String)}), and the first token of the property
 * value becomes the table entry for that character.  Characters with
 * no property, or with an empty property value, keep the entry
 * {@link #DEFAULT_CHAR}.
 *
 * @param a0propspath path of the properties resource to be loaded
 * @throws NumberFormatException if a property name cannot be decoded
 *         as an integer
 * @throws IllegalArgumentException if a property name decodes to a
 *         value outside the range {@link #A0} (inclusive) to
 *         {@link #T_LIM} (exclusive)
 */
	private ToSafeASCIIMap(String a0propspath) {

		// Every slot starts out as the default string, so that any
		// character not mentioned in the properties is still mapped.
		this.A0_TABLE = new String[A0_TABLE_SIZE];
		for (int i=0; i!=A0_TABLE_SIZE; ++i) {
			this.A0_TABLE[i] = DEFAULT_CHAR;
		}

		Properties props = ResourceUtils.getProperties(a0propspath);
		Enumeration<?>	pen = props.propertyNames();
		while (pen.hasMoreElements()) {
			String chrstr = (String)(pen.nextElement());
			final int CHAR = Integer.decode(chrstr).intValue();

			// Report an out-of-range key explicitly, naming the key
			// and the resource, rather than letting the table access
			// below fail with a bare array index error.
			if (CHAR < A0 || T_LIM <= CHAR) {
				throw new IllegalArgumentException(
					"Character code \"" + chrstr + "\" in " +
					a0propspath + " is outside the range 0x" +
					Integer.toHexString(A0) + " to 0x" +
					Integer.toHexString(T_LIM - 1));
			}

			final String TOK = getFirstPropsToken(props, chrstr);
			this.A0_TABLE[CHAR - A0] = (TOK != null ? TOK : DEFAULT_CHAR);
		}
	}

/** Returns the safe ASCII string for the given character.
 *
 * @param ch the character to be made safe
 * @return the character itself (as a string) if it is already safe,
 *         or otherwise its safe replacement string
 */
	public String safe(char ch) {

		String cstr = this.map(ch);

		return (cstr != null ? cstr : String.valueOf(ch));
	}

/** Returns the safe ASCII version of the given string.
 * Each {@code char} of the string is converted independently, so a
 * character held as a surrogate pair produces two copies of
 * {@link #DEFAULT_CHAR}.
 *
 * @param str the string to be made safe
 * @return the concatenation of the safe forms of the characters
 *         of {@code str}
 * @throws NullPointerException if {@code str} is {@code null}
 */
	public String safe(String str) {

		final int N = str.length();
		// Allow some headroom, since a replacement may be longer than
		// the character it replaces.
		StringBuilder sbuf = new StringBuilder((int)(1.5 * N));

		for (int i=0; i!=N; ++i) {
			char ch = str.charAt(i);
			String chstr = this.map(ch);
			if (chstr != null) { sbuf.append(chstr); }
			else { sbuf.append(ch); }
		}

		return sbuf.toString();
	}

/** If the given character is outside the set of safe ASCII values, maps
 * it to a short (non-empty) sequence of such values and returns that
 * string, or returns {@code null} otherwise.
 */
	private String map(char ch) {

		final boolean IS_SAFE =
			(ch=='-') ||
			(ch=='_') ||
			(ch=='.') ||
			('a'<=ch && ch <= 'z') ||
			('A'<=ch && ch <= 'Z') ||
			('0'<=ch && ch <= '9');

		String cstr = null;
		if (!IS_SAFE) {

			// The character is outside the safe range: if it is in the
			// range of our mapping table use the string obtained that
			// way, or if it is '@' use the string "at", otherwise use
			// the default character string.
			if (A0 <= ch && ch < T_LIM) {
				cstr = this.A0_TABLE[ch - A0];
			}
			else if (ch == '@') {
				cstr = "at";
			}
			else {
				cstr = DEFAULT_CHAR;
			}
		}

		return cstr;
	}

/** Looks up the given string in the given properties set,
 * and returns the first token in the result if possible,
 * or returns {@code null} in any case of failure.
 * Tokens are separated by runs of spaces and tabs; any such characters
 * at the start of the property value are skipped.
 *
 * @param props the properties set to be searched
 * @param istr the name of the property to be looked up
 * @return the first token of the property value, or {@code null} if
 *         there is no such property or its value contains no token
 */
	protected static String getFirstPropsToken(
			Properties props, String istr)
	{
		String pfirst = null;
		String pstr = props.getProperty(istr);
		if (pstr != null) {
			// Step over leading blanks and tabs, which would otherwise
			// make split() deliver an empty first element and so hide
			// the real first token.
			int start = 0;
			while (start != pstr.length() &&
					(pstr.charAt(start) == ' ' ||
					 pstr.charAt(start) == '\t')) {
				++start;
			}
			String ptoks[] = pstr.substring(start).split("[ \\t]+", 2);
			if (ptoks[0].length() != 0) {
				pfirst = ptoks[0];
			}
		}

		return pfirst;
	}

}
