/**
 * 
 */
package kr.ac.kaist.swrc.jhannanum.plugin.SupplementPlugin.PlainTextProcessor.InformalSentenceFilter;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.StringTokenizer;

import kr.ac.kaist.swrc.jhannanum.comm.PlainSentence;
import kr.ac.kaist.swrc.jhannanum.plugin.SupplementPlugin.PlainTextProcessor.PlainTextProcessor;

/**
 * Plain text processor that normalizes informally written tokens in a sentence.
 * <p>
 * The sentence is split on spaces and tabs. Tokens longer than
 * {@link #REPEAT_CHAR_ALLOW} characters have every run of one repeated character
 * cut down to at most {@link #REPEAT_CHAR_ALLOW} occurrences and have their
 * single and double quote characters removed. Shorter tokens are kept as they are.
 * The tokens are then joined again, each one followed by a single space.
 *
 * @author Sangwon Park (hudoni@world.kaist.ac.kr), CILab, SWRC, Kaist
 *
 */
public class InformalSentenceFilter implements PlainTextProcessor {
	/**
	 * Serves two purposes: tokens longer than this are filtered, and it is the
	 * maximum number of consecutive identical characters kept in such a token.
	 */
	private static final int REPEAT_CHAR_ALLOW = 10;

	/**
	 * Rewrites the text of the given sentence in place and returns the same object.
	 * <p>
	 * Every token of the result is followed by one space, so a non-empty result
	 * always ends with a space and any run of spaces/tabs in the input collapses
	 * to a single space.
	 *
	 * @param ps the sentence to filter; may be {@code null}
	 * @return {@code ps} itself with its text replaced by the filtered text, or
	 *         {@code ps} unchanged when it is {@code null} or holds no text
	 */
	@Override
	public PlainSentence doProcess(PlainSentence ps) {
		if (ps == null) {
			return null;
		}
		String sentence = ps.getSentence();
		if (sentence == null) {
			// Nothing to tokenize; StringTokenizer would throw on a null string.
			return ps;
		}

		// A builder avoids re-copying the whole result for every appended character.
		StringBuilder filtered = new StringBuilder(sentence.length() + 1);
		StringTokenizer st = new StringTokenizer(sentence, " \t");

		while (st.hasMoreTokens()) {
			String word = st.nextToken();

			if (word.length() > REPEAT_CHAR_ALLOW) {
				appendSquashed(filtered, word);
			} else {
				filtered.append(word);
			}
			filtered.append(' ');
		}
		ps.setSentence(filtered.toString());
		return ps;
	}

	/**
	 * Appends a long token to {@code out}, keeping at most {@link #REPEAT_CHAR_ALLOW}
	 * consecutive copies of any character and dropping quote characters.
	 *
	 * @param out  the buffer receiving the filtered token
	 * @param word a non-empty token
	 */
	private static void appendSquashed(StringBuilder out, String word) {
		char runChar = word.charAt(0);
		// Number of repeats seen after the first character of the current run.
		int repeatCnt = 0;

		if (!isQuote(runChar)) {
			out.append(runChar);
		}

		for (int i = 1; i < word.length(); i++) {
			char current = word.charAt(i);

			if (current == runChar) {
				if (repeatCnt == REPEAT_CHAR_ALLOW - 1) {
					// The run already holds the maximum allowed copies; skip the surplus.
					continue;
				}
				repeatCnt++;
			} else {
				runChar = current;
				repeatCnt = 0;
			}
			if (!isQuote(current)) {
				out.append(current);
			}
		}
	}

	/**
	 * Tells whether the character is a double or single quote.
	 *
	 * @param c the character to test
	 * @return {@code true} for {@code "} and {@code '}
	 */
	private static boolean isQuote(char c) {
		return c == '"' || c == '\'';
	}


	/**
	 * Does nothing; this filter reads no configuration.
	 *
	 * @param configFile ignored by this implementation
	 */
	@Override
	public void initialize(String configFile) throws FileNotFoundException, IOException {

	}


	/**
	 * This filter buffers nothing, so there is never anything to flush.
	 *
	 * @return always {@code null}
	 */
	@Override
	public PlainSentence flush() {
		return null;
	}


	/**
	 * Does nothing; this filter holds no resources.
	 */
	@Override
	public void shutdown() {

	}

	/**
	 * @return always {@code false}, since no input is retained between calls
	 */
	@Override
	public boolean hasRemainingData() {
		return false;
	}
}
