import java.io.*;
import java.net.*;

import java.util.regex.*;
import com.Ostermiller.util.*;

/**
 * Command-line tool that reads a tab-delimited file of genomic regions and, for each
 * population in {@link #POPULATIONS}, downloads the phased data for every region from
 * the HapMap "phased" CGI service and writes it out as per-region files:
 * <ul>
 *   <li>{@code <marker>_<chromosome>_<population>.snp} - one row per SNP
 *       (id, chromosome, position);</li>
 *   <li>{@code <marker>_<chromosome>_<population>.emphase} - one row per haplotype with
 *       the bases encoded as digits (see {@link #encode(char)});</li>
 *   <li>{@code population_<population>.many} - a summary listing the two file names
 *       produced for each region.</li>
 * </ul>
 * All output goes below an {@code output/<population>} directory relative to the
 * current working directory.
 */
public class SweepFileConversion {

    // Column labels looked up in the header row of the tab-delimited input file.
    private static final String MARKER_ID = "marker id";
    private static final String CHROMOSOME = "chromosome";
    // Declared for completeness of the input schema; the value is not needed for the conversion.
    private static final String POSITION = "position";
    private static final String POS_START = "pos_start";
    private static final String POS_END = "pos_end";

    // Populations for which every region of the input file is downloaded and converted.
    private static final String[] POPULATIONS = {"CEU", "JC", "YRI"};

    // Entry of the SNP list in the response: group 1 = rs identifier, group 2 = position.
    private static final Pattern SNP_PATTERN = Pattern.compile("[ ]+- (rs[0-9]+): ([0-9]+)");
    // Entry of the haplotype list: group 1 = sample id, group 2 = chromosome copy tag
    // (e.g. "c1"), group 3 = base sequence.
    private static final Pattern HAPLOTYPE_PATTERN =
        Pattern.compile("[ ]+- ([A-Z0-9]+)_([c12]+): ([AGCT-]+)");


    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the tab-delimited input file
     * @throws Exception if the input file cannot be read, a download fails, or an
     *                   output file cannot be created
     */
    public static void main(String[] args) throws Exception {

        if (args.length < 1) {
            System.out.println("Usage: java SweepFileConversion <input-file>");
            System.exit(-1);
        }

        File outputTopDirectory = new File("output");
        outputTopDirectory.mkdirs();

        String inputFile = args[0];

        for (String population : POPULATIONS) {
            convertPopulation(inputFile, outputTopDirectory, population);
        }
    }

    /**
     * Converts every region listed in the input file for a single population and writes
     * the population's ".many" summary file.
     *
     * @param inputFile          path of the tab-delimited input file
     * @param outputTopDirectory directory below which the population directory is created
     * @param population         population code passed to the download service
     * @throws IOException if reading the input, downloading, or creating an output file fails
     */
    private static void convertPopulation(String inputFile, File outputTopDirectory, String population)
            throws IOException {

        File outputDirectory = new File(outputTopDirectory, population);
        outputDirectory.mkdir();

        File manyFile = new File(outputDirectory, "population_" + population + ".many");
        PrintWriter manyFileWriter =
            new PrintWriter(new OutputStreamWriter(new FileOutputStream(manyFile)));
        try {
            // The stream is kept in a local so it can be closed even if parsing fails part-way.
            InputStream inputStream = new FileInputStream(new File(inputFile));
            try {
                LabeledCSVParser inputFileParser =
                    new LabeledCSVParser(new CSVParser(inputStream, '\t'));

                while (inputFileParser.getLine() != null) {

                    // Read information on next sample.
                    String markerId = inputFileParser.getValueByLabel(MARKER_ID);
                    String chromosome = inputFileParser.getValueByLabel(CHROMOSOME);
                    String posStart = inputFileParser.getValueByLabel(POS_START);
                    String posEnd = inputFileParser.getValueByLabel(POS_END);

                    String baseName = markerId + "_" + chromosome + "_" + population;
                    String haplotypeOutputName = baseName + ".emphase";
                    String snpOutputName = baseName + ".snp";

                    // Construct HapMap URL.
                    String urlString = "http://www.hapmap.org/cgi-perl/phased?pop=" + population
                        + "&chr=" + chromosome
                        + "&start=" + posStart
                        + "&stop=" + posEnd;

                    convertRegion(urlString, chromosome, population, outputDirectory,
                        snpOutputName, haplotypeOutputName);

                    // Append information to summary ".many" file.
                    manyFileWriter.print(haplotypeOutputName);
                    manyFileWriter.print('\t');
                    manyFileWriter.print(snpOutputName);
                    manyFileWriter.println();
                }
            } finally {
                closeQuietly(inputStream);
            }
        } finally {
            manyFileWriter.close();
        }
    }

    /**
     * Downloads the data for one region and writes its SNP file and, when the response
     * contains a haplotype section, its haplotype file.
     *
     * @param urlString           URL of the region to download
     * @param chromosome          chromosome label, written into every SNP row
     * @param population          population code, used for progress output only
     * @param outputDirectory     directory receiving the two output files
     * @param snpOutputName       file name of the SNP output
     * @param haplotypeOutputName file name of the haplotype output
     * @throws IOException if the download or writing an output file fails
     */
    private static void convertRegion(String urlString, String chromosome, String population,
            File outputDirectory, String snpOutputName, String haplotypeOutputName)
            throws IOException {

        BufferedReader reader = openReader(urlString);

        System.out.println("Processing chromosome " + chromosome + " and population " + population + ":");

        try {
            // Skip everything up to and including the line that introduces the SNP list.
            String inputLine;
            while ((inputLine = reader.readLine()) != null && !inputLine.startsWith("snps")) {
                // nothing to do, just advancing the reader
            }

            inputLine = writeSnps(reader, new File(outputDirectory, snpOutputName), chromosome);
            System.out.println("\t- Finished writing SNP information to " + snpOutputName);

            // inputLine is null when the response ended before a haplotype section appeared.
            if (inputLine != null && inputLine.startsWith("phased_haplotypes")) {
                writeHaplotypes(reader, new File(outputDirectory, haplotypeOutputName));
                System.out.println("\t- Finished writing haplotype information to " + haplotypeOutputName);
                System.out.println();
            }
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Opens a buffered reader on the response of the given URL.
     *
     * @param urlString URL to open
     * @return reader positioned at the start of the response
     * @throws IOException if the URL is malformed or the connection cannot be established
     */
    private static BufferedReader openReader(String urlString) throws IOException {
        URL url = new URL(urlString);
        URLConnection urlConnection = url.openConnection();
        try {
            return new BufferedReader(
                new InputStreamReader(new BufferedInputStream(urlConnection.getInputStream()))
            );
        } catch (IOException e) {
            System.out.println("Failed to connect to URL " + urlString);
            throw e;
        }
    }

    /**
     * Writes the SNP file: a header row followed by one row per consecutive SNP entry
     * read from {@code reader}.
     *
     * @param reader        reader positioned just after the line introducing the SNP list
     * @param snpOutputFile file to create
     * @param chromosome    chromosome label written into every row
     * @return the first line that is not a SNP entry, or {@code null} if the end of the
     *         response was reached
     * @throws IOException if reading fails or the output file cannot be created
     */
    private static String writeSnps(BufferedReader reader, File snpOutputFile, String chromosome)
            throws IOException {

        PrintWriter snpWriter =
            new PrintWriter(new OutputStreamWriter(new FileOutputStream(snpOutputFile)));
        try {
            // Write header line.
            snpWriter.print("snpid");
            snpWriter.print('\t');
            snpWriter.print("chr");
            snpWriter.print('\t');
            snpWriter.print("HG17");
            snpWriter.println();

            String inputLine;
            while ((inputLine = reader.readLine()) != null) {
                Matcher matcher = SNP_PATTERN.matcher(inputLine);
                if (!matcher.matches()) {
                    // First non-SNP line ends the list; hand it back to the caller.
                    break;
                }
                snpWriter.write(matcher.group(1));
                snpWriter.print('\t');
                snpWriter.print(chromosome);
                snpWriter.print('\t');
                snpWriter.print(matcher.group(2));
                snpWriter.println();
            }
            return inputLine;
        } finally {
            snpWriter.close();
        }
    }

    /**
     * Writes the haplotype file: one row per consecutive haplotype entry read from
     * {@code reader}, consisting of the sample id, "T" for copy "c1" or "U" otherwise,
     * and the encoded bases. A sample that contributes only a single haplotype (X
     * chromosome case) gets an additional row of missing-data codes so that every
     * sample has a "T" and a "U" row.
     *
     * @param reader              reader positioned just after the line introducing the
     *                            haplotype list
     * @param haplotypeOutputFile file to create
     * @throws IOException if reading fails or the output file cannot be created
     */
    private static void writeHaplotypes(BufferedReader reader, File haplotypeOutputFile)
            throws IOException {

        PrintWriter haplotypeWriter =
            new PrintWriter(new OutputStreamWriter(new FileOutputStream(haplotypeOutputFile)));
        try {
            String lastSample = null;
            String lastCs = null;
            int lastSequenceLength = 0;
            int runningSampleCount = 0;

            String inputLine;
            while ((inputLine = reader.readLine()) != null) {
                Matcher matcher = HAPLOTYPE_PATTERN.matcher(inputLine);
                if (!matcher.matches()) {
                    break;
                }

                String sample = matcher.group(1);
                String cs = matcher.group(2).equals("c1") ? "T" : "U";
                String sequence = matcher.group(3);

                // If we are looking at a different sample from the previous one,
                // see if we need to add an extra line because of X chromosome.
                if (!sample.equals(lastSample)) {
                    if (runningSampleCount == 1) {
                        writeMissingHaplotype(haplotypeWriter, lastSample, lastCs, lastSequenceLength);
                    }
                    runningSampleCount = 0;
                }
                runningSampleCount++;

                haplotypeWriter.write(sample);
                haplotypeWriter.print('\t');
                haplotypeWriter.print(cs);
                for (int i = 0; i < sequence.length(); i++) {
                    haplotypeWriter.print('\t');
                    haplotypeWriter.print(encode(sequence.charAt(i)));
                }
                haplotypeWriter.println();

                // Remember the previous sample.
                lastSample = sample;
                lastCs = cs;
                lastSequenceLength = sequence.length();
            }

            // The final sample has no successor to trigger the check above, so pad it here.
            if (runningSampleCount == 1) {
                writeMissingHaplotype(haplotypeWriter, lastSample, lastCs, lastSequenceLength);
            }
        } finally {
            haplotypeWriter.close();
        }
    }

    /**
     * Writes the complementary row for a sample that had only one haplotype: the
     * opposite "T"/"U" tag followed by {@code length} missing-data codes ('5').
     *
     * @param writer destination haplotype file
     * @param sample sample id of the single-haplotype sample
     * @param cs     tag ("T" or "U") of the row that was actually present
     * @param length number of columns to fill
     */
    private static void writeMissingHaplotype(PrintWriter writer, String sample, String cs, int length) {
        writer.write(sample);
        writer.print('\t');
        writer.print(cs.equals("T") ? "U" : "T");
        for (int i = 0; i < length; i++) {
            writer.print('\t');
            writer.print('5');
        }
        writer.println();
    }

    /**
     * Closes a resource on a best-effort basis; a failure is reported on standard error
     * but does not abort the conversion or mask an exception already in flight.
     *
     * @param resource resource to close
     */
    private static void closeQuietly(Closeable resource) {
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Encodes a base letter as the digit character used in the haplotype output.
     *
     * @param letter base character from a haplotype sequence
     * @return '1' for A, '2' for C, '3' for G, '4' for T, and '5' for anything else
     *         (for example the '-' placeholder)
     */
    private static char encode(char letter) {
        switch (letter) {
            case 'A':
                return '1';
            case 'C':
                return '2';
            case 'G':
                return '3';
            case 'T':
                return '4';
            default:
                return '5';
        }
    }

}