#! /usr/local/bin/perl
#
################################################################################
# Requirements                                                                 #
################################################################################
#
require 5.002;
#require "GUSI.ph";   #MacOS
#
# External References
#
use GD;
use Carp;
use FileHandle;
use File::Path;
use File::Basename;
use Getopt::Std;    #UNIX

use vars qw($opt_d $opt_n $opt_h);
#
################################################################################
# Main program global variables                                                #
################################################################################
#
my $log_number = 0;
my $seq_number = 0;
my $C_anzahl   = 0;
my $c_anzahl   = 0;
my $a_anzahl   = 0;
my $g_anzahl   = 0;
my $t_anzahl   = 0;
my $n_anzahl   = 0;
my $ct_anzahl  = 0;
my $ca_anzahl  = 0;
my $cg_anzahl  = 0;
my $tc_anzahl  = 0;
my $ta_anzahl  = 0;
my $tg_anzahl  = 0;
my $ac_anzahl  = 0;
my $ag_anzahl  = 0;
my $at_anzahl  = 0;
my $ga_anzahl  = 0;
my $gc_anzahl  = 0;
my $gt_anzahl  = 0;
my $logfile;
my $seqfile;
my @seqfiles;
my @logfiles;
my $seqfilelist;
my $logfilelist;
my $log;
my $seq;
#
#
# MAN page
#
my $MAN = <<MAN;
NAME

     EvalLog: Program seaches for unexpected base exchanges in log-files

AVAILABILITY

     Requires (Mac)perl 5.002

DESCRIPTION

     The fidelity of the PCR and the sequencing reaction has a strong 
     impact on the accuracy of the methylation data. It can be estimated 
     from the number of base exchanges other than C to T conversions 
     recorded in the log-file. The "EvalLog" program scans the log-file 
     for these  substitutions, and determines in parallel the total number 
     of the five bases 5mC, c, t, a and g in the corresponding seq1 files. 
     These values are written to a text-file and can be 
     used to estimate the error rate of the reactions. The log-file and the 
     seq1-files are expected to be in the same subdirectory.

SYNOPSIS

     EvalLog.pl [-d] directory [-n] outfile [-h] 
     (replaced by file selector box under MacOS)

OPTIONS

    -d string directory containing the log-files and seq1-files
    -n string name of the output file
    -h        Prints this message ('Cancel' 'Cancel' under MacOS)
 
AUTHOR(S)

     Christoph Grunau (cgrunau\@imb-jena.de)

CHANGE LOG

     Nov.   1st, 1998 CG   Initial version
     Sep.  30th, 1999 CG   minor revisions
MAN
#
# Command line parsing 
#
getopts('d:n:h') || croak($MAN);              #UNIX
#$opt_d = &GetFolder("Choose input folder");    #MacOS
#$opt_n = &PutFile("Choose output error file"); #MacOS

#
# Check command line arguments
#
croak($MAN) if ( $opt_h || ! defined($opt_d) || $opt_d eq "" );

chdir $opt_d;
@logfiles = <*.log>;                  #write all log-files into a list
$log_number = $#logfiles+1;
if ($log_number < 1) 
{print "Sorry! No log-files available. Please try option -h.\n"}
else
{
print "\nAnalyzing log-files";
foreach $logfilelist(@logfiles)
      {
	     $log = "";
	     open (LOG, "<$logfilelist");  #transfer log-files
	     print (".");                  #show program is running
	     while (<LOG>)
                   {
	                  #
		                 # read line by line, remove Newline
		                 # make text from lines
		                 #
		                 $logfile = $_;
                   chomp $logfile;
                   $log = $log . $logfile;
	                  }
      close LOG;
      #
      # search for base exchanges in log-files
      #
      while ($log =~ /"c:t"/g)   {$ct_anzahl++;}
      while ($log =~ /"c:a"/g)   {$ca_anzahl++;}
      while ($log =~ /"c:g"/g)   {$cg_anzahl++;}
      while ($log =~ /"t:c"/g)   {$tc_anzahl++;}
      while ($log =~ /"t:a"/g)   {$ta_anzahl++;}
      while ($log =~ /"t:g"/g)   {$tg_anzahl++;}
      while ($log =~ /"a:c"/g)   {$ac_anzahl++;}
      while ($log =~ /"a:g"/g)   {$ag_anzahl++;}
      while ($log =~ /"a:t"/g)   {$at_anzahl++;}
      while ($log =~ /"g:a"/g)   {$ga_anzahl++;}
      while ($log =~ /"g:c"/g)   {$gc_anzahl++;}
      while ($log =~ /"g:t"/g)   {$gt_anzahl++;}
      }

@seqfiles = <*.seq1>;                    #write all.seq1-files into a list
$seq_number = $#seqfiles+1;
if ($seq_number < 1) {croak "No seq1-files available. Try option -h.\n";}
print "\nAnalyzing seq1-files";
foreach $seqfilelist(@seqfiles)
      {
	     $seq = "";
	     open (SEQUENCE, "<$seqfilelist");  #transfer sequences
	     print (".");                       #show program is running
	     while (<SEQUENCE>)
            {
		          #
		          # read line by line, remove Newline and
		          # make sequence out of lines
		          #
	           $seqfile = $_;
	           chomp $seqfile;
	           unless ($seqfile =~ /^>/) {$seq = $seq . $seqfile;}
	           }
      close SEQUENCE;
      #
      # count bases
      #
      while ($seq =~ /C/g)   {$C_anzahl++;}
      while ($seq =~ /c/g)   {$c_anzahl++;}
      while ($seq =~ /t/g)   {$t_anzahl++;}
      while ($seq =~ /a/g)   {$a_anzahl++;}
      while ($seq =~ /g/g)   {$g_anzahl++;}
      while ($seq =~ /n/g)   {$n_anzahl++;}
      }
print "\n";
#
# Write results to file.
#
$path = `pwd`;
chomp $path;

if (!defined $opt_n)
   {
    ($base = $path) =~ s#.*[/:]##;    #from path name end to last / or :
    }

else {
     $opt_n =~ tr/a-zA-Z0-9_\.\///cd;  #UNIX
     $base  =  $opt_n;
     }

if ($base !~ /\//) {$base =  substr ($base,0,28);}        #UNIX
unless ($base =~ /\.err$/i) {$base = $base.".err";}

open ERROR_FILE, ">$base" || croak("Cannot open file '$base'.");
print ERROR_FILE "Unexpected base exchanges:\n";
print ERROR_FILE "c:t\t",$ct_anzahl,"\n";
print ERROR_FILE "c:a\t",$ca_anzahl,"\n";
print ERROR_FILE "c:g\t",$cg_anzahl,"\n";
print ERROR_FILE "t:c\t",$tc_anzahl,"\n";
print ERROR_FILE "t:a\t",$ta_anzahl,"\n";
print ERROR_FILE "t:g\t",$tg_anzahl,"\n";
print ERROR_FILE "a:c\t",$ac_anzahl,"\n";
print ERROR_FILE "a:g\t",$ag_anzahl,"\n";
print ERROR_FILE "a:t\t",$at_anzahl,"\n";
print ERROR_FILE "g:a\t",$ga_anzahl,"\n";
print ERROR_FILE "g:c\t",$gc_anzahl,"\n";
print ERROR_FILE "g:t\t",$gt_anzahl,"\n";
print ERROR_FILE "\n";
print ERROR_FILE "in total number of bases:\n";
print ERROR_FILE "5mC:\t",$C_anzahl,"\n";
print ERROR_FILE "  C:\t",$c_anzahl,"\n";
print ERROR_FILE "  T:\t",$t_anzahl,"\n";
print ERROR_FILE "  A:\t",$a_anzahl,"\n";
print ERROR_FILE "  G:\t",$g_anzahl,"\n";
print ERROR_FILE "  n:\t",$n_anzahl,"\n";
print ERROR_FILE
"sum:\t",$n_anzahl+$g_anzahl+$a_anzahl+$t_anzahl+$c_anzahl+$C_anzahl,"\n";
print ERROR_FILE "\n";
print ERROR_FILE "in ",$seq_number," individual sequences.\n";

close ERROR_FILE || croak("Cannot close open file '$base'.");
print "\nError report written to file '$base'."
}
##############################################################################
# SUBROUTINES                                                                #
# part of the Standard File Package Utility for MacPerl 4.1.1                #
#                                                                            #
# 1994.01.05 v4.1.1 Matthias Neeracher <neeri@iis.ee.ethz.ch>                #
#  Minor changes to reflect future plans for standard file support.          #
#                                                                            #
# 1993.10.27 v1.2	wm                                                         #
#	Change the calling syntax to adopt the 4.1.0 release.                      #
#                                                                            #
# 1993.10.19 v1.1	wm                                                         #
#	convert for 4.1b6                                                          #
#                                                                            #
# 1993.8.10  V1.0                                                            #
#     Watanabe Maki (Watanabe.Maki@tko.dec.com)                              #
#                                                                            #
##############################################################################
# Name
#    PutFile/GetNewFile
# Syntax
#    $filename = &PutFile($prompt [, $default]);
#    $filename = &GetNewFile($prompt [, $default]);
# Description
#    Query a new file name to user by Standard File Dialog Box.
#    $prompt is a prompt sting on the dialog box.
#    $default is a default file name.
#
#  sub PutFile {
#      local($prompt, $default) = @_;
#      
#      &MacPerl'Choose(
#          &GUSI'AF_FILE,         # domain
#          0,                     # type
#          $prompt,               # prompt
#          "",                    # constraint
#          &GUSI'CHOOSE_NEW + ($default ?
#  &GUSI'CHOOSE_DEFAULT : 0),    
#  		  								 # flag 
#          $default               # default filename
#          );
#  }
######
# Name
#    GetFolder
# Syntax
#    $foldername = &GetFolder($prompt [, $default]);
# Description
# Query a folder name to user by Standard File Dialog Box.
# $default is the default dialog
#
#  sub GetFolder {
#  	local($prompt, $default) = @_;
#  	
#      &MacPerl'Choose(
#          &GUSI'AF_FILE,          # domain
#          0,                      # type
#          $prompt,                # prompt
#          "",                     # constraint
#          &GUSI'CHOOSE_DIR + ($default ?
#  &GUSI'CHOOSE_DEFAULT : 0),
#  		                          # flag
#  		  $default
#          );
#  }
