#!/usr/local/bin/perl -w

# parse_primers.pl
# Maureen Liu, Sanger Institute, May/2005
# Use BioPerl to process my CpG methylation assay primers
# Input: primers and PCR fragments, in FASTA format
# Output: primer list for the Sigma-Genosys order form
#         restriction site counts for the methylation assay spreadsheet


# location of bioperl
use lib '/nfs/disk100/pubseq/PerlModules/Ensembl/www_38_1/bioperl-live';

use Bio::Seq;
use Bio::SeqIO;
use Bio::Tools::RestrictionEnzyme;

# input and output files===============================================

# Prompt on STDOUT for the "$label" file name, read one line from STDIN and
# return it without the trailing newline.
# Dies when STDIN is exhausted, when the answer is empty, or when the named
# file cannot be read, so a bad name is reported here instead of later when
# the name is handed to Bio::SeqIO.
sub _ask_for_file {
  my ($label) = @_;

  print "What's the $label file: ";
  my $answer = <STDIN>;
  defined $answer or die "No $label file given (end of input)\n";
  chomp $answer;
  $answer ne '' or die "No $label file given\n";
  -r $answer or die "Can't read $label file:$answer ($!)\n";

  return $answer;
}

# Get the primer file
my $primer_file = _ask_for_file('primer');

# Get the pcr file
my $pcr_file = _ask_for_file('pcr');

# Output primer list for ordering.
# The bareword handle is kept on purpose: the primer section of this script
# prints to SIGMA.
my $sigma = "primer_sigma.txt";
open(SIGMA, '>', $sigma) or die "Can't open output:$sigma ($!)\n";

# Output restriction digest file.
# The bareword handle is kept on purpose: the digest section of this script
# prints to DIGEST.
my $digest = "digest.txt";
open(DIGEST, '>', $digest) or die "Can't open output:$digest ($!)\n";

# primers for sigma====================================================

# get primer list
my $primerIO = Bio::SeqIO->new(-file   => $primer_file,
                               -format => "fasta");

# store primer id and seq in hash for later use
# (the digest section looks primers up as <fragment id>A and <fragment id>B)
my %primer = ();

# print primer list for sigma order form: one "id<TAB>sequence" line per primer
while (my $entry = $primerIO->next_seq) {
  my $primer_id  = $entry->display_id;
  my $primer_seq = $entry->seq;

  # A repeated id would otherwise silently replace the earlier sequence
  # in %primer; both lines still go to the order form.
  warn "Duplicate primer id $primer_id: keeping the later sequence\n"
    if exists $primer{$primer_id};

  print SIGMA $primer_id, "\t", $primer_seq, "\n";
  $primer{$primer_id} = $primer_seq;
}

# Nothing else writes to SIGMA.  Buffered write errors only surface at
# close, so check it instead of relying on the implicit close at exit.
close(SIGMA) or die "Can't close output:$sigma ($!)\n";

# restriction digest of pcr fragments==================================

# get pcr fragment list
my $pcrIO = Bio::SeqIO->new(-file   => $pcr_file,
                            -format => "fasta");

# Count MspI cutting sites in every fragment and flag fragments that
# HindIII cuts.
# NOTE(review): the original comment spoke of HpaII sites while the code
# uses MspI -- presumably because both enzymes share a recognition site;
# confirm.
my $HindIII = Bio::Tools::RestrictionEnzyme->new(-name => "HindIII");
my $MspI    = Bio::Tools::RestrictionEnzyme->new(-name => "MspI");

while (my $pcr = $pcrIO->next_seq) {

  my $name   = $pcr->display_id;
  my $length = $pcr->length;

  # get relevant primers from %primer: their ids are the fragment id
  # followed by "A" and "B"
  my @pair;
  for my $suffix ("A", "B") {
    my $primer_seq = $primer{$name . $suffix};
    if (!defined $primer_seq) {
      # Keep the column empty (as before) but say which primer is missing
      # instead of leaving only an "uninitialized value" warning.
      warn "No primer $name$suffix found for fragment $name\n";
      $primer_seq = "";
    }
    push @pair, $primer_seq;
  }
  my ($primerA, $primerB) = @pair;

  # use subroutine cut_freq to calculate cutting sites
  my $cut_C = cut_freq($HindIII, $pcr);
  my $cut_M = cut_freq($MspI, $pcr);

  if ($cut_C == 0) {
    # columns: fragment id, primer A, primer B, MspI site count, length
    print DIGEST "$name\t$primerA\t$primerB\t$cut_M\t$length bp\n";
  }
  else {
    print DIGEST "$name\tHindIII cuts!!!\n";
  }
}

# Nothing else writes to DIGEST.  Buffered write errors only surface at
# close, so check it instead of relying on the implicit close at exit.
close(DIGEST) or die "Can't close output:$digest ($!)\n";

# subroutine return number of cutting sites============================
# Usage: cut_freq($enzyme, $seq)
#   $enzyme - object providing a cut_seq() method (the script passes
#             Bio::Tools::RestrictionEnzyme objects)
#   $seq    - sequence object handed unchanged to cut_seq()
# Returns the number of cutting sites.  cut_seq() is called in list context
# and its result is treated as the list of fragments, so N fragments mean
# N-1 sites.  An empty fragment list counts as 0 sites; previously it gave
# -1, which the caller's "== 0" test misread as "the enzyme cuts".
sub cut_freq
{
  my ($enzyme, $seq) = @_;
  my @fragments = $enzyme->cut_seq($seq);
  return @fragments ? $#fragments : 0;
}

# End of script: leave with the success status (a bare exit also yields 0).
exit 0;
