#!/usr/bin/perl -w

use strict;
use warnings;
use DateTime;

# CASP round label; it is interpolated into the remote upload path.
my $CASP = 'CASP11';

# Upper-case three-letter month abbreviation => month number (1..12).
my $HASH_MONTH = do {
    my %number_of;
    @number_of{qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC)} = (1 .. 12);
    \%number_of;
};

# Maximum number of templates kept in the final selection.
my $TOPBEST = 25;


# Command line: the single argument is the target name, e.g. T1234.
die("the first argument should be name of target, e.g. T1234") if !defined($ARGV[0]);

my $target = $ARGV[0];

# The target name becomes part of file paths and of shell commands further
# down (including an `rm -fr ../output/$target/*`), so an empty name, a name
# such as "..", or one containing '/' or shell metacharacters must be refused.
die("invalid target name '$target'") if $target !~ /\A[A-Za-z0-9][A-Za-z0-9_.\-]*\z/;

my %hash_LGA_S; # key pdb_template, value LGA_S

# Collect the LGA_S score of every ../input/$target/*.res file.
#
# Each file is read directly instead of going through a shell loop and a
# scratch file. Reading file by file also keeps template names and summary
# lines paired: a .res file without a SUMMARY(LGA) line is simply skipped and
# can no longer cause the following template to be lost.
my $input_dir = "../input/$target";

if (opendir(my $dh, $input_dir)) {
    # Same set of files as the shell pattern *.res (hidden files excluded).
    my @res_files = sort grep { !/\A\./ && /\.res\z/ } readdir($dh);
    closedir($dh);

    RES_FILE:
    foreach my $res_file (@res_files) {
        # Template name is the file name up to its first dot,
        # e.g. 1abcA.R0001.pdb.res -> 1abcA
        my ($templ) = split(/\./, $res_file, 2);

        my $res_fh;
        if (!open($res_fh, '<', "$input_dir/$res_file")) {
            warn("cannot read $input_dir/$res_file: $!\n");
            next RES_FILE;
        }

        # Only the first line starting with SUMMARY(LGA) is used.
        while (defined(my $line = <$res_fh>)) {
            next if $line !~ m/^SUMMARY\(LGA\)/;

            # The eighth whitespace-separated field (index 7) is the score.
            my @tokens = split(/\s+/, $line);
            if (defined($tokens[7])) {
                $hash_LGA_S{$templ} = $tokens[7];
            }
            else {
                warn("SUMMARY(LGA) line without a score in $input_dir/$res_file\n");
            }
            last;
        }

        close($res_fh);
    }
}
else {
    # Without input there is nothing to rank; the rest of the script then
    # runs with an empty template list, as it did before.
    warn("cannot open directory $input_dir: $!\n");
}


# Sort templates by LGA_S, best first. Equal scores are ordered by template
# name: keys() returns the templates in no guaranteed order, so without the
# tie-break the selection below could differ from run to run.
my @sort_templs = sort {
    $hash_LGA_S{$b} <=> $hash_LGA_S{$a}
        or
    $a cmp $b
} keys %hash_LGA_S;

my @final_templs;

my %tmp_hash; # PDB ids (first four characters of a template name) already seen

# Select at most $TOPBEST templates, walking the ranking from the top.
# Only the best-ranked entry of each PDB id is considered, so a single
# chain/model is kept per protein. The id is marked as seen before the date
# check, which means that an id whose best entry fails the check is dropped
# altogether; its lower-ranked entries are not reconsidered.
foreach my $templ (@sort_templs){
   my $pdb_id = substr($templ,0,4);
   next if $tmp_hash{$pdb_id}++;

   # check date: checkDate returns 0 when the template must not be used
   next if checkDate($templ,$target) == 0;

   push(@final_templs, $templ);
   last if scalar(@final_templs) >= $TOPBEST;
}

# output
#
# Results go to the directory ../output/$target (one merged file per selected
# template) and to the table ../output/$target.csv.filtered (one
# "template,LGA_S" line per selected template).
my $output_dir = "../output/$target";
my $csv_path   = "../output/$target.csv.filtered";

# Start from an empty output directory. Stale or missing output would be
# uploaded later on, so a failure here stops the script.
if(-d $output_dir){
  system("/bin/bash -c \"rm -fr ../output/$target/*\"") == 0
      or die("cannot clean $output_dir (exit status $?)");
}else{
  system('mkdir', '-p', $output_dir) == 0
      or die("cannot create $output_dir (exit status $?)");
}

# Remove the table of a previous run. It is only re-created when at least
# one template was selected, exactly as before.
if( -e $csv_path){
  unlink($csv_path) or warn("cannot remove $csv_path: $!\n");
}

my $csv_fh;
if(@final_templs){
  open($csv_fh, '>', $csv_path) or die("cannot write $csv_path: $!");
}

foreach my $templ (@final_templs){
   # Output files and table rows are named by the first five characters.
   my $templ_s = substr($templ,0,5);

   # Concatenate the .res and .pdb input of the template, in that order.
   # A missing input is reported and skipped; the other one is still used.
   my $merged = '';
   foreach my $suffix ('res', 'pdb'){
      my $in_path = "../input/$target/$templ.$target.pdb.$suffix";
      if(open(my $in_fh, '<', $in_path)){
         local $/;    # slurp the whole file
         my $content = <$in_fh>;
         $merged .= $content if defined($content);
         close($in_fh);
      }else{
         warn("cannot read $in_path: $!\n");
      }
   }

   # Write the concatenation without the lines that contain '!!!'.
   my $out_path = "$output_dir/$templ_s.$target.pdb.res";
   if(open(my $out_fh, '>', $out_path)){
      foreach my $line (split(/^/, $merged)){
         next if index($line, '!!!') >= 0;
         print {$out_fh} $line;
         # only a final line without line terminator needs one added
         print {$out_fh} "\n" if $line !~ /\n\z/;
      }
      close($out_fh) or warn("error while writing $out_path: $!\n");
   }else{
      warn("cannot write $out_path: $!\n");
   }

   print {$csv_fh} "$templ_s,$hash_LGA_S{$templ}\n";
}

# Flush the table to disk before anything else reads it.
if(defined($csv_fh)){
  close($csv_fh) or die("error while writing $csv_path: $!");
}

# Upload the result directory and the table with rsync over ssh to host p3
# (predictioncenter.org according to the original note), below
# /local/$CASP/RESULTS/TEMPLATES/.
#
# rsync is started without a shell (list form of system). A failed transfer
# is reported and reflected in the exit status of the script instead of
# being ignored.
my $upload_failed = 0;

foreach my $local_path ("../output/$target", "../output/$target.csv.filtered"){
   my @rsync_cmd = ('rsync', '-az', '-e', 'ssh', $local_path, "p3:/local/$CASP/RESULTS/TEMPLATES/");
   if(system(@rsync_cmd) != 0){
      warn("upload of $local_path failed (rsync status $?)\n");
      $upload_failed = 1;
   }
}

exit($upload_failed);

# checkDate($templ, $target)
#
# Compares the date recorded for a template with the date recorded for a
# target.
#
#   $templ   template name; its first four characters are used to find the
#            template line in ../../phase0/output/dates (date format
#            31-MAR-95)
#   $target  target name; a trailing "-D..." domain suffix is removed before
#            the target line is looked up in ../input/targets.date (date
#            format 2011-12-01)
#
# In both files the first line containing the name is used and the date is
# its second comma-separated field.
#
# Returns 0 when the target date is before the template date, 1 otherwise.
# Dies when a file cannot be read or when no usable date is found.
sub checkDate{
    my ($templ, $target) = @_;

    # --- template date -----------------------------------------------------
    my $dates_file = '../../phase0/output/dates';
    my $prot = substr($templ,0,4);
    my $date_str1 = _date_field_for($dates_file, $prot);
    die("no date found for template $prot in $dates_file")
        if !defined($date_str1);

    my ($day,$month,$year) = split(/-/,$date_str1); # 31-MAR-95
    die("malformed date '$date_str1' for template $prot in $dates_file")
        if !defined($year) || !defined($HASH_MONTH->{uc($month)});

    # Two-digit years: 61..99 are read as 19xx, 00..60 as 20xx.
    # A year that is already written in full is left untouched.
    if ($year < 100){
        $year += ($year > 60) ? 1900 : 2000;
    }
    $month = $HASH_MONTH->{uc($month)};
    my $date_template = DateTime->new(year=>$year, month=>$month, day=>$day, hour=>0,minute=>0, time_zone => 'America/Los_Angeles');

    # --- target date -------------------------------------------------------
    my $targets_file = '../input/targets.date';
    my $target_wot_domain = $target;
    $target_wot_domain =~ s/-D.*$//;
    my $date_str2 = _date_field_for($targets_file, $target_wot_domain);
    die("no date found for target $target_wot_domain in $targets_file")
        if !defined($date_str2);

    ($year,$month,$day) = split(/-/,$date_str2); # 2011-12-01
    die("malformed date '$date_str2' for target $target_wot_domain in $targets_file")
        if !defined($day);
    my $date_target = DateTime->new(year=>$year, month=>$month, day=>$day, hour=>0,minute=>0, time_zone => 'America/Los_Angeles');

    # --- comparison --------------------------------------------------------
    if(DateTime->compare_ignore_floating($date_target, $date_template) < 0){ # date_target is before the date_template
        return 0;
    }
    return 1;
}

# _date_field_for($file, $needle)
#
# Returns the second comma-separated field of the first line of $file that
# contains $needle as a literal substring, or undef when no line matches or
# the matching line has no second field. Dies when $file cannot be opened.
sub _date_field_for{
    my ($file, $needle) = @_;

    open(my $fh, '<', $file) or die("cannot open $file: $!");

    my $field;
    while (defined(my $line = <$fh>)){
        next if index($line, $needle) < 0;
        chomp $line;
        $field = (split(/,/, $line, 3))[1];
        last;
    }
    close($fh);

    return $field;
}

