#!  /usr/bin/perl
#This script parses PSVS reports by project name and generates an HTML file containing a table of protein information:
#target name, source, links to the PSVS concise report and summary report, bb/all r.m.s.d. for the ordered region, RPF score, link to the precision map, Z scores, link to the superimposed image with the PDB structure,
#and links to the coordinate file, chemical shift file and peak list files

use strict;
use warnings;
use LWP::UserAgent;
use HTTP::Request;
use XML::Simple;
use Data::Dumper;


my $ua = LWP::UserAgent->new(timeout => 600);
my $psvs_url = 'http://psvs.nesg.org/results/rosetta_MR/';
my $psvs_path = '/data/PSVS/PSVS-v1.4-dev/results/rosetta_MR/';
#my $si_path = $eNMR_path.'si/';
my $output_file = $psvs_path.'PSVS_summary_blind.html';
die("Usage:generateTable.pl <rmsd_file>\n") if(scalar @ARGV < 1);
my ($rmsd_file) = shift @ARGV; 

my %projs = ('nmr' => 'NMR', 'r3' => 'R3', 'r3_cst' => 'R3Cons');
#rmsd file stored rmsd, gdt_ts, tm, and Ca_chemical_shift_pred_corrlation information
open(RMSD,"<$rmsd_file") or die("failed to open rmsd file:$!");
my %rmsd;
while(<RMSD>){
	chomp($_);	
	next if(/^target/);
	my @values = split(/\s+/,$_);
	my($target,$source,$ca_rmsd,$bb_rmsd_order,$bb_rmsd_all,$heavy_rmsd_order,$heavy_rmsd_all,$tm,$gdt,$n1,$n2) = @values;
	$target =~ tr/a-z/A-Z/;
	$rmsd{$target}->{$source}->{'proj_name'} = $target.'_'.$projs{$source}.'_em_bcr3';
	$rmsd{$target}->{$source}->{'ca_rmsd'} = $ca_rmsd;
	$rmsd{$target}->{$source}->{'bb_rmsd_order'} = $bb_rmsd_order;
	$rmsd{$target}->{$source}->{'bb_rmsd_all'} = $bb_rmsd_all;
	$rmsd{$target}->{$source}->{'heavy_rmsd_order'} = $heavy_rmsd_order;
	$rmsd{$target}->{$source}->{'heavy_rmsd_all'} = $heavy_rmsd_all;
	$rmsd{$target}->{$source}->{'gdt'} = $gdt;
	$rmsd{$target}->{$source}->{'tm'} = $tm;
#	$rmsd{$target}->{$source}->{'Ca_R'} = $Ca_R;	
}
close RMSD;

my %table = (); #to save all the information included in the table
foreach my $target (sort keys %rmsd){
	foreach my $source (sort keys %projs){
		my $projName = $rmsd{$target}->{$source}->{'proj_name'};
		$table{$target}->{$source} = parsePSVS($projName);
	}	
}
outputLink();
outputTable($output_file);
1;

#create a HTML file linked to all PSVS_results
sub outputLink{
	open(O,">$psvs_path/psvs_link.html") or die("failed to open psvs_link.html:$!");
	print O "<HTML>\n<HEAD>\n<TITLE>PSVS Links for Rosetta MR Project</TITLE>\n<link rel='stylesheet' type='text/css' href='http://psvs.nesg.org/results/rosetta_MR/r3NMR.css' />\n</HEAD>\n";
	print O "<BODY><center><H3>PSVS Links for Rosetta MR Project</H3></center><hr>";
	print O "<TABLE border=1 align='center'>\n";
 	foreach my $target (sort keys %table){
		print O "<TR>\n";
		my @sources = sort keys %{$table{$target}};
		unshift(@sources,' ');
		foreach my $source (@sources){
			print O "<TD>$source</TD>\n";
		}
		print O "</TR>\n<TR>\n<TD>$target</TD>\n";
		foreach my $source (@sources){
			next if($source eq ' ');
			my %info = %{$table{$target}->{$source}};
			print O "<TD><a href='$info{'c_rpt'}'><img src='$info{'molPic'}' height=100 width=100 /></a></TD>\n";
		}
		print O "</TR>\n";		
	}
	print O "</TABLE>\n</BODY>\n</HTML>";
	close O;
}
#to output the PSVS information into a html table
sub outputTable{
	my $output_file = shift;
	open(P,">psvs_info.txt") or die("can't open file psvs_info.txt:$!");
	my @psvs_fields = ('Target','Method','bb_rmsd_ord','bb_rmsd_all','heavy_rmsd_ord','heavy_rmsd_all','0.1','0.2','0.5','deg1','deg10','Precision','Recall','DP','v3d','prosa','procheck_bb','procheck_all','molprobity');
	print P join("\t",@psvs_fields),"\n";
	open(O,">$output_file") or die("failed to open $output_file:$!");
	print O "<HTML>\n<HEAD>\n<TITLE>Analysis for Rosetta MR Project</TITLE>\n<link rel='stylesheet' type='text/css' href='http://psvs.nesg.org/results/rosetta_MR/r3NMR.css' />\n</HEAD>\n";
	print O "<BODY>\n<TABLE border=1 align='center'>\n<CAPTION>Analysis for Rosetta MR Project</CAPTION>\n<TR>\n";
	print O "<TH colspan=3:>General</TH>\n<TH colspan=7>RMSD to X-ray structure(Å) and structural similarity</TH>\n<TH colspan=2>PSVS reports</TH>\n<TH colspan=2>Ensemble RMSD(Å)</TH>\n<TH colspan=3>NOE Violations</TH>\n<TH colspan=2>ACO Violations</TH>\n<TH colspan=2>RPF anysis</TH><TH colspan=5>Structural Quality Z-scores</TH>\n<TH colspan=2>Misc</TH></TR>\n";
	my @fields = ('Target','Method','Model#',
			'Ca_All<sup>1</sup>','BB_Ord<sup>2</sup>','BB_All<sup>2</sup>','Hvy_Ord<sup>2</sup>','Hvy_All<sup>2</sup>','TM-Score<sup>1</sup>','GDT-TS<sup>3</sup>',
			'Concise Report','Summary',
			'backbone<sup>4</sup>','heavy<sup>4</sup>',
			'0.1-0.2 Å','0.2-0.5 Å','>0.5 Å',
			'1-10 °','>10 °',
			'DP-score','Precicison Map',			
		      	'Verify3D','ProsaII','Procheck (<I>phi-psi</I>)<sup>4</sup>','Procheck (<I>all</I>)<sup>4</sup>','MolProbity Clashscore',		       	
		      	'superimposed Coordinates','input files<sup>5</sup>');
	foreach (@fields){
		print O "<TH>$_</TH>\n";
	}
	print O "</TR>\n";	
	my $class = '';
	foreach my $target (sort keys %table){
		my @sources = sort keys %{$table{$target}};
		my $n_source = scalar @sources;
		$class = $sources[0];
	#	$class = ($i%2 == 1?'specalt':'spec');	
		print O "<TR class='$class'><TD class='multiRow' rowspan='$n_source'><font color='#0099ff'>$target</font></TD>\n";
		my $j = 0;
		foreach my $source (@sources){
			#$class = ($i%2 == 1?'specalt':'spec');
			print O "<TR class='$source'>\n" if($j);
			my %info = %{$table{$target}->{$source}};
			my @values = ($projs{$source});
			my @psvss = ($target,$source);
			my $data = $info{'data'};
			cpush(\@values,[$data->{'STRUCTURES_USED'}]);				
			cpush(\@values,[$rmsd{$target}->{$source}->{'ca_rmsd'},$rmsd{$target}->{$source}->{'bb_rmsd_order'},$rmsd{$target}->{$source}->{'bb_rmsd_all'},$rmsd{$target}->{$source}->{'heavy_rmsd_order'},$rmsd{$target}->{$source}->{'heavy_rmsd_all'},$rmsd{$target}->{$source}->{'tm'},$rmsd{$target}->{$source}->{'gdt'}]);
			cpush(\@values,[$info{'c_rpt'},$info{'s_rpt'}],1);
                        cpush(\@values,[$data->{'RMSD'}->{'SELECTED'}->{'BACKBONE_ATOMS'},$data->{'RMSD'}->{'SELECTED'}->{'HEAVY_ATOMS'}]);
                        cpush(\@psvss,[$data->{'RMSD'}->{'SELECTED'}->{'BACKBONE_ATOMS'},$data->{'RMSD'}->{'ALL'}->{'BACKBONE_ATOMS'},
			              $data->{'RMSD'}->{'SELECTED'}->{'HEAVY_ATOMS'},$data->{'RMSD'}->{'ALL'}->{'HEAVY_ATOMS'}]);
			cpush(\@values,[$data->{'DIST_VIOLATIONS'}->{'GROUP1'},$data->{'DIST_VIOLATIONS'}->{'GROUP2'},$data->{'DIST_VIOLATIONS'}->{'GROUP3'}]);
			cpush(\@psvss,[$data->{'DIST_VIOLATIONS'}->{'GROUP1'},$data->{'DIST_VIOLATIONS'}->{'GROUP2'},$data->{'DIST_VIOLATIONS'}->{'GROUP3'}]);
			cpush(\@values,[$data->{'DIHEDRAL_VIOLATIONS'}->{'GROUP1'},$data->{'DIHEDRAL_VIOLATIONS'}->{'GROUP2'}]);
			cpush(\@psvss,[$data->{'DIHEDRAL_VIOLATIONS'}->{'GROUP1'},$data->{'DIHEDRAL_VIOLATIONS'}->{'GROUP2'}]);
                        if($data->{'RPF'}){
                                cpush(\@values,[$data->{'RPF'}->{'DP_SCORE'}]);
                                cpush(\@values,[$info{'precMap'}],1);
                        }else{
                                push(@values,('N/A','N/A'));
                        }
			cpush(\@psvss,[$data->{'RPF'}->{'PRECISION'},$data->{'RPF'}->{'RECALL'},$data->{'RPF'}->{'DP_SCORE'}]);
	#		cpush(\@values,[$rmsd{$target}->{$source}->{'Ca_R'}]);
                        my $z_score_ref = $data->{'QFACTORS'}->{'ZSCORE'};
			cpush(\@values,[$z_score_ref->{'VERIFY3D'},$z_score_ref->{'PROSAII_NEG'},$z_score_ref->{'PROCHECK_G_PHI_PSI'},$z_score_ref->{'PROCHECK_G_ALL'},$z_score_ref->{'MAGE_CLASH'}]);
			cpush(\@psvss,[$z_score_ref->{'VERIFY3D'},$z_score_ref->{'PROSAII_NEG'},$z_score_ref->{'PROCHECK_G_PHI_PSI'},$z_score_ref->{'PROCHECK_G_ALL'},$z_score_ref->{'MAGE_CLASH'}]);
			cpush(\@values,[$info{'mol'},$info{'input'}],1);
			foreach (@values){
				print O "<TD>$_</TD>\n";
			}
			print O "</TR>\n";		
			print P join("\t",@psvss);
			$j++;
		}		
	}
	print O "</TABLE>\n";
	print O "<sup>1</sup> Y. Zhang, J. Skolnick, Scoring function for automated assessment of protein structure template quality, Proteins, 2004 57: 702-710<BR/>\n";
	print O "<sup>1</sup> RMSD calculated by Pymol<BR/>\n";
	print O "<sup>3</sup> Zemla A. (2003)  LGA: A method for finding 3D similarities in protein structures.<br/>\n";
	print O "<sup>4</sup>Ensemble RMSDs and Procheck Z-Scores are calculated based on ordered residues of NMR structures deposited in PDB, calculated By PdbStat.<br/>\n";
	print O "<sup>5</sup> Input files including coordinate files, peak list files, chemical shift files and RPF control file. <br/>\n";
	print O "</BODY>\n</HTML>\n";
	close O;
	close P;
}

#check if data exists before push
sub cpush{
	my ($array_ref,$items_ref,$link_b) = @_;
	foreach my $item (@$items_ref){
		if(defined $item){
			$item = round($item,3)	if($item =~ /^\-?\d+\.\d{4}/);
			$item = "<A href='$item' target='_blank'>link</A>" if($link_b);
			push(@$array_ref,$item);
		}else{
			push(@$array_ref,'N/A');
		}
	}
	return;
}


#this script is to parse all the available PSVS report information
sub parsePSVS{
	my $projName = shift;
	my $projUrl = $psvs_url.$projName;
	my $projPath = $psvs_path.$projName;
	my %info = ();
	$info{'c_rpt'} = $projUrl.'/OUTPUT.html';
	$info{'s_rpt'} = $projUrl."/${projName}_results_summary.html";
	$info{'precMap'} = $projUrl.'/precision.jpg' if(-e "${projPath}/precision.jpg");
	$info{'sauPic'} = $projUrl.'/sausage.jpg' if(-e "${projPath}/sausage.jpg");
	$info{'molPic'} = $projUrl.'/molecule.jpg' if(-e "${projPath}/sausage.jpg");
	$info{'mol'} = $projUrl.'/selSi.pdb' if(-e "${projPath}/selSi.pdb");
	my $coorFile = $1 if($projName =~ /(.*?)\d*$/);
	#my $s_rpt_p = $psvs_path."/${projName}_results_summary.html";
	#open(S,"<$s_rpt_p") or die("failed to open file $s_rpt_p:$!");
	my $xml_path = $projPath."/${projName}_parseable_results.xml";
	my $data = XMLin($xml_path);
	$info{'data'} = $data;
	createInputHtml($projName);
	$info{'input'} = "${psvs_url}/htdocs/${projName}_input.html";
	return \%info;
}

sub createInputHtml{
	my $projName = shift;
	my $inputPath = $psvs_path.$projName.'/input';
	my $inputUrl = $psvs_url.$projName.'/input/';
	open(H,">$psvs_path/htdocs/${projName}_input.html") or die("failed to open input html:$!");
	print H "<HTML>\n<HEAD>\n<TITLE>Input files for $projName</TITLE>\n<link rel='stylesheet' type='text/css' href='http://psvs.nesg.org/eNMR/htdocs/eNMR.css' />\n</HEAD>\n";
	print H "<BODY>\n";
	my %inp_files = ();
	opendir(ID,$inputPath) or die("failed to open directory $inputPath:$!");
	my @files = readdir(ID);
	close ID;
	foreach (@files){
		if(/pdb$/){
			push(@{$inp_files{'coordinates file'}},$_);
		}elsif(/bmrb$/){
			push(@{$inp_files{'chemical shift file'}},$_);
		}elsif(/list$/ or /peaks$/){
			push(@{$inp_files{'peaks list files'}},$_);
		}elsif(/upl$/ or /lol$/ or /mr$/){
			push(@{$inp_files{'constraints files'}},$_);
		}elsif(/ctrl_file/){
			push(@{$inp_files{'RPF control file'}},$_);
		}
	}
	print H "<TABLE align='center'><CAPTION>Table for input files of $projName</CAPTION>";
	print H "<TR><TH>file description</TH><TH>file names</TH></TR>\n";
	foreach my $keyword (sort keys %inp_files){
		print H "<TR><TD>$keyword</TD>";
		my $content = '';
		foreach (@{$inp_files{$keyword}}){
			$content .= "<A href='${inputUrl}/$_' target='_blank'>$_</A>&nbsp;&nbsp;";
		}
		print H "<TD>$content</TD></TR>\n";
	}
	print H "</TABLE>\n</BODY>\n</HTML>";
	close H;	
}
sub round{
	my ($value,$n) = @_;
	return int($value*(10**$n)+0.5)/(10**$n);
}


