#!/usr/local/bin/perl

$ENV{'PERL5LIB'}="/usr/local/lib/perl5";
$ENV{'PGPLOT_BACKGROUND'}="white";
$ENV{'PGPLOT_FOREGROUND'}="black";
#require ("/usr/local/lib/perl5/pgplot.pl");
&primeRegSubs;
if ($ARGV[0]=~/-gtest/)
{
#  &blastGraph($ARGV[1],"/XWIN"); exit;
}

$queryString = $ENV{'QUERY_STRING'};

if ( $queryString=~/^gif=on/i) { $gifOn=1; }  else { $gifOn=0 ; }
$queryString=~s/^gif=[a-z]+\&//;

if ( $queryString =~ m/sequence/) 
{ &doblast; }
else { &intro; }
exit;


sub doblast 
{
&printHeader("BLASTN Results");
$queryString =~ s/%3E/>/g;
$queryString =~ s/%0A/\n/g;
$queryString =~ s/%0D/ /g;
$queryString =~ s/^sequence=//g;
  $queryString =~ s/[+]/ /g;

$database = "$dpiDocsPath/sites.fasta";

$blastFile=&genSeqFile($queryString);

$command="dec_blast/blastall -pblastn -b0 -ddec_blast/sites.fasta -i$blastFile -o$blastFile.out\n";
#print "Executing $command\n";
system($command);

#`touch /tmp/index.html; dec_blast/blastall -pblastn -b0 -ddec_blast/sites.fasta -i$blastFile -o$blastFile.out`;
;

if ($gifOn==1)
  {
#    &blastGraph("$blastFile.out","$blastFile.gif/GIF");
    print "<img src=\"http://arep.med.harvard.edu$blastFile.gif\">\n";
    print "<p>Blue: forward strand. Red: complementary strand.<p>\n";
}
open(BLAST,"$blastFile.out");
while ( $_ = <BLAST> )
{
  if (! m/^Notice:|^To identify|^BLASTN|^Reference|^and David|^215:403|Smallest$|Poisson$|Probability$/)
   { 
     if    (m/^Searching/) { print "<pre>";  }
     elsif (m/^Sequences producing/) {}
#       { print "<pre>\n\n\t\t+ Proven site\t? Candidate site\t- Pseudo-site\n"; }
     elsif (m/^Query=/) { print "<PRE>$_"; }
     elsif (m/^Database:/) { print "<pre>Database:  Ec DNA BP Ds:\t+ Proven\t? Candidate\t- Pseudo\n"; } 
     else
      {
        s!^([+\-\?])([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!$1<a href="$dpiGene$2#$4">$2$3$4</a>!o;
            s!^>(.)([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!>$1<a href="$dpiGene$2#$4">$2$3$4</a>!o;
        print $_;

      }
   }
}
print "</pre>$claimer";
if (rand(10)<1) { &cleanup; }
  exit;
} 

sub intro
{
&printHeader("BLASTN Search");
print '
<FORM METHOD="GET" ACTION="',$dpiBlastnSearch,'">
<pre>
';
print '
<hr>
<h2>DNA sequence to search<input type="submit" value="BLAST away!   "></h2>
<inPUT TYPE="reset" VALUE="Clear">',"\n";
# print '<input name="gif" type="checkbox">Graphical Map of Hits',"\n";
print '
<textarea name="sequence" cols="100" rows="10"></textarea></FORM>
</pre>
Enter a DNA sequence with either no comments or in FASTA format. You may include spaces and carriage returns in the sequence entry; they will be ignored by BLAST.
<p>

</pre>
<hr>
Please report problems with this service to krobison@nucleus.harvard.edu
';
 exit;
}


sub blastGraph
{
  local($blastData,$outDevice)=@_;
  local($i);
  open(BLAST,$blastData);
  while ( $_ = <BLAST> )
    {
      if (/^ +\([0-9]+ letters/)
	{
	  s/^ +[(]//; 
	  ($seqLen)=split;
	}
      elsif (/^>/)
	{
	  chop; ($hitId)=split; 
	  $hitId=~s/_.*$//; 
	  $hitId=~s/^>.//;
	  push(@hitNames,$hitId);	  
	}
      elsif (/^Query:/)
	{
	  ($dum,$qStart,$dum,$qEnd)=split;
	  push(@qStarts,$qStart);
	  push(@qEnds,$qEnd);
	  if ($qStart<$qEnd) { push(@qStrands,1); }
	  else               { push(@qStrands,-1); }
	}
   }
  &makePlot($outDevice,$seqLen,*hitNames,*qStarts,*qEnds,qStrands);	
  close(BLAST);      
 }
  
#!/usr/local/bin/perl

&primeRegSubs;

sub printHeader
{
 ($title,$isIndex,$notScript)=@_;
 if ($notScript==1) {} else { print "Content-type: text/html\n"; }
 print "\n";
 print "<head>\n";
 print "<TITLE>DPInteract: $title</TITLE>\n";
 if ($isIndex==1) { print "<isindex>\n"; }
 print "</head>\n";
 print "<body bgcolor=\"FFFFFF\"><h1>$title</h1>\n";
}
sub primeRegSubs
{
$RelayBase = "http://golgi.harvard.edu/htbin/relay";
 $Cgsc     = "http://cgsc.biology.yale.edu/cgi-bin/sybgw/cgsc/Site/";
 $GenbankA = "$RelayBase/genbank-acc?";

 $Medline  = "http://golgi.harvard.edu/htbin/dpinteract/medline-muid?";
 $MedNeigh = 'http://atlas.nlm.nih.gov:5700/htbin/enf/entrezmmnei?';
 $Pir      = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&uid=";
 $Pdb      = 'http://expasy.hcuge.ch/cgi-bin/pdb_entry_or_image?';
 $Prints   = 'http://www.biochem.ucl.ac.uk/cgi-bin/attwood/DoPRINTS.pl?cmd_a=Display&qua_a=/Full&fun_a=Code&qst_a=';
 $Blocks   = 'http://www.blocks.fhcrc.org/blocks-bin/getblock.www?';

 $dpiServer = "http://arep.med.harvard.edu";
 $dpiCgi = "$dpiServer/cgi-bin/dpinteract";
 $dpiDocsPath = "/usr/arep/a3/httpd/htdocs/dpinteract"; 
 $dpiDocsUrl ="$dpiServer/dpinteract";
 $dpiFam   = "$dpiCgi/family?";
 $dpiSfam  = "$dpiCgi/subfamily?";
 $dpiGene  = "$dpiCgi/gene?";
 $dpiClass = "$dpiCgi/class?";
 $dpiMsearch = "$dpiCgi/msearch?"; 
 $dpiSearch = "$dpiCgi/search?"; 
 $dpiRefSearch = "$dpiCgi/ref?";
 $dpiMrefSearch = "$dpiCgi/mref?";
 $dpiSeqregSearch = "$dpiCgi/seqreg?";
 $dpiBlastnSearch = "$dpiCgi/blastn";
 $dpiMatSearch = "$dpiCgi/matsrch";

 $Expasy  = "http://expasy.hcuge.ch";
 $Prosite  = "$Expasy/cgi-bin/get-prosite-entry?";
 $Prodoc   = "$Expasy/cgi-bin/get-prodoc-entry?";
 $Scop     = "http://www.bio.cam.ac.uk/scop/data";
# $Sprot    = "$Expasy/cgi-bin/get-sprot-entry?";
 $Sprot    = "http://expasy.hcuge.ch/cgi-bin/get-sprot-entry?";
 $SprotPi  = "http://expasy.hcuge.ch/cgi-bin/getpI?";
 $seqAnalRef = "$Expasy/cgi-bin/get-seqanalr-entry?";

 $ecocycGene="http://ecocyc.ai.sri.com:1555/new-image?type=GENE&object=EG";
 $ecocycMap="http://ecocyc.ai.sri.com:1555/new-image?type=LOCUS-POSITION&object=EG";

 $entrezProtein = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&Dopt=r&uid=";
 $entrezMedline = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&Dopt=r&uid=";
 $mailKr = '<a href="mailto:krobison@nucleus.harvard.edu">krobison@nucleus.harvard.edu</a>';
 
 $claimer = "<hr>
Help: <a href=\"/dpinteract/welcome.html\">Overview</a>, <a href=\"/dpinteract/fields.html\">Fields</a><p>
<em>Note: This database copyright 1994 Harvard University</em>.

 
Robison, K., and Church, G.M. DPInteract: A database on DNA-protein interactions.  (1994).  Electronically published and manuscript in preparation.<p>
Please contact $mailKr to report additional data or problems with this database.<p>
<a href=\"/dpinteract/index.html\"><img width=200 height=102 align=middle src=\"http://arep.med.harvard.edu/dpinteract/trpR_metJ.jpg\">
DPInteract Home</a>
</body>
</html>
";
 $subsPrimed = 1;
 return
}

$docs = '<h2>Documentation</h2>
<ul>
<li><a href="/dpinteract/welcome.html">Welcome and Overview</a>
<li><a href="/dpinteract/fields.html">Field Abbreviations</a>
</ul>';

sub makeDpiSubs
{
  if (/^AC/)
    {
      s/^AC\tBDBPG?/AC\tDP/;
    }
  if (m/^PG/ )
    {  s#^PG[\t]*([A-Za-z0-9]*)#PG	<a href="$dpiSfam$1">$1 sub-family</a>#o; }  
  if (m/^PF/ && ! m/Unclassified[^_]/i)
    {  s#^PF[\t]*([A-Za-z0-9]*)#PF	<a href="$dpiFam$1">$1 family</a>#o; }
  if (m/^PC/ && ! m/Unclassified/i)
  {  s#^PC[\t]*([A-Za-z0-9\-]*)#PC	<a href="$dpiClass$1">Class $1</a>#o; }
  if (m/^SP/)
    {
      s#([A-Za-z0-9_]*)(.)([A-Z][0-9]{5})#SwissProt: $1$2$3 <a href="$Sprot\U$3\E">Expasy</a> <a href="$entrezProtein$3">Entrez</a> <a href="$SprotPi$3">pI &amp; MW</a>#o; 
    }
  if ( m/^XR|^RX/ )
    {
      s#Gene: *([A-Za-z0-9_]*)#Gene: <a href="$dpiGene$1">$1</a>#oi;
      s#Family: *([A-Za-z0-9_]*)#Family: <a href="$dpiFam$1">$1</a>#io;
      s#Class: *([A-Za-z0-9_\-]*)#Class: <a href="$dpiClass$1">$1</a>#io;
      s#\t(BL[0-9]*)#\tBlocks:                  <a href="$Blocks$1">$1</a>#o;
    s#\tEG([0-9]*)#\tEcoGene $1 <a href="$ecocycMap$1">Map</a> <a href="$ecocycGene$1">EcoCyc</a>#io;
      s#\t(PS[0-9]*)#\tProsite Pattern          <a href="$Prosite$1">$1</a>#o;
      s#\t(PDOC[0-9]*)#\tProsite Documentation  <a href="$Prodoc$1">$1</a>#o;
      s#\tCGSC:?([0-9]*)#\t<a href="$Cgsc$1">Coli Genetic Stock Center: Site \#$1</a>#o;
      s#\tPIR:([A-Z0-9]*)#	<a href="$entrezProtein$1">PIR (Entrez):$1</a>#o;
      s#\tPDB:([A-Z0-9]*)#	<a href="$Pdb$1">PDB:$1</a>#o;
      s#\tGI:([A-Z0-9]*)#	<a href="$entrezProtein$1">Entrez:$1</a>#o;
      s#\tPRINTS:\t*([A-Z0-9]+)#	<a href="$Prints$1">Prints:$1</a>#oi;
      s#SAR:([A-Z]*[0-9]*)#<a href="$seqAnalRef$1">SeqAnalRef $1</a>#o;
      s#SCOP([0-9\.]*)#<a href="$Scop/$1.html">SCOP:$1</a>#o;
    }
  s#^RM	([0-9]*)#RM	<a href="$entrezMedline$1">Medline MUID $1</a>#o;  
  if (/^RL/ && /J Biol Chem/ && /\(199[5-9]\)/)
    {
      $tmp=$_; $tmp=~s/^.*J Biol Chem *//; 
      $tmp=~s/://;
      $tmp=~s/-/ /;
      local($vol,$fp)=split(/[ \t\n]+/,$tmp);
      s#$#\t<a href="http://www-jbc.stanford.edu/jbc/scripts/abstract/vpref=$vol:$fp">JBC On-Line</a>#;
    }
  return;
}

sub cleanup
{
$oneHourInSeconds = 60*60;
$cutoff=6*$oneHourInSeconds;
@fileList=split(/[ \t\n]+/,`ls /tmp/dpi* /tmp/sh*`);
$myUid=$<;
for ($i=0; $i<=$#fileList; $i++)
{
  ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
              $atime,$mtime,$ctime,$blksize,$blocks)
                  = stat($fileList[$i]);
  if ($uid==$myUid && (($t-$mtime) > $cutoff))
    {
      unlink($fileList[$i]);
    }
}
}

sub acc2Name
{
 local($targAcc)=@_;
 local($dum,$name,$regexp);
 $targAcc=~s/^DP0*//;
 $regexp="^DP0*$targAcc";

 open(ACCTAB,"$dpiDocsPath/accessions.tab");
 $name=$targAcc;
 while ( $_ = <ACCTAB> )
   {
     if (/$regexp/o)
       {

	 chop;
	 ($dum,$name)=split;
	 close(ACCTAB);
       }
   }
 $name=~s/[.][a-z]*$//i;
 return $name;
}

sub getEntry
{
  # 0 args; get Gene $ARGV[0]
  # 1 arg;  specify entry type in first arg, entry id in $ARGV[]0
  # 2 args; specify entry type in first arg, entry id in 2nd arg
  local($targ,$entryType,*args,$origName);
  @args=@_;
  if ($#args<0) { $entryType="Gene"; } else { $entryType= $args[0]; }
  if ($#args<1) { $targ=$ENV{'QUERY_STRING'};    } 
  else { $targ     = $args[1]; }

  &primeRegSubs;
  
  $targ=~s/[ \t\n]*//g;  
  $origName=$targ;

  if ($targ=~/^DP|^[0-9]/)
    {
      $targ=&acc2Name($targ);
   } 
  &printHeader("$targ $entryType");
  local($targFile);
  if    ($entryType=~/gene/i)   { $targFile ="genes/$targ.gene"; }
  elsif ($entryType=~/family/i) { $targFile ="fams/$targ.fam"; }
  elsif ($entryType=~/class/i)  { $targFile =     "$targ.class"; }
  $targFile="$dpiDocsPath/$targFile"; 
  if ( -e $targFile )
    {
      open (ENTRY,$targFile);
      print "<PRE>\n";
      $bCount=0;
      while ( $_ = <ENTRY> )
	{
	  &makeDpiSubs;
	  if (m/^B[PNS]\t/) { $bCount++; print "<a name=\"$bCount\">"; }
	  print $_;
	}
    }
  else
    {
      $targ="Not Found";
      print "<h2>Not Found!</h2> DPInteract does not contain an entry of type <b>$entryType</b> for <b>$origName</b>.<p>
  If you reached this point from an outside database, please contact the adminstrator(s) for that database; if you reached here from another location in DPInteract, please send E-mail to $mailKr.";

    }
  return $targ;
}

sub msearchForm
{
local($i);
print '<form action="',$dpiMsearch,'">',"\n";
print "Blank terms are ignored. Only gene entries are searched.  A gene entry must match all terms to be returned (boolean AND).<p>\n";
for ($i=0; $i<4; $i++)
{
  print 'Term: <input name="K',$i,'">',"\n";
  print 'Search field(s) <select name="F',$i,'"><option selected>All<Option>DE (Definition)<Option>PC (Protein Class)<Option>PF (Protein Family)<option>OS (Organism, Species)<option>RA (Reference Author)<option>RK (Reference Keyword)<option>RT (Reference Title)</select><br>',"\n";
}
print '<select name="OS"><option selected>Any species
<option>Escherichia coli K12
<option>Escherichia coli (all)
<option>Salmonella typhimurium
</select><br>';
print '<select name="PC"><option selected>Any structural class
<option>Helix-Turn-Helix
<option>Beta-Ribbon
<option>Probe-Helix
<option>Zinc-Finger
<option>Structural Class Unclassified 
</select><br>';
print '<inPUT TYPE="submit" VALUE="Search"><inPUT TYPE="reset" VALUE="Reset">',"\n";
print '</form>',"\n";

}
sub mrefForm
{
local($i);
print "\n",'<form action="',$dpiMrefSearch,'">',"\n";
print "Blank terms are ignored. Only gene entries are searched.  A gene entry must match all terms to be returned (boolean AND).<p>\n";
for ($i=0; $i<=4; $i++)
{
  print "\n",'Term: <input name="K',$i,'">',"\n";
  print 'Search citation field(s) <select name="F',$i,'"><option selected>All
<option>RL (Reference citation)<option>RA (Reference Author)
<option>RT (Reference Title)<option>RK (Reference Keyword)</select><br>',"\n";
}
if (1==0)
{
  print "<a href=\"$dpiDocsPath/rk.html\">Reference Keyword</a><select name=\"F\">\n";
  open(RKLIST,"$dpiDocsPath/rk.list");
  while ($_ = <RKLIST> )
    {
      chop;
      ($code,$rk,$desc)=split;
      print "<option>$rk\n";
    }
  print "</select>\n";
}
print '<inPUT TYPE="submit" VALUE="Search"><inPUT TYPE="reset" VALUE="Reset">',"\n";
print '</form>',"\n";
}

sub genSeqFile
{
  local($seqData)=@_;
  local($rand)= $$;
  local($searchFile)="/tmp/dpi.$rand";
  local(*seqDataLines,$line);
  $searchFile=~s/%3E/>/;
  open (SEARCH_INPUT, ">$searchFile");
  @seqDataLines=split(/\n/,$seqData);
  print SEARCH_INPUT ">Your Query\n" unless ($seqDataLines[0]=~/^>/);
  foreach $line(@seqDataLines)
    {
      unless ($line=~/^>/)
	{
	  $line=~s/[^A-Za-z]+//g;
	}
      print SEARCH_INPUT $line,"\n";
    }
  close(SEARCH_INPUT);
  return ($searchFile);
}
1;

sub makePlot
{
  local($outDevice,$seqLen,*hitNames,*qStarts,*qEnds,*qStrands)=@_;
  local($i,$baseY,$y2,*hitList,*nameList,$textUp,$seqNum,$seqTick);
  &pgbeg(0,$outDevice,1,1);
  &pgpap(6,0.4);
  &pgpage;	    
  &pgvstd;
  open(LOG,">/tmp/bpbd.makePlot.log");
  for ($i=0; $i<=$#hitNames; $i++)
    {
      $hitList{$hitNames[$i]}=1;
    }
  @nameList=reverse(sort(keys(%hitList)));
  &pgswin(-10,$seqLen+10,0,$#nameList+2);
  &pgsch(2);
  &pglabel("Query","Matches","Hits");
  
  &pgbox("NS",0,0,"",100,10);
  $textUp=0.85;
  for ($i=0; $i<= $#nameList; $i++)
    {
      $hitList{$nameList[$i]}=$i+1;
      &pgptxt(0,$i+$textUp,0,1.0,$nameList[$i]);
      &pgptxt($seqLen+10,$i+$textUp,0,1.0,$nameList[$i]);
    }
  
  
  for ($i=0; $i<= $#hitNames; $i++)
    {
      $qStart=$qStarts[$i];
      $qEnd=$qEnds[$i];
      $hitId=$hitNames[$i];
      $y1=$hitList{$hitId};
      if ($qStrands[$i]==0)  # Neither strand
	{
	  $y2=$y1+0.25;
	  $y1=$y1-0.25;
	  &pgsci(1);
	}
      elsif ($qStrands[$i]==1)
	{
	  &pgsci(4); 
	  $y2=$y1+0.25;
	}
      elsif ($qStrands[$i]==-1)
	{
	  &pgsci(2); 
	  $y2=$y1-0.25;	  
	}
      &pgrect($qStart,$qEnd,$y1,$y2);
      print LOG "$i\t$hitId\t$qStart\t$qEnd\t$y1\t$y2\n";      
    }
  &pgend;
}
