#!/usr/local/bin/perl

# Script entry point.
#   * Invoked with at least one command-line argument: render the gene
#     entry via getEntry("Gene"), then close the <pre> block and append
#     the standard footer held in $claimer.
#   * Invoked with no arguments: emit only the "Gene Lookup" page header
#     with the <isindex> prompt enabled (second argument 1).
if ( @ARGV )
{
 $targGene = &getEntry("Gene");
 print "</pre>$claimer";
}
else
{
 &printHeader("Gene Lookup", 1);
}

exit;


#!/usr/local/bin/perl

# Load-time call that fills in the URL prefixes and HTML boilerplate
# globals assigned by primeRegSubs (defined below).
# NOTE(review): as this file is laid out, the top-level `exit;` above runs
# before this statement is reached, so it appears never to execute here;
# getEntry calls primeRegSubs itself before it needs those globals.
&primeRegSubs;

# printHeader($title, $isIndex, $notScript)
#
# Writes the opening part of a DPInteract HTML page to the currently
# selected output handle.
#   $title     - text placed in <TITLE> (after "DPInteract: ") and in <h1>;
#                it is printed as given, without HTML escaping.
#   $isIndex   - when numerically equal to 1, an <isindex> tag is emitted.
#   $notScript - when numerically equal to 1, the "Content-type" line is
#                omitted; the blank line that follows it is always printed.
# The three arguments are stored in the package globals $title, $isIndex
# and $notScript, exactly as received.
sub printHeader
{
  ($title, $isIndex, $notScript) = @_;

  print "Content-type: text/html\n" unless $notScript == 1;
  print "\n<head>\n<TITLE>DPInteract: $title</TITLE>\n";
  print "<isindex>\n" if $isIndex == 1;
  print qq{</head>\n<body bgcolor="FFFFFF"><h1>$title</h1>\n};
}
# primeRegSubs()
#
# Initialises the package globals used when turning database records into
# hyperlinked HTML: URL prefixes for external services, the DPInteract
# server/CGI/document locations, the maintainer mail link ($mailKr) and
# the page footer ($claimer).  Sets $subsPrimed to 1 when done.
# Takes no arguments and returns nothing.
sub primeRegSubs
{
  # --- Relay, stock centre, literature and motif databases ---
  $RelayBase = 'http://golgi.harvard.edu/htbin/relay';
  $GenbankA  = $RelayBase . '/genbank-acc?';
  $Cgsc      = 'http://cgsc.biology.yale.edu/cgi-bin/sybgw/cgsc/Site/';
  $Medline   = 'http://golgi.harvard.edu/htbin/dpinteract/medline-muid?';
  $MedNeigh  = "http://atlas.nlm.nih.gov:5700/htbin/enf/entrezmmnei?";
  $Pdb       = "http://expasy.hcuge.ch/cgi-bin/pdb_entry_or_image?";
  $Prints    = "http://www.biochem.ucl.ac.uk/cgi-bin/attwood/DoPRINTS.pl?cmd_a=Display&qua_a=/Full&fun_a=Code&qst_a=";
  $Blocks    = "http://www.blocks.fhcrc.org/blocks-bin/getblock.www?";
  $Scop      = 'http://www.bio.cam.ac.uk/scop/data';

  # --- Entrez queries share one endpoint and differ only in parameters ---
  my $entrezQuery = 'http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?';
  $Pir           = $entrezQuery . 'db=p&form=6&uid=';
  $entrezProtein = $entrezQuery . 'db=p&form=6&Dopt=r&uid=';
  $entrezMedline = $entrezQuery . 'db=m&form=6&Dopt=r&uid=';

  # --- EcoCyc image pages (the "EG" object prefix is part of the URL) ---
  my $ecocycImage = 'http://ecocyc.ai.sri.com:1555/new-image?type=';
  $ecocycGene = $ecocycImage . 'GENE&object=EG';
  $ecocycMap  = $ecocycImage . 'LOCUS-POSITION&object=EG';

  # --- ExPASy-hosted services ---
  $Expasy     = 'http://expasy.hcuge.ch';
  $Prosite    = $Expasy . '/cgi-bin/get-prosite-entry?';
  $Prodoc     = $Expasy . '/cgi-bin/get-prodoc-entry?';
  $Sprot      = $Expasy . '/cgi-bin/get-sprot-entry?';
  $SprotPi    = $Expasy . '/cgi-bin/getpI?';
  $seqAnalRef = $Expasy . '/cgi-bin/get-seqanalr-entry?';

  # --- DPInteract's own server, CGI programs and document tree ---
  $dpiServer       = 'http://arep.med.harvard.edu';
  $dpiCgi          = $dpiServer . '/cgi-bin/dpinteract';
  $dpiDocsUrl      = $dpiServer . '/dpinteract';
  $dpiDocsPath     = '/usr/arep/a3/httpd/htdocs/dpinteract';
  $dpiFam          = $dpiCgi . '/family?';
  $dpiSfam         = $dpiCgi . '/subfamily?';
  $dpiGene         = $dpiCgi . '/gene?';
  $dpiClass        = $dpiCgi . '/class?';
  $dpiMsearch      = $dpiCgi . '/msearch?';
  $dpiSearch       = $dpiCgi . '/search?';
  $dpiRefSearch    = $dpiCgi . '/ref?';
  $dpiMrefSearch   = $dpiCgi . '/mref?';
  $dpiSeqregSearch = $dpiCgi . '/seqreg?';
  $dpiBlastnSearch = $dpiCgi . '/blastn';     # no trailing '?'
  $dpiMatSearch    = $dpiCgi . '/matsrch';    # no trailing '?'

  # --- Maintainer contact link ---
  my $krAddress = 'krobison@nucleus.harvard.edu';
  $mailKr = "<a href=\"mailto:$krAddress\">$krAddress</a>";

  # --- Page footer; also closes <body> and <html> ---
  # Built line by line; the '' and ' ' elements reproduce the empty line
  # and the single-space line of the footer text, and the final ''
  # supplies the trailing newline.
  $claimer = join("\n",
    '<hr>',
    'Help: <a href="/dpinteract/welcome.html">Overview</a>, <a href="/dpinteract/fields.html">Fields</a><p>',
    '<em>Note: This database copyright 1994 Harvard University</em>.',
    '',
    ' ',
    'Robison, K., and Church, G.M. DPInteract: A database on DNA-protein interactions.  (1994).  Electronically published and manuscript in preparation.<p>',
    "Please contact $mailKr to report additional data or problems with this database.<p>",
    '<a href="/dpinteract/index.html"><img width=200 height=102 align=middle src="http://arep.med.harvard.edu/dpinteract/trpR_metJ.jpg">',
    'DPInteract Home</a>',
    '</body>',
    '</html>',
    '');

  $subsPrimed = 1;
  return;
}

# Static HTML fragment: a "Documentation" heading followed by a two-item
# list linking to the welcome/overview page and the field-abbreviation
# page.  The fragment has no trailing newline.
$docs = join("\n",
  '<h2>Documentation</h2>',
  '<ul>',
  '<li><a href="/dpinteract/welcome.html">Welcome and Overview</a>',
  '<li><a href="/dpinteract/fields.html">Field Abbreviations</a>',
  '</ul>');

# makeDpiSubs()
#
# Rewrites the record line currently held in $_ in place, replacing
# recognised field contents with HTML hyperlinks.  Which substitutions
# apply is decided by the two-letter tag at the start of the line.  The
# URL prefixes come from package globals ($dpiSfam, $dpiFam, $Sprot, ...)
# that primeRegSubs assigns in this file.  Takes no arguments and returns
# nothing; the result is left in $_.
#
# None of the patterns below interpolate a variable, so the /o modifier on
# them does not change what is matched; the replacement text is still
# interpolated on every call.
sub makeDpiSubs
{
  # AC: rewrite an accession prefix of BDBP or BDBPG to DP.
  if (/^AC/)
    {
      s/^AC\tBDBPG?/AC\tDP/;
    }
  # PG: link the alphanumeric token after the tag to the sub-family page.
  if (m/^PG/ )
    {  s#^PG[\t]*([A-Za-z0-9]*)#PG	<a href="$dpiSfam$1">$1 sub-family</a>#o; }  
  # PF: link to the family page, unless the line contains "Unclassified"
  # followed by any character other than an underscore.
  if (m/^PF/ && ! m/Unclassified[^_]/i)
    {  s#^PF[\t]*([A-Za-z0-9]*)#PF	<a href="$dpiFam$1">$1 family</a>#o; }
  # PC: link to the class page, unless the line contains "Unclassified".
  if (m/^PC/ && ! m/Unclassified/i)
  {  s#^PC[\t]*([A-Za-z0-9\-]*)#PC	<a href="$dpiClass$1">Class $1</a>#o; }
  # SP: the first occurrence of <identifier chars><any one char><capital
  # letter + five digits> is expanded into ExPASy, Entrez and pI/MW links
  # keyed on the letter+digits part (upper-cased for the ExPASy link).
  # Presumably this is a SwissProt entry name followed by its accession --
  # confirm against the data files.
  if (m/^SP/)
    {
      s#([A-Za-z0-9_]*)(.)([A-Z][0-9]{5})#SwissProt: $1$2$3 <a href="$Sprot\U$3\E">Expasy</a> <a href="$entrezProtein$3">Entrez</a> <a href="$SprotPi$3">pI &amp; MW</a>#o; 
    }
  # XR / RX: cross-reference lines.  Each substitution links the first
  # occurrence of one kind of identifier; most are recognised only when
  # preceded by a tab.
  if ( m/^XR|^RX/ )
    {
      s#Gene: *([A-Za-z0-9_]*)#Gene: <a href="$dpiGene$1">$1</a>#oi;
      s#Family: *([A-Za-z0-9_]*)#Family: <a href="$dpiFam$1">$1</a>#io;
      s#Class: *([A-Za-z0-9_\-]*)#Class: <a href="$dpiClass$1">$1</a>#io;
      s#\t(BL[0-9]*)#\tBlocks:                  <a href="$Blocks$1">$1</a>#o;
    s#\tEG([0-9]*)#\tEcoGene $1 <a href="$ecocycMap$1">Map</a> <a href="$ecocycGene$1">EcoCyc</a>#io;
      s#\t(PS[0-9]*)#\tProsite Pattern          <a href="$Prosite$1">$1</a>#o;
      s#\t(PDOC[0-9]*)#\tProsite Documentation  <a href="$Prodoc$1">$1</a>#o;
      s#\tCGSC:?([0-9]*)#\t<a href="$Cgsc$1">Coli Genetic Stock Center: Site \#$1</a>#o;
      s#\tPIR:([A-Z0-9]*)#	<a href="$entrezProtein$1">PIR (Entrez):$1</a>#o;
      s#\tPDB:([A-Z0-9]*)#	<a href="$Pdb$1">PDB:$1</a>#o;
      s#\tGI:([A-Z0-9]*)#	<a href="$entrezProtein$1">Entrez:$1</a>#o;
      s#\tPRINTS:\t*([A-Z0-9]+)#	<a href="$Prints$1">Prints:$1</a>#oi;
      s#SAR:([A-Z]*[0-9]*)#<a href="$seqAnalRef$1">SeqAnalRef $1</a>#o;
      s#SCOP([0-9\.]*)#<a href="$Scop/$1.html">SCOP:$1</a>#o;
    }
  # RM: link the number after the tab to its Entrez Medline record.  This
  # substitution is attempted on every line; the ^RM anchor makes it a
  # no-op elsewhere.
  s#^RM	([0-9]*)#RM	<a href="$entrezMedline$1">Medline MUID $1</a>#o;  
  # RL: for "J Biol Chem" citations carrying a parenthesised year from
  # 1995 to 1999, append a "JBC On-Line" abstract link.
  if (/^RL/ && /J Biol Chem/ && /\(199[5-9]\)/)
    {
      # $tmp is a package global (not localised).  Keep only the text after
      # the journal name, delete the first ':' and turn the first '-' into
      # a space, then take the first two whitespace-separated tokens as
      # volume and first page.
      # NOTE(review): because the ':' is deleted rather than replaced by a
      # space, a citation written "vol:page" with no space would fuse both
      # numbers into $vol -- confirm against the RL line format.
      $tmp=$_; $tmp=~s/^.*J Biol Chem *//; 
      $tmp=~s/://;
      $tmp=~s/-/ /;
      local($vol,$fp)=split(/[ \t\n]+/,$tmp);
      # `$` matches before a trailing newline, so the link is inserted at
      # the end of the text but ahead of the line terminator.
      s#$#\t<a href="http://www-jbc.stanford.edu/jbc/scripts/abstract/vpref=$vol:$fp">JBC On-Line</a>#;
    }
  return;
}

# cleanup()
#
# Removes stale scratch files from /tmp: every entry matching /tmp/dpi*
# or /tmp/sh* that is owned by the current real user id and whose
# modification time is more than six hours in the past is unlinked.
# Takes no arguments; returns nothing.
#
# Fixes relative to the earlier version:
#   * The age test used a variable ($t) that was never assigned, so the
#     computed age was always negative and no file was ever removed.  The
#     current time is now taken from time().
#   * The file list comes from glob() instead of parsing `ls` output, so
#     no shell is spawned and a matching directory is no longer expanded
#     into its (path-less) contents.
#   * lstat() is used so that a symbolic link is judged by its own owner
#     and timestamp, which is also what unlink() removes.
sub cleanup
{
  my $cutoff = 6 * 60 * 60;     # six hours, in seconds
  my $now    = time;
  my $myUid  = $<;

  foreach my $file (glob("/tmp/dpi* /tmp/sh*"))
    {
      my @info = lstat($file);
      next unless @info;               # vanished between glob and lstat
      my ($uid, $mtime) = @info[4, 9];
      if ($uid == $myUid && ($now - $mtime) > $cutoff)
        {
          unlink($file);               # best effort; failures are ignored
        }
    }
  return;
}

# acc2Name($accession)
#
# Translates a DPInteract accession into an entry name by looking it up in
# "$dpiDocsPath/accessions.tab".
#   $accession - accession with or without the "DP" prefix and leading
#                zeros (e.g. "DP00012" or "12").
# Returns the second whitespace-separated field of the first line whose
# accession matches, with a trailing ".<letters>" extension removed.  When
# the table cannot be opened, no line matches, or the matching line has no
# second field, the accession itself (prefix and leading zeros stripped)
# is returned instead.
#
# Fixes relative to the earlier version:
#   * The accession must now be followed by a non-digit, so looking up 1
#     no longer matches DP00012.
#   * The pattern is rebuilt on every call (the /o modifier froze the
#     first accession ever looked up for the life of the process).
#   * The accession is quoted with \Q..\E, so regex metacharacters in
#     request-supplied input cannot alter or break the pattern.
#   * The loop ends with `last` on the first hit rather than relying on
#     reads from a just-closed handle, and $_ is no longer clobbered.
sub acc2Name
{
  my ($targAcc) = @_;
  $targAcc = "" unless defined $targAcc;
  $targAcc =~ s/^DP0*//;

  my $name = $targAcc;          # fallback when nothing is found

  if (open(my $accTab, '<', "$dpiDocsPath/accessions.tab"))
    {
      while (my $line = <$accTab>)
        {
          next unless $line =~ /^DP0*\Q$targAcc\E(?![0-9])/;
          chomp $line;
          my ($acc, $entry) = split(' ', $line);
          $name = $entry if defined $entry;
          last;
        }
      close($accTab);
    }

  $name =~ s/[.][a-z]*$//i;     # drop a file-style extension such as ".gene"
  return $name;
}

# dpiHtmlEscape($text)
# Private helper: returns $text with &, <, > and " replaced by HTML
# entities so request-supplied text can be echoed into a page safely.
sub dpiHtmlEscape
{
  my ($text) = @_;
  $text = "" unless defined $text;
  $text =~ s/&/&amp;/g;
  $text =~ s/</&lt;/g;
  $text =~ s/>/&gt;/g;
  $text =~ s/"/&quot;/g;
  return $text;
}

# getEntry([$entryType [, $entryId]])
#
# Prints one DPInteract entry as an HTML page fragment.
#   0 args: entry type "Gene"; entry id taken from $ENV{'QUERY_STRING'}
#   1 arg : entry type from the first arg; id from $ENV{'QUERY_STRING'}
#   2 args: entry type from the first arg; entry id from the second arg
# Blanks, tabs and newlines are removed from the id.  An id that starts
# with "DP" or a digit is treated as an accession and translated with
# acc2Name.  The page header is printed, then the entry file is streamed
# inside a <PRE> block with each line passed through makeDpiSubs and a
# numbered <a name="N"> anchor emitted before every BP/BN/BS line
# (the counter is the global $bCount).  The <PRE> is left open for the
# caller to close.
# Returns the resolved entry name, or the string "Not Found" after
# printing an explanatory message when no entry file is available.
#
# Fixes relative to the earlier version:
#   * The id comes from the request and was echoed into the page title and
#     the "Not Found" message unescaped; both are now HTML-escaped.
#   * An id containing "/" (or a NUL byte) is refused, so it cannot step
#     outside the document tree.
#   * The entry must be a plain file (-f) and must open successfully; an
#     unknown entry type used to resolve to the document directory itself,
#     pass the -e test and produce an empty <PRE> block.
#   * The entry handle is closed, and $_ is localised so the caller's
#     value survives.
sub getEntry
{
  my @args      = @_;
  my $entryType = (@args > 0) ? $args[0] : "Gene";
  my $targ      = (@args > 1) ? $args[1] : $ENV{'QUERY_STRING'};
  $targ = "" unless defined $targ;

  &primeRegSubs();

  $targ =~ s/[ \t\n]*//g;
  my $origName = $targ;

  if ($targ =~ /^DP|^[0-9]/)
    {
      $targ = &acc2Name($targ);
    }
  &printHeader(&dpiHtmlEscape($targ) . " $entryType");

  # Location of the entry relative to the document tree, by entry type.
  my $targFile = "";
  if    ($entryType =~ /gene/i)   { $targFile = "genes/$targ.gene"; }
  elsif ($entryType =~ /family/i) { $targFile = "fams/$targ.fam"; }
  elsif ($entryType =~ /class/i)  { $targFile = "$targ.class"; }

  my $entryFh;
  my $opened = 0;
  if ($targFile ne "" && $targ !~ m#[/\0]#)
    {
      $targFile = "$dpiDocsPath/$targFile";
      $opened = 1 if (-f $targFile && open($entryFh, '<', $targFile));
    }

  if ($opened)
    {
      local($_);                # makeDpiSubs works on $_; spare the caller's
      print "<PRE>\n";
      $bCount = 0;
      while (defined($_ = <$entryFh>))
        {
          &makeDpiSubs();
          if (m/^B[PNS]\t/) { $bCount++; print "<a name=\"$bCount\">"; }
          print $_;
        }
      close($entryFh);
    }
  else
    {
      my $shownName = &dpiHtmlEscape($origName);
      $targ = "Not Found";
      print "<h2>Not Found!</h2> DPInteract does not contain an entry of type <b>$entryType</b> for <b>$shownName</b>.<p>
  If you reached this point from an outside database, please contact the adminstrator(s) for that database; if you reached here from another location in DPInteract, please send E-mail to $mailKr.";

    }
  return $targ;
}

# msearchForm()
#
# Prints the multi-term gene search form.  The form submits to the URL in
# $dpiMsearch and consists of four term rows (text input K0..K3, each with
# a field selector F0..F3), a species selector named "OS", a structural
# class selector named "PC", and submit/reset buttons.
# Takes no arguments.
sub msearchForm
{
  # Options offered by every per-term field selector.
  my $fieldOptions = join('',
    '<option selected>All',
    '<Option>DE (Definition)',
    '<Option>PC (Protein Class)',
    '<Option>PF (Protein Family)',
    '<option>OS (Organism, Species)',
    '<option>RA (Reference Author)',
    '<option>RK (Reference Keyword)',
    '<option>RT (Reference Title)');

  print "<form action=\"$dpiMsearch\">\n";
  print "Blank terms are ignored. Only gene entries are searched.  A gene entry must match all terms to be returned (boolean AND).<p>\n";

  foreach my $row (0 .. 3)
    {
      print "Term: <input name=\"K$row\">\n";
      print "Search field(s) <select name=\"F$row\">$fieldOptions</select><br>\n";
    }

  # Species selector; no newline follows the closing <br>.
  print join("\n",
    '<select name="OS"><option selected>Any species',
    '<option>Escherichia coli K12',
    '<option>Escherichia coli (all)',
    '<option>Salmonella typhimurium',
    '</select><br>');

  # Structural class selector; the last option's text ends in a space.
  print join("\n",
    '<select name="PC"><option selected>Any structural class',
    '<option>Helix-Turn-Helix',
    '<option>Beta-Ribbon',
    '<option>Probe-Helix',
    '<option>Zinc-Finger',
    '<option>Structural Class Unclassified ',
    '</select><br>');

  print "<inPUT TYPE=\"submit\" VALUE=\"Search\"><inPUT TYPE=\"reset\" VALUE=\"Reset\">\n";
  print "</form>\n";
}
# mrefForm()
#
# Prints the multi-term reference search form.  The form submits to the
# URL in $dpiMrefSearch and consists of five term rows (text input K0..K4,
# each with a citation-field selector F0..F4) followed by submit/reset
# buttons.  Takes no arguments.
sub mrefForm
{
  # Options offered by every per-term selector (contains two newlines).
  my $citationOptions = join("\n",
    '<option selected>All',
    '<option>RL (Reference citation)<option>RA (Reference Author)',
    '<option>RT (Reference Title)<option>RK (Reference Keyword)');

  print "\n<form action=\"$dpiMrefSearch\">\n";
  print "Blank terms are ignored. Only gene entries are searched.  A gene entry must match all terms to be returned (boolean AND).<p>\n";

  foreach my $row (0 .. 4)
    {
      print "\nTerm: <input name=\"K$row\">\n";
      print "Search citation field(s) <select name=\"F$row\">$citationOptions</select><br>\n";
    }

  # Disabled: a keyword pick-list read from rk.list.  The condition is a
  # constant false, so nothing in this block ever runs.
  if (0)
    {
      print "<a href=\"$dpiDocsPath/rk.html\">Reference Keyword</a><select name=\"F\">\n";
      open(RKLIST, "$dpiDocsPath/rk.list");
      while ($_ = <RKLIST>)
        {
          chop;
          ($code, $rk, $desc) = split;
          print "<option>$rk\n";
        }
      print "</select>\n";
    }

  print "<inPUT TYPE=\"submit\" VALUE=\"Search\"><inPUT TYPE=\"reset\" VALUE=\"Reset\">\n";
  print "</form>\n";
}

# genSeqFile($seqData)
#
# Writes a submitted sequence to a FASTA-style scratch file and returns
# the file's path.
#   $seqData - sequence text, optionally with ">" header lines; a ">" may
#              arrive URL-encoded as "%3E".
# The file is "/tmp/dpi.<pid>".  If the first line is not a header, a
# ">Your Query" header is written first.  Header lines are copied as they
# are; on every other line all characters other than A-Z and a-z are
# removed.
#
# Fixes relative to the earlier version:
#   * The "%3E" -> ">" decoding was applied to the scratch file's path,
#     where it could never match, instead of to the sequence data.  An
#     encoded header line was therefore treated as sequence and its
#     letters were written into the query.  The decoding now operates on
#     the data.
#   * A failure to open the scratch file is reported on STDERR instead of
#     being ignored; the path is still returned so callers see the same
#     return shape.
#
# NOTE(review): the path is predictable (process id only) and is opened
# for writing without an exclusivity check.  Making it unpredictable would
# change the name returned to callers, so it is left unchanged here.
sub genSeqFile
{
  my ($seqData) = @_;
  $seqData = "" unless defined $seqData;
  $seqData =~ s/%3E/>/gi;

  my $searchFile = "/tmp/dpi.$$";
  my $out;
  unless (open($out, '>', $searchFile))
    {
      warn "genSeqFile: cannot write $searchFile: $!\n";
      return ($searchFile);
    }

  my @seqDataLines = split(/\n/, $seqData);
  my $hasHeader = (@seqDataLines && $seqDataLines[0] =~ /^>/);
  print $out ">Your Query\n" unless $hasHeader;

  foreach my $line (@seqDataLines)
    {
      # Sequence lines keep letters only; header lines pass through.
      $line =~ s/[^A-Za-z]+//g unless $line =~ /^>/;
      print $out $line, "\n";
    }
  close($out);
  return ($searchFile);
}
1;

