#!/usr/local/bin/perl
#
# DPInteract site search CGI, followed by the shared DPInteract helper
# routines (page header, URL table, entry formatting, search forms).
#
# With no command-line arguments the script prints the search help page;
# otherwise it treats QUERY_STRING as a regular expression and prints every
# record of sites.fasta whose sequence matches it.
#
# NOTE(review): several string literals and substitution replacements below
# look as though markup was stripped from them (for example `print "";`).
# They are reproduced unchanged -- confirm against the deployed copy.

&primeRegSubs;

if ( $#ARGV < 0 ) # No arguments
{
    &printHeader("Search Sites",1);
    print 'This index can be searched using a case-insensitive PERL regular expression. Partial words will be matched.';
    print ' Some of the available meta-characters
? Question mark
allow 0 or 1 of preceding expression; at?c will match ac or atc
* Asterisk
match 0 or more of the preceding expression.
[] Square brackets
match any one of the nucleotides inside the brackets. at[cg]at will match atcat and atgat. Alternatively, you may use IUPAC ambiguity codes.
. Period
match any nucleotide at this position.
| Vertical bar (style)
Logical OR. gaa|ttc will match either trinucleotide (this is the way to search both strands with a non-palindromic query)
IUPAC ambiguity codes are supported

Example queries


REBASE (Restriction Enzyme Database)
';
    # The footer ($claimer) is printed once for both branches after this
    # if/else; printing it here as well duplicated it on the help page.
}
else
{
    $regexp = $ENV{'QUERY_STRING'};

    # '+' encodes a space.  Translate it before %XX decoding so that an
    # escaped %2B survives as a literal '+'.
    $regexp =~ s/[+]{1,}/ /g;
    # Decode every %XX escape, in either hex case.
    $regexp =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack("C",hex($1))/ge;
    $regexp =~ s/ or /|/g;

    # IUPAC ambiguity code expansion
    $regexp =~ s/k/[gt]/gi;
    $regexp =~ s/m/[ac]/gi;
    $regexp =~ s/[nx]/./gi;
    $regexp =~ s/s/[cg]/gi;
    $regexp =~ s/r/[ag]/gi;
    $regexp =~ s/u/t/gi;
    $regexp =~ s/v/[acg]/gi;
    $regexp =~ s/w/[at]/gi;
    $regexp =~ s/y/[tc]/gi;
    $regexp =~ s/b/[cgt]/gi;
    $regexp =~ s/d/[agt]/gi;
    $regexp =~ s/h/[act]/gi;

    &printHeader("Site Search $regexp");
    &primeRegSubs;

    $regexp =~ tr/A-Z/a-z/;
    $regexpUC = $regexp;
    $regexpUC =~ tr/a-z/A-Z/;

    # Test-compile the user's pattern.  The eval'd code is a constant string
    # (single quotes), so no user text is executed as Perl; a malformed
    # pattern merely makes the eval fail instead of killing the script.
    $regexpOk = eval '"" =~ /$regexp/; 1';

    $seqsOpen = open (SEQS,"$dpiDocsPath/sites.fasta");
    warn "cannot open $dpiDocsPath/sites.fasta: $!\n" unless ($seqsOpen);

    print "

Important

This search is only on the actual sequence fragments contained on the database. Sites which exist in the genome may be missed if only a portion of the site is represented in our database.

\n";
    print "

\t+ Proven\t? Candidate\t- Pseudo site or non-binding mutant\n";
    print "
\n";

    if (!$regexpOk)
    {
        print "The query could not be interpreted as a regular expression.\n";
    }
    elsif ($seqsOpen)
    {
        # Records start with a '>' line; the lines up to the next '>' are
        # concatenated into $sequence.  &output reports the record that
        # has just been completed.
        while ( $_ = <SEQS> )
        {
            chop;
            if (m/^>/) # id
            {
                &output;
                $id = $_;
                $sequence = "";
            }
            else
            {
                s/[ \t\-]//g;
                $sequence = "$sequence$_";
            }
        }
        &output;    # flush the final record
    }
    close(SEQS) if ($seqsOpen);

    print "
";
}
print "$claimer";
exit;

# Print the current record ($id, $sequence) if $sequence matches $regexp.
# The sequence is lower-cased and each matched stretch is upper-cased.
sub output
{
    if ($sequence =~ m/$regexp/oi)
    {
        $id=~s!^([+\-\?])([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!$1$2$3$4!o;
        $id=~s!^>(.)([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!>$1$2$3$4!o;
        $sequence =~ tr/A-Z/a-z/;
        $sequence =~ s/($regexp)/\U$1\L/gi;
        print "
$id\n
$sequence\n";
    }
}

#!/usr/local/bin/perl
# NOTE(review): everything from here on sits after the `exit` above, so the
# top-level statements below (this call, the $docs assignment and the final
# `1;`) do not run when this file executes as the search script.  It looks
# like a separate helper library appended to the script -- confirm.
&primeRegSubs;

# Emit the page header.
#   $title     - page title, printed after "DPInteract: " and again below it
#   $isIndex   - when 1, one extra line is printed in the header
#   $notScript - when 1, the "Content-type" line is suppressed
# The three arguments are stored in package globals of the same names.
sub printHeader
{
    ($title,$isIndex,$notScript)=@_;
    unless ($notScript==1)
    {
        print "Content-type: text/html\n";
    }
    print "\n";
    print "\n";
    print "DPInteract: $title\n";
    if ($isIndex==1) { print "\n"; }
    print "\n";
    print "

$title

\n";
}

# Initialise the package globals holding external database URLs, DPInteract
# CGI/document locations, the contact address and the page footer
# ($claimer).  Sets $subsPrimed to 1.
sub primeRegSubs
{
    $RelayBase = "http://golgi.harvard.edu/htbin/relay";
    $Cgsc = "http://cgsc.biology.yale.edu/cgi-bin/sybgw/cgsc/Site/";
    $GenbankA = "$RelayBase/genbank-acc?";
    $Medline = "http://golgi.harvard.edu/htbin/dpinteract/medline-muid?";
    $MedNeigh = 'http://atlas.nlm.nih.gov:5700/htbin/enf/entrezmmnei?';
    $Pir = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&uid=";
    $Pdb = 'http://expasy.hcuge.ch/cgi-bin/pdb_entry_or_image?';
    $Prints = 'http://www.biochem.ucl.ac.uk/cgi-bin/attwood/DoPRINTS.pl?cmd_a=Display&qua_a=/Full&fun_a=Code&qst_a=';
    $Blocks = 'http://www.blocks.fhcrc.org/blocks-bin/getblock.www?';

    $dpiServer = "http://arep.med.harvard.edu";
    $dpiCgi = "$dpiServer/cgi-bin/dpinteract";
    $dpiDocsPath = "/usr/arep/a3/httpd/htdocs/dpinteract";
    $dpiDocsUrl ="$dpiServer/dpinteract";
    $dpiFam = "$dpiCgi/family?";
    $dpiSfam = "$dpiCgi/subfamily?";
    $dpiGene = "$dpiCgi/gene?";
    $dpiClass = "$dpiCgi/class?";
    $dpiMsearch = "$dpiCgi/msearch?";
    $dpiSearch = "$dpiCgi/search?";
    $dpiRefSearch = "$dpiCgi/ref?";
    $dpiMrefSearch = "$dpiCgi/mref?";
    $dpiSeqregSearch = "$dpiCgi/seqreg?";
    $dpiBlastnSearch = "$dpiCgi/blastn";
    $dpiMatSearch = "$dpiCgi/matsrch";

    $Expasy = "http://expasy.hcuge.ch";
    $Prosite = "$Expasy/cgi-bin/get-prosite-entry?";
    $Prodoc = "$Expasy/cgi-bin/get-prodoc-entry?";
    $Scop = "http://www.bio.cam.ac.uk/scop/data";
    # $Sprot = "$Expasy/cgi-bin/get-sprot-entry?";
    $Sprot = "http://expasy.hcuge.ch/cgi-bin/get-sprot-entry?";
    $SprotPi = "http://expasy.hcuge.ch/cgi-bin/getpI?";
    $seqAnalRef = "$Expasy/cgi-bin/get-seqanalr-entry?";

    $ecocycGene="http://ecocyc.ai.sri.com:1555/new-image?type=GENE&object=EG";
    $ecocycMap="http://ecocyc.ai.sri.com:1555/new-image?type=LOCUS-POSITION&object=EG";
    $entrezProtein = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&Dopt=r&uid=";
    $entrezMedline = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&Dopt=r&uid=";

    $mailKr = 'krobison@nucleus.harvard.edu';
    $claimer = "
Help: Overview, Fields

Note: This database copyright 1994 Harvard University. Robison, K., and Church, G.M. DPInteract: A database on DNA-protein interactions. (1994). Electronically published and manuscript in preparation.

Please contact $mailKr to report additional data or problems with this database.

DPInteract Home ";
    $subsPrimed = 1;
    return;
}

$docs = '

Documentation

';

# Rewrite the current entry line (in $_) in place, according to the field
# tag at the start of the line (AC, PG, PF, PC, SP, XR/RX, RM, RL).
sub makeDpiSubs
{
    if (/^AC/) { s/^AC\tBDBPG?/AC\tDP/; }
    if (m/^PG/ ) { s#^PG[\t]*([A-Za-z0-9]*)#PG $1 sub-family#o; }
    if (m/^PF/ && ! m/Unclassified[^_]/i) { s#^PF[\t]*([A-Za-z0-9]*)#PF $1 family#o; }
    if (m/^PC/ && ! m/Unclassified/i) { s#^PC[\t]*([A-Za-z0-9\-]*)#PC Class $1#o; }
    if (m/^SP/) { s#([A-Za-z0-9_]*)(.)([A-Z][0-9]{5})#SwissProt: $1$2$3 Expasy Entrez pI & MW#o; }
    if ( m/^XR|^RX/ )
    {
        s#Gene: *([A-Za-z0-9_]*)#Gene: $1#oi;
        s#Family: *([A-Za-z0-9_]*)#Family: $1#io;
        s#Class: *([A-Za-z0-9_\-]*)#Class: $1#io;
        s#\t(BL[0-9]*)#\tBlocks: $1#o;
        s#\tEG([0-9]*)#\tEcoGene $1 Map EcoCyc#io;
        s#\t(PS[0-9]*)#\tProsite Pattern $1#o;
        s#\t(PDOC[0-9]*)#\tProsite Documentation $1#o;
        s#\tCGSC:?([0-9]*)#\tColi Genetic Stock Center: Site \#$1#o;
        s#\tPIR:([A-Z0-9]*)# PIR (Entrez):$1#o;
        s#\tPDB:([A-Z0-9]*)# PDB:$1#o;
        s#\tGI:([A-Z0-9]*)# Entrez:$1#o;
        s#\tPRINTS:\t*([A-Z0-9]+)# Prints:$1#oi;
        s#SAR:([A-Z]*[0-9]*)#SeqAnalRef $1#o;
        s#SCOP([0-9\.]*)#SCOP:$1#o;
    }
    s#^RM ([0-9]*)#RM Medline MUID $1#o;
    if (/^RL/ && /J Biol Chem/ && /\(199[5-9]\)/)
    {
        # Split what follows the journal name into two fields.
        # NOTE(review): $vol and $fp are not used by the replacement as it
        # stands; presumably they fed a link that is no longer in the text.
        $tmp=$_;
        $tmp=~s/^.*J Biol Chem *//;
        $tmp=~s/://;
        $tmp=~s/-/ /;
        local($vol,$fp)=split(/[ \t\n]+/,$tmp);
        s#$#\tJBC On-Line#;
    }
    return;
}

# Delete this user's /tmp/dpi* and /tmp/sh* files whose modification time is
# more than six hours old.
sub cleanup
{
    $oneHourInSeconds = 60*60;
    $cutoff=6*$oneHourInSeconds;
    @fileList=split(/[ \t\n]+/,`ls /tmp/dpi* /tmp/sh*`);
    $myUid=$<;
    # Current time for the age test.  The comparison previously used $t,
    # which nothing in this file assigns, so no file was ever old enough.
    local($now)=time;
    for ($i=0; $i<=$#fileList; $i++)
    {
        ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
         $atime,$mtime,$ctime,$blksize,$blocks) = stat($fileList[$i]);
        if ($uid==$myUid && (($now-$mtime) > $cutoff))
        {
            unlink($fileList[$i]);
        }
    }
}

# Map an accession (with or without a leading "DP" and zeros) to a name by
# scanning accessions.tab for the first line starting with that accession.
# Returns the second whitespace-separated field of that line with any
# trailing ".letters" suffix removed, or the stripped accession itself when
# no line matches or the table cannot be opened.
sub acc2Name
{
    local($targAcc)=@_;
    local($dum,$name,$regexp,$quoted);
    $targAcc=~s/^DP0*//;
    # The accession can come from the request, so escape regex
    # metacharacters before building the pattern.
    ($quoted=$targAcc)=~s/(\W)/\\$1/g;
    $regexp="^DP0*$quoted";
    $name=$targAcc;
    if (open(ACCTAB,"$dpiDocsPath/accessions.tab"))
    {
        while ( $_ = <ACCTAB> )
        {
            # No /o here: the pattern differs from call to call.
            if (/$regexp/)
            {
                chop;
                ($dum,$name)=split;
                last;    # first match wins
            }
        }
        close(ACCTAB);
    }
    $name=~s/[.][a-z]*$//i;
    return $name;
}

# Print the page for one entry and return the resolved entry name, or
# "Not Found" when there is no file for it.
#   0 args: entry type "Gene", entry id taken from QUERY_STRING
#   1 arg : entry type in first arg, entry id taken from QUERY_STRING
#   2 args: entry type in first arg, entry id in second arg
# NOTE(review): the entry id is placed in a file path unchecked; consider
# rejecting ids containing '/'.
sub getEntry
{
    local($targ,$entryType,*args,$origName);
    @args=@_;
    if ($#args<0) { $entryType="Gene"; }
    else { $entryType= $args[0]; }
    if ($#args<1) { $targ=$ENV{'QUERY_STRING'}; }
    else { $targ = $args[1]; }

    &primeRegSubs;
    $targ=~s/[ \t\n]*//g;
    $origName=$targ;
    # Ids that look like accessions are translated to names first.
    if ($targ=~/^DP|^[0-9]/) { $targ=&acc2Name($targ); }
    &printHeader("$targ $entryType");

    local($targFile);
    if ($entryType=~/gene/i) { $targFile ="genes/$targ.gene"; }
    elsif ($entryType=~/family/i) { $targFile ="fams/$targ.fam"; }
    elsif ($entryType=~/class/i) { $targFile = "$targ.class"; }
    $targFile="$dpiDocsPath/$targFile";

    # A file that exists but cannot be opened is reported as not found
    # rather than producing an empty page.
    if ( -e $targFile && open (ENTRY,$targFile) )
    {
        print "
\n";
        $bCount=0;
        while ( $_ = <ENTRY> )
        {
            &makeDpiSubs;
            if (m/^B[PNS]\t/) { $bCount++; print ""; }
            print $_;
        }
        close(ENTRY);
    }
    else
    {
        $targ="Not Found";
        print "

Not Found!

DPInteract does not contain an entry of type $entryType for $origName.

If you reached this point from an outside database, please contact the adminstrator(s) for that database; if you reached here from another location in DPInteract, please send E-mail to $mailKr.";
    }
    return $targ;
}

# Print the multi-term gene search form (four term rows).
sub msearchForm
{
    local($i);
    print '

',"\n";
    print "Blank terms are ignored. Only gene entries are searched. A gene entry must match all terms to be returned (boolean AND).

\n";
    for ($i=0; $i<4; $i++)
    {
        print 'Term: ',"\n";
        print 'Search field(s)
',"\n";
    }
    print '
';
    print '
';
    print '',"\n";
    print '

',"\n";
}

# Print the multi-term reference search form (five term rows).
sub mrefForm
{
    local($i);
    print "\n",'
',"\n";
    print "Blank terms are ignored. Only gene entries are searched. A gene entry must match all terms to be returned (boolean AND).

\n";
    for ($i=0; $i<=4; $i++)
    {
        print "\n",'Term: ',"\n";
        print 'Search citation field(s)
',"\n";
    }
    if (1==0)    # disabled block, never executed
    {
        print "
Reference Keyword\n";
    }
    print '',"\n";
    print '

',"\n";
}

# Write sequence data to /tmp/dpi.<pid> and return that path.
# A ">Your Query" header line is added when the data does not begin with a
# '>' line; on every other line all non-letter characters are removed.
sub genSeqFile
{
    local($seqData)=@_;
    local($rand)= $$;
    local($searchFile)="/tmp/dpi.$rand";
    local(*seqDataLines,$line);
    # NOTE(review): this substitution runs on the file name, where it can
    # never match; it may have been meant for $seqData -- confirm.
    $searchFile=~s/%3E/>/;
    unless (open (SEARCH_INPUT, ">$searchFile"))
    {
        warn "cannot write $searchFile: $!\n";
        return ($searchFile);
    }
    @seqDataLines=split(/\n/,$seqData);
    print SEARCH_INPUT ">Your Query\n" unless ($seqDataLines[0]=~/^>/);
    foreach $line(@seqDataLines)
    {
        unless ($line=~/^>/) { $line=~s/[^A-Za-z]+//g; }
        print SEARCH_INPUT $line,"\n";
    }
    close(SEARCH_INPUT);
    return ($searchFile);
}

1;