#!/usr/local/bin/perl
# DPInteract site search CGI.
#
# Without command-line arguments the script prints a help page describing
# the query syntax.  With arguments it takes the query from
# $ENV{'QUERY_STRING'}, URL-decodes it, rewrites " or " to "|", expands
# IUPAC nucleotide ambiguity codes into character classes and then scans
# sites.fasta, handing every accumulated record to &output.
#
# Shared settings ($dpiDocsPath, $claimer, ...) come from &primeRegSubs.
&primeRegSubs;
if ( $#ARGV < 0 ) # No arguments
{
    &printHeader("Search Sites",1);
    print 'This index can be searched using a case-insensitive PERL regular expression. Partial words will be matched.';
    print ' Some of the available meta-characters
- ? Question mark
- allow 0 or 1 of preceding expression; at?c will match ac or atc
- * Asterisk
- match 0 or more of the preceding expression.
- [] Square brackets
- match any one of the nucleotides inside the brackets. at[cg]at will match atcat and atgat. Alternatively, you may use IUPAC ambiguity codes.
- . Period
- match any nucleotide at this position.
- | Vertical bar (style)
- Logical OR. gaa|ttc will match either trinucleotide (this is the way to search both strands with a non-palindromic query)
- IUPAC ambiguity codes are supported
Example queries
REBASE (Restriction Enzyme Database)
';
    # The footer ($claimer) is printed once, after the if/else, for both
    # branches.  Printing it here as well put it on the help page twice.
}
else
{
    $regexp = $ENV{'QUERY_STRING'};
    $regexp = '' unless defined $regexp;

    # '+' stands for a blank in a query string.  Translate it before
    # percent-decoding so that an encoded plus sign (%2B) survives as a
    # literal '+'.
    $regexp =~ s/[+]{1,}/ /g;
    # Decode every %XX escape (either hex case) instead of a fixed short list.
    $regexp =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    $regexp =~ s/ or /|/g;

    # IUPAC ambiguity code expansion.  The replacements only ever produce
    # a, c, g, t, '.', '[' and ']', so a later rule never rewrites the
    # output of an earlier one.
    $regexp =~ s/k/[gt]/gi;
    $regexp =~ s/m/[ac]/gi;
    $regexp =~ s/[nx]/./gi;
    $regexp =~ s/s/[cg]/gi;
    $regexp =~ s/r/[ag]/gi;
    $regexp =~ s/u/t/gi;
    $regexp =~ s/v/[acg]/gi;
    $regexp =~ s/w/[at]/gi;
    $regexp =~ s/y/[tc]/gi;
    $regexp =~ s/b/[cgt]/gi;
    $regexp =~ s/d/[agt]/gi;
    $regexp =~ s/h/[act]/gi;

    # The query is echoed into an HTML page: neutralise markup characters in
    # the displayed copy only, the search pattern itself stays untouched.
    my $shownQuery = $regexp;
    $shownQuery =~ s/&/&amp;/g;
    $shownQuery =~ s/</&lt;/g;
    $shownQuery =~ s/>/&gt;/g;
    $shownQuery =~ s/"/&quot;/g;
    &printHeader("Site Search $shownQuery");

    # NOTE(review): already called at the top of the script; kept in case
    # re-initialising the shared settings here is intended - confirm.
    &primeRegSubs;

    # Open the sequence file and remember the reason right away if that
    # fails, before any other call can overwrite $!.
    my $seqsPath = "$dpiDocsPath/sites.fasta";
    my $seqsOpen = open(my $seqs, '<', $seqsPath);
    my $openError = $seqsOpen ? '' : "$!";

    print "Important
This search is only on the actual sequence fragments contained on the database. Sites which exist in the genome may be missed if only a portion of the site is represented in our database.\n";
    print "
\t+ Proven\t? Candidate\t- Pseudo site or non-binding mutant\n";
    print "
\n";
    $regexp =~ tr/A-Z/a-z/;
    $regexpUC = $regexp; $regexpUC =~ tr/a-z/A-Z/;

    if ($seqsOpen)
    {
        while ( defined( my $line = <$seqs> ) )
        {
            # chomp, not chop: a final line without a trailing newline must
            # not lose its last character.
            chomp $line;
            if ($line =~ m/^>/)
            {
                # A header line closes the previous record and starts a new one.
                &output; $id = $line; $sequence = "";
            }
            else
            {
                # Sequence line: drop blanks, tabs and dashes, then append.
                $line =~ s/[ \t\-]//g; $sequence = "$sequence$line";
            }
        }
        close($seqs);
        &output;    # flush the last record
    }
    else
    {
        # Report the failure instead of silently showing an empty result.
        warn "site search: cannot open $seqsPath: $openError\n";
        print "Error: the site sequence file could not be opened.\n";
    }
    print "
";
}
print "$claimer";
exit;
# output - print the current record if its sequence matches the query.
#
# Works entirely on package globals:
#   $id        header line of the record (read, rewritten in place)
#   $sequence  accumulated sequence text (read, lower-cased in place, with
#              every match upper-cased)
#   $regexp    search pattern, matched case-insensitively
# Prints "$id\n$sequence\n" to the currently selected handle on a match and
# nothing otherwise.
sub output
{
    # Called once before the first record has been read: with neither an id
    # nor any sequence there is nothing to report, even if the pattern is
    # able to match the empty string.
    return if !defined($id) && (!defined($sequence) || $sequence eq '');

    # Wrapped in (?:...) so that an empty $regexp is not treated as Perl's
    # special empty pattern, which would silently reuse the last successful
    # match.  No /o: the pattern is looked at afresh on every call.
    if ($sequence =~ m/(?:$regexp)/i)
    {
        # NOTE(review): as written both substitutions put back exactly what
        # they matched; presumably the replacement once added markup around
        # the captured parts - confirm against the deployed copy.
        $id=~s!^([+\-\?])([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!$1$2$3$4!o;
        $id=~s!^>(.)([a-zA-Z0-9_]*)([ \t]*#)([0-9]*)!>$1$2$3$4!o;
        # Show the sequence in lower case with every hit in upper case.
        $sequence =~ tr/A-Z/a-z/;
        $sequence =~ s/($regexp)/\U$1\E/gi;
        print "$id\n$sequence\n";
    }
}
#!/usr/local/bin/perl
# NOTE(review): this second interpreter line and the call below come after
# the top-level `exit;` of the search script, so when this file is run from
# the top the call is never reached.  It looks like the start of a helper
# library that was appended to the script - confirm whether it is also
# loaded on its own before removing anything.
&primeRegSubs;
# printHeader(TITLE, ISINDEX, NOTSCRIPT) - emit the top of a page.
#
#   TITLE      text printed after "DPInteract: " and again on its own line
#   ISINDEX    when numerically equal to 1, one extra newline is printed
#              after the "DPInteract:" line
#   NOTSCRIPT  when numerically equal to 1, the "Content-type: text/html"
#              line is left out
#
# The three arguments are stored in the package globals $title, $isIndex
# and $notScript.
# NOTE(review): several prints emit only a newline; they look like places
# where markup used to be - confirm against the deployed copy.
sub printHeader
{
    ($title, $isIndex, $notScript) = @_;

    unless ($notScript == 1)
    {
        print "Content-type: text/html\n";
    }

    print "\n";
    print "\n";
    print "DPInteract: $title\n";

    print "\n" if $isIndex == 1;

    print "\n";
    print "$title\n\n";
}
sub primeRegSubs
{
$RelayBase = "http://golgi.harvard.edu/htbin/relay";
$Cgsc = "http://cgsc.biology.yale.edu/cgi-bin/sybgw/cgsc/Site/";
$GenbankA = "$RelayBase/genbank-acc?";
$Medline = "http://golgi.harvard.edu/htbin/dpinteract/medline-muid?";
$MedNeigh = 'http://atlas.nlm.nih.gov:5700/htbin/enf/entrezmmnei?';
$Pir = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&uid=";
$Pdb = 'http://expasy.hcuge.ch/cgi-bin/pdb_entry_or_image?';
$Prints = 'http://www.biochem.ucl.ac.uk/cgi-bin/attwood/DoPRINTS.pl?cmd_a=Display&qua_a=/Full&fun_a=Code&qst_a=';
$Blocks = 'http://www.blocks.fhcrc.org/blocks-bin/getblock.www?';
$dpiServer = "http://arep.med.harvard.edu";
$dpiCgi = "$dpiServer/cgi-bin/dpinteract";
$dpiDocsPath = "/usr/arep/a3/httpd/htdocs/dpinteract";
$dpiDocsUrl ="$dpiServer/dpinteract";
$dpiFam = "$dpiCgi/family?";
$dpiSfam = "$dpiCgi/subfamily?";
$dpiGene = "$dpiCgi/gene?";
$dpiClass = "$dpiCgi/class?";
$dpiMsearch = "$dpiCgi/msearch?";
$dpiSearch = "$dpiCgi/search?";
$dpiRefSearch = "$dpiCgi/ref?";
$dpiMrefSearch = "$dpiCgi/mref?";
$dpiSeqregSearch = "$dpiCgi/seqreg?";
$dpiBlastnSearch = "$dpiCgi/blastn";
$dpiMatSearch = "$dpiCgi/matsrch";
$Expasy = "http://expasy.hcuge.ch";
$Prosite = "$Expasy/cgi-bin/get-prosite-entry?";
$Prodoc = "$Expasy/cgi-bin/get-prodoc-entry?";
$Scop = "http://www.bio.cam.ac.uk/scop/data";
# $Sprot = "$Expasy/cgi-bin/get-sprot-entry?";
$Sprot = "http://expasy.hcuge.ch/cgi-bin/get-sprot-entry?";
$SprotPi = "http://expasy.hcuge.ch/cgi-bin/getpI?";
$seqAnalRef = "$Expasy/cgi-bin/get-seqanalr-entry?";
$ecocycGene="http://ecocyc.ai.sri.com:1555/new-image?type=GENE&object=EG";
$ecocycMap="http://ecocyc.ai.sri.com:1555/new-image?type=LOCUS-POSITION&object=EG";
$entrezProtein = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=p&form=6&Dopt=r&uid=";
$entrezMedline = "http://www3.ncbi.nlm.nih.gov/htbin-post/Entrez/query?db=m&form=6&Dopt=r&uid=";
$mailKr = 'krobison@nucleus.harvard.edu';
$claimer = "
Help: Overview, Fields
Note: This database copyright 1994 Harvard University.
Robison, K., and Church, G.M. DPInteract: A database on DNA-protein interactions. (1994). Electronically published and manuscript in preparation.
Please contact $mailKr to report additional data or problems with this database.
DPInteract Home