#!/usr/local/bin/perl ################################################################# #Copyright: University of Utah Genome Center, University of Utah #Author:Andrew von Niederhausern #Email: avonnied@genetics.utah.edu #Created:12/21/2001 #Last Updated:07/14/2002 #Version: 1.0.3 #Purpose: Colored Fasta #WebSite: http://www.genome.utah.edu/genesnps #Unauthorized use is prohibited, this header may not be removed # or edited ################################################################# $genbank_dir = "genbank"; ## edit this for the path of the genbank files use Bio::Seq; use Bio::SeqIO; use CGI::Carp qw(carpout fatalsToBrowser); print "Content-type: text/html\n\n"; $buffer = $ENV{"QUERY_STRING"}; @pairs = split(/&/, $buffer); foreach $pair (@pairs) { ($Fname, $Fvalue) = split(/=/, $pair); $Fvalue =~ tr/+/ /; #$Fvalue =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg; $FORM{$Fname} = $Fvalue; } if ( exists $FORM{organism} ) { $genbank_dir = "$FORM{organism}/$genbank_dir"; } if ($FORM{"gene"}) { ############################################ # # # Colored Fasta # # # ############################################ $utr_color = "00AA00"; $gene = $FORM{"gene"}; &get_seqobj; ($sequence) = $seqobj->seq(); (@exons) = &feature_array($seqobj,'exon'); (@utrs) = &feature_array($seqobj,'UTR'); (@CDS) = &feature_array($seqobj,'CDS'); (@variations) = &feature_array($seqobj,'variation'); $number_exons = scalar(@exons); (@sequence) = split(//,$sequence); foreach (@exons) { $exonc++; my ($realexon) = $_->get_tag_values ("number"); my ($realgene) = $_->get_tag_values ("gene"); $start = $_->start(); $stop = $_->end(); $print{$start-1} .= qq{}; $label{$start-1} .= " Exon$realexon (Gene: $realgene)"; $print{$stop} .= qq{}; } print qq{ }; if ($variations[0] != -1) { foreach (@variations) { $start = $_->start(); $stop = $_->end(); eval{ ($implication) = $_->each_tag_value("implication"); }; eval{ ($standard_name) = $_->each_tag_value("standard_name"); }; eval{ ($alleles) = $_->each_tag_value("allele"); }; eval{ ($submitter) = $_->each_tag_value("submitter"); }; eval{ ($frequency) = $_->each_tag_value("frequency"); }; eval{ ($heterozygosity) = $_->each_tag_value("heterozygosity"); }; eval{ ($pop) = $_->each_tag_value("population"); }; if ($frequency == 0 && $heterozygosity != 0) { $frequency = $heterozygosity; } $implication =~ s/,/ /g; ($iupac) = &iupac("$alleles"); if ($standard_name =~ /^ss(\d+)_/) { $ss_number = $1; if ($pop =~ /\w+/) { $key = "$ss_number,&,$pop"; } else { $key = "$ss_number"; } $submitter{"$key"} = "$submitter"; $frequency{"$key"} = $frequency; $population{"$key"} = $pop; } if ($standard_name =~ /($gene-\d+)/) { $ss_number = $1; if ($pop =~ /\w+/) { $key = "$ss_number,&,$pop"; } else { $key = "$ss_number"; } #print "$ss_number = $submitter,$frequency,$pop
\n"; $submitter{"$key"} = "$submitter"; $frequency{"$key"} = $frequency; $population{"$key"} = $pop; $egp{"$key"}++; } if (!$printed_variation{"$start"}) { $label{$start} .= " var($start):[$alleles]"; } $printed_variation{"$start"}++; if ($implication =~ /nonsynon\s+(\w{3})\s+(\d+)\s+(\w{3})\s+\:\:\s+(\w{3})\s+(\d+)\s+(\w{3})/) { $codon_init = $1; $codon_pos = $2; $codon_change = $3; $pep_init = $4; $c_p = $5; $pep_change = $6; $peptide_orig{"$codon_pos"} = "$pep_init"; $peptid_change{"$codon_pos"} = "$pep_change"; # $print{$start-1} .= qq{}; # $extra = qq{}; $coding{"$ss_number"} = "$standard_name"; $coding2{"$ss_number"} = "$implication"; $ssnumbers{"$codon_pos"} .= "$ss_number,"; } elsif ($implication =~ /^synon\s+(\w{3})\s+(\d+)\s+(\w{3})\s+\:\:\s+(\w{3})\s+(\d+)\s+(\w{3})/) { $codon_init = $1; $codon_pos = $2; $codon_change = $3; $pep_init = $4; $c_p = $5; $pep_change = $6; $peptide_orig{"$codon_pos"} = "$pep_init"; $peptid_change{"$codon_pos"} = "$pep_change"; $coding{"$ss_number"} = "$standard_name"; $coding2{"$ss_number"} = "$implication"; $ssnumbers{"$codon_pos"} .= "$ss_number,"; } $print{$start-1} .= qq{}; $print{$stop} .= qq{$extra}; undef($extra); } } # print "HELLO @CDS\n"; foreach $c (@CDS) { $location = $c->location(); # print $location; eval{ ($rna_acc) = $c->each_tag_value("standard_name"); }; undef(@CDS2); eval{ # $location = $_->location(); if ($location =~ /Simple/) { $start = $c->start(); $stop = $c->end(); # print "
SIMPLE $start $stop
\n"; push(@CDS2,"$start,$stop"); } elsif ($location =~ /Split/) { @subloc = $location->sub_Location(); if (@subloc) { foreach $sl (@subloc) { $start = $sl->start(); $stop = $sl->end(); push(@CDS2,"$start,$stop"); # print "SPLIT $start $stop
\n"; } } } }; $s = $c->start(); # print "SSS $s\n"; $label{$s-1} .= " Translation($rna_acc)"; undef(@subloc); ($translation) = $c->each_tag_value('translation'); (@cds) = split(//,$translation); $cc = 0; $codon = 1; $codon_c = 0; foreach (@CDS2) { # print "$_
\n"; ($start,$stop) = split(/,/,$_); $stop2 = $stop + 1; $start2 = $start; while ($start2 <$stop2) { if ($next_cc) { if ($next_cc =~ /SINGLE/) { $next_cc = 0; } # $already_in = 21; # $in_exon{$start2+$next_cc} = "YES"; $next_key = $start2 + $next_cc; $amino_acid{$next_key} = $cds[$codon_c-1]; # print "On The NEXT $next_cc ".($start2+$next_cc)." = $cds[$codon_c-1]
\n"; # $start2++; undef($next_cc); next; } $cc++; if ($cc == $codon) { if ($start2 >= $stop) { $next_cc = $start2 - $stop; if ($next_cc == 0) { $next_cc = "SINGLE"; } # print "NEXT $next_cc
\n"; } else { $amino_acid{$start2+1} = $cds[$codon_c]; } # print "$cds[$codon_c] ".($start2+1)."
\n"; $codon += 3; $codon_c++; } $start2++; } $in_exon{$start-1} = "YES"; undef($already_in); $in_exon{$stop} = "NO"; } } ##################### UTRS ###################### foreach (sort {$a<=>$b} @utrs) { # $location = $_->location(); # print $location; eval{ $location = $_->location(); if ($location =~ /Simple/) { $start = $_->start(); $stop = $_->end(); # print "SIMPLE $start $stop\n"; $print{$start-1} .= qq{}; $label{$start-1} .= "| UTR"; $print{$stop} .= qq{}; } elsif ($location =~ /Split/) { @subloc = $location->sub_Location(); if (@subloc) { foreach $sl (@subloc) { $start = $sl->start(); $stop = $sl->end(); $print{$start-1} .= qq{}; $label{$start-1} .= " UTR"; $print{$stop} .= qq{}; } } } }; undef(@subloc); } print qq{
$gene Colored FASTA
}; $lb = 50; $spacer = 10; $count = 1; # $sequence_length = scalar(@sequence); $pep_count = 0; foreach (@sequence) { if ($in_exon) { if ($amino_acid{$pep_count}) { $ccprint++; $spacer2 .= $amino_acid{$pep_count}; } else { $spacer2 .= " "; } if ($pep_count == $spacer) { $spacer2 .= " "; } } if ($in_exon{$pep_count} =~ /YES/) { $in_exon = "YES"; } if ($in_exon{$pep_count} =~ /NO/) { $next_line_no_br = 21; undef($in_exon); } $this_line_count++; if ($in_exon{$pep_count} =~ /YES/ ) { $another_br = "
"; $spac = 0; $spac_space = 10; while ($spac < $this_line_count) { $spac++; $spacer2 .= " "; if ($spac == $spac_space) { $spacer2 .= " "; $spac_space+=10; } } # if ($amino_acid{$pep_count-1}) { # $spacer2 .= $amino_acid{$pep_count-1}; # } # $spacer2 .= "HERE"; } # if ($href{$pep_count}) { # if ($href{"$pep_count"} =~ /javascript/ && $pep_count == $lb) { # $href_next .= $href{"$pep_count"}; # } else { # print "$href{$pep_count}"; # } # } if ($print{"$pep_count"}) { if ($pep_count == $lb) { $print_next .= $print{"$pep_count"}; } else { print "$print{$pep_count}"; } if ($label{"$pep_count"}) { $label .= " | $label{$pep_count}"; } } if ($next_line_no_br) { if ($pep_count == $spacer) { $spacer2 .= " "; } } if ($pep_count == $lb) { print " $pep_count $label
\n"; # undef($another_br); if ($spacer2) { print "$spacer2$ccprint
\n"; undef($spacer2); } if ($next_line_no_br == 21) { undef($another_br); undef($next_line_no_br); } $this_line_count=0; undef($label); $lb+= 50; } if ($next_line_no_br) { $spacer2 .= " "; } if ($pep_count == $spacer) { print " "; $spacer += 10; } if ($print_next) { print "$print_next"; undef($print_next); } print $_; $pep_count++; } print qq{
}; } ############################################################## ################################ # ############################## Sub Routines # ################################ # ############################################################## sub connect { ####################### Database Connection ############################ $db = DBI->connect("dbi:Oracle:host=gemini7.genetics.utah.edu;sid=jamuna","reader","seqmore") or print qq{

ERROR:cannot connect to DB:
$DBI::errstr
There has been an error with the Database please send an email to GeneSNPs

\n }; } sub get_seqobj { # print "GETTING SEQOBJ $gene\n"; $in = Bio::SeqIO->new("-file"=>"$genbank_dir/$gene.gb","-format"=>"Genbank"); $seqobj = $in->next_seq(); # print "OBJ $seqobj\n"; } sub feature_array { undef(@returns); ($obj) = $_[0]; ($tag) = $_[1]; ($subtag) = $_[2]; ($subvalue) = $_[3]; @all = $obj->all_SeqFeatures(); foreach (@all) { if ($_->primary_tag =~ /$tag/) { if ($subtag && $subvalue) { eval{ ($cvalue) = $_->each_tag_value("$subtag"); if ($cvalue =~ /$subvalue/) { push(@returns,$_); } }; } else { push(@returns,$_); } } } if ($returns[0]) { return(@returns); } else { return(-1); } } sub iupac { ($code) = $_[0]; (@A) = split(/\//,$code); $A = join("/",sort @A); # print "AAA $A\n"; %iupac_dna = ( "A/C" => 'M', "A/G" => 'R', "A/T" => 'W', "G/T" => 'K', "C/T" => 'Y', "C/G" => 'S', "A/C/G" => 'V', "A/C/T" => 'H', "A/G/T" => 'D', "C/G/T" => 'B', "A/C/G/T" => 'n' ); return($iupac_dna{"$A"}); }