Skip to content

Commit

Permalink
make core IGRome alignment
Browse files Browse the repository at this point in the history
  • Loading branch information
harry-thorpe committed Nov 13, 2016
1 parent 2576e55 commit e4f3467
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 15 deletions.
32 changes: 17 additions & 15 deletions bin/R_plotter.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
# }

#####
#library(cowplot)
library(ggplot2)
library(cowplot)
library(reshape2)

# roary_gene_divergences <- read.csv("/media/harry/extra/igry_test/roary_gene_divergences.csv")
Expand Down Expand Up @@ -134,22 +134,24 @@ switched_region_divergences <- read.csv(file=in_file, header=TRUE)
# ggplot(switched_region_divergences_len_summary, aes(x=Length_identity)) +
# geom_histogram(binwidth=0.01)

# switched_region_divergences_long <- melt(switched_region_divergences, id.vars="Gene", measure.vars=c("Length_identity", "Nuc_identity"))
#
# sr_div_hist <- ggplot(switched_region_divergences_long, aes(x=value)) +
# geom_histogram(binwidth=0.005) +
# scale_x_continuous(limits=c(0,1.01)) +
# labs(x="Identity", y="Count") +
# facet_grid(variable~., scales="free")
#
# tiff("/media/harry/extra/igry_test/figures/sr_div_hist.tif", height=5, width=10, units="in", res=100)
#
# sr_div_hist
#
# dev.off()
switched_region_divergences_long <- melt(switched_region_divergences, id.vars="Gene", measure.vars=c("Length_identity", "Nuc_identity"))

sr_div_hist <- ggplot(switched_region_divergences_long, aes(x=value)) +
geom_histogram(binwidth=0.005) +
geom_vline(xintercept=0.9, linetype="dashed", colour="red") +
labs(x="Identity", y="Count") +
facet_grid(variable~., scales="free")

out_file=paste(out_dir, "/switched_region_divergences_hist.tif", sep="")

tiff(filename=out_file, height=5, width=10, units="in", res=100)

sr_div_hist

dev.off()

sr_div_point <- ggplot(switched_region_divergences, aes(x=Length_identity, y=Nuc_identity)) +
geom_point() +
geom_point(alpha=0.1) +
geom_hline(yintercept=0.9, linetype="dashed", colour="red") +
geom_vline(xintercept=0.9, linetype="dashed", colour="red") +
labs(x="Length identity", y="Nucleotide identity")
Expand Down
92 changes: 92 additions & 0 deletions bin/core_alignment_creator.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/perl -w

$out_dir=$ARGV[0];

open OUTPUT, ">$out_dir/core_IGR_alignment.fasta";

mkdir "$out_dir/isolate_core_IGR_tmp";

open INPUT, "$out_dir/isolates.txt";
while(<INPUT>){
$line=$_;
chomp $line;

push @isolate_array, $line;

open OUTPUT_TMP, ">$out_dir/isolate_core_IGR_tmp/$line.fasta";
print OUTPUT_TMP ">$line\n";
}
close INPUT;
close OUTPUT_TMP;

$isolate_count=scalar(@isolate_array);
$isolate_core_count=int($isolate_count * 0.99);

open INPUT_I, "$out_dir/IGR_presence_absence.csv";
while(<INPUT_I>){
$line=$_;
$line=~s/\R//g;
$line=~s/^"//;
$line=~s/"$//;
@line_array=split(/","/, $line);

if($line !~ /^Gene","/){
if($line_array[3] >= $isolate_core_count && $line_array[5] == 1){
push @cluster_array, $line_array[0];
}
}
}
close INPUT_I;

foreach $cluster(@cluster_array){

%cluster_seq_hash=();
$len=0;
open INPUT, "$out_dir/cluster_intergenic_alignment_files/${cluster}_aligned.fasta";
while(<INPUT>){
$line=$_;
chomp $line;

if($line =~ /^>(.+)/){
$cluster_id=$1;
@cluster_id_array=split(/_\+_\+_/, $cluster_id);
$isolate=$cluster_id_array[0];
}else{
$cluster_seq_hash{$isolate}=$line;

$len=length($line);
}
}
close INPUT;

foreach $isolate(@isolate_array){
open OUTPUT_TMP, ">>$out_dir/isolate_core_IGR_tmp/$isolate.fasta";
if(!$cluster_seq_hash{$isolate}){
for($i=0; $i<$len; $i++){
print OUTPUT_TMP "-";
}
}else{
print OUTPUT_TMP "$cluster_seq_hash{$isolate}";
}
}
}
close OUTPUT_TMP;

foreach $isolate(@isolate_array){
open OUTPUT_TMP, ">>$out_dir/isolate_core_IGR_tmp/$isolate.fasta";
print OUTPUT_TMP "\n";
}
close OUTPUT_TMP;

foreach $isolate(@isolate_array){
open INPUT, "$out_dir/isolate_core_IGR_tmp/$isolate.fasta";
while(<INPUT>){
$line=$_;
chomp $line;

print OUTPUT "$line\n";
}
}
close INPUT;
close OUTPUT;

0 comments on commit e4f3467

Please sign in to comment.