Skip to content

Commit

Permalink
rename scripts
Browse files (browse the repository at this point in the history)
  • Loading branch information
harry-thorpe committed Mar 15, 2017
1 parent 00807b0 commit c37414b
Show file tree
Hide file tree
Showing 16 changed files with 51 additions and 29 deletions.
74 changes: 48 additions & 26 deletions bin/piggy
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ then
printf "cd-hit found: %s\n" "$cd_hit_command"
else
printf "Error: cd-hit not found\n"
exit
fi

# mafft
Expand All @@ -71,6 +72,7 @@ then
printf "mafft found: %s\n" "$mafft_command"
else
printf "Error: mafft not found\n"
exit
fi

# blastn
Expand All @@ -94,6 +96,7 @@ then
printf "blastn found: %s\n" "$blastn_command"
else
printf "Error: blastn not found\n"
exit
fi

# makeblastdb
Expand All @@ -117,6 +120,7 @@ then
printf "makeblastdb found: %s\n" "$makeblastdb_command"
else
printf "Error: makeblastdb not found\n"
exit
fi


Expand Down Expand Up @@ -147,7 +151,7 @@ fi

if [ -z "$roary_dir" ]
then
printf "No Roary output directory specified.\n"
printf "Error: No Roary output directory specified.\n"
exit
fi

Expand Down Expand Up @@ -182,7 +186,7 @@ fi

if [ ! -d "$in_dir" ]
then
printf "Input folder doesn't exist.\n"
printf "Error: Input folder doesn't exist.\n"
exit
fi

Expand Down Expand Up @@ -215,7 +219,7 @@ if [ "$isolate_count" -gt 0 ]
then
printf "$isolate_count isolates found.\n"
else
printf "$isolate_count isolates found in the input folder.\n"
printf "Error: $isolate_count isolates found in the input folder.\n"
exit
fi

Expand All @@ -233,9 +237,9 @@ then
mkdir "$out_dir/coordinate_files"
fi

if [ -f "$out_dir/output_fasta.fasta" ]
if [ -f "$out_dir/IGR_sequences.fasta" ]
then
rm "$out_dir/output_fasta.fasta"
rm "$out_dir/IGR_sequences.fasta"
fi

if [ ! -d "$out_dir/isolate_intergenic_files" ]
Expand All @@ -257,22 +261,20 @@ fi

for isolate in ${isolate_array[@]}
do
gff_modifier.pl "$out_dir/gff_files/$isolate.gff" >> "$out_dir/log.txt"
piggy_gff_modifier.pl "$out_dir/gff_files/$isolate.gff" >> "$out_dir/log.txt"

gene_intergenic_coordinate_extracter.pl "$isolate" "$out_dir/gff_files/$isolate.gff.modified" "$out_dir/coordinate_files" >> "$out_dir/log.txt"
piggy_gene_intergenic_coordinate_extracter.pl "$isolate" "$out_dir/gff_files/$isolate.gff.modified" "$out_dir/coordinate_files" >> "$out_dir/log.txt"

if [ ! -d "$out_dir/isolate_intergenic_files/$isolate" ]; then
mkdir "$out_dir/isolate_intergenic_files/$isolate"
fi

intergenic_sequence_extracter.pl "$isolate" "$out_dir/gff_files/$isolate.gff.modified" "$out_dir/coordinate_files/${isolate}_intergenic_coordinates.tab" "$out_dir" "$out_dir/isolate_intergenic_files/$isolate" >> "$out_dir/log.txt"
piggy_intergenic_sequence_extracter.pl "$isolate" "$out_dir/gff_files/$isolate.gff.modified" "$out_dir/coordinate_files/${isolate}_intergenic_coordinates.tab" "$out_dir" "$out_dir/isolate_intergenic_files/$isolate" >> "$out_dir/log.txt"
done

printf "Clustering IGRs...\n" >> "$out_dir/log.txt";printf "Clustering IGRs...\n"

$cd_hit_command -i $out_dir/output_fasta.fasta -o "$out_dir/output_fasta_clustered.fasta" -T "$threads" -n 10 -d 0 -c "$nuc_identity" -G 0 -s "$len_identity" -aS 0.9 -g 0 -r 1 -mask N -M 0 &> /dev/null

rm "$out_dir/output_fasta_clustered.fasta"
$cd_hit_command -i $out_dir/IGR_sequences.fasta -o "$out_dir/IGR_sequences_clustered.fasta" -T "$threads" -n 10 -d 0 -c "$nuc_identity" -G 0 -s "$len_identity" -aS 0.9 -g 0 -r 1 -mask N -M 0 &> /dev/null

printf "IGRs clustered.\n" >> "$out_dir/log.txt";printf "IGRs clustered.\n"

Expand All @@ -291,7 +293,7 @@ then
mkdir "$out_dir/cluster_representative_files"
fi

cluster_file_creator.pl "$out_dir" >> "$out_dir/log.txt"
piggy_cluster_file_creator.pl "$out_dir" >> "$out_dir/log.txt"

printf "Doing all-vs-all IGR cluster search.\n" >> "$out_dir/log.txt";printf "Doing all-vs-all IGR cluster search.\n"

Expand All @@ -301,17 +303,17 @@ $blastn_command -db "$out_dir/representative_clusters_db" -query "$out_dir/repre

printf "Merging IGR clusters.\n" >> "$out_dir/log.txt";printf "Merging IGR clusters.\n"

cluster_merger.pl "$out_dir/representative_clusters_blast_out.tab" "$out_dir/representative_clusters_merged.tab" "$out_dir/cluster_intergenic_files" "$out_dir/clusters.txt" "$out_dir/representative_clusters.fasta" "$nuc_identity" "$len_identity"
piggy_cluster_merger.pl "$out_dir/representative_clusters_blast_out.tab" "$out_dir/representative_clusters_merged.tab" "$out_dir/cluster_intergenic_files" "$out_dir/clusters.txt" "$out_dir/representative_clusters.fasta" "$nuc_identity" "$len_identity"

readarray -t cluster_array < "$out_dir/clusters.txt"

cluster_count=${#cluster_array[@]}

printf "%i IGR clusters found after merging.\n" "$cluster_count" >> "$out_dir/log.txt";printf "%i IGR clusters found after merging.\n" "$cluster_count"

cluster_presence_absence.pl "$out_dir" >> "$out_dir/log.txt"
piggy_cluster_presence_absence.pl "$out_dir" >> "$out_dir/log.txt"

roary_piggy_combiner.pl "$out_dir" "$roary_dir" >> "$out_dir/log.txt"
piggy_roary_piggy_combiner.pl "$out_dir" "$roary_dir" >> "$out_dir/log.txt"

if [ "$fast" -eq 0 ]
then
Expand All @@ -329,7 +331,7 @@ do
$mafft_command --thread "$threads" --retree 2 --maxiterate 0 --adjustdirection --quiet "$out_dir/cluster_intergenic_files/${cluster}.fasta" 1> "$out_dir/cluster_intergenic_files/${cluster}_aligned_tmp.fasta" 2> /dev/null
#$mafft_command --thread "$threads" --localpair --maxiterate 1000 --adjustdirection --quiet "$out_dir/cluster_intergenic_files/${cluster}.fasta" 1> "$out_dir/cluster_intergenic_files/${cluster}_aligned_tmp.fasta" 2> /dev/null

fasta_converter.pl "$out_dir/cluster_intergenic_files/${cluster}_aligned_tmp.fasta" "$out_dir/cluster_intergenic_files/${cluster}_aligned.fasta"
piggy_fasta_converter.pl "$out_dir/cluster_intergenic_files/${cluster}_aligned_tmp.fasta" "$out_dir/cluster_intergenic_files/${cluster}_aligned.fasta"

cp "$out_dir/cluster_intergenic_files/${cluster}_aligned.fasta" "$out_dir/cluster_intergenic_alignment_files/${cluster}_aligned.fasta"
else
Expand All @@ -347,24 +349,24 @@ printf "IGR clusters aligned.\n" >> "$out_dir/log.txt";printf "IGR clusters alig

#printf "Calculating gene divergences...\n" >> "$out_dir/log.txt";printf "Calculating gene divergences...\n"

#divergence_checker.pl "$roary_dir/pan_genome_sequences" "$out_dir" "roary_gene_divergences.csv"
#piggy_divergence_checker.pl "$roary_dir/pan_genome_sequences" "$out_dir" "roary_gene_divergences.csv"

#printf "Gene divergences calculated.\n" >> "$out_dir/log.txt";printf "Gene divergences calculated.\n"

printf "Calculating IGR divergences...\n" >> "$out_dir/log.txt";printf "Calculating IGR divergences...\n"

divergence_checker.pl "$out_dir/cluster_intergenic_alignment_files" "$out_dir" "cluster_IGR_divergences.csv"
piggy_divergence_checker.pl "$out_dir/cluster_intergenic_alignment_files" "$out_dir" "cluster_IGR_divergences.csv"

printf "IGR divergences calculated.\n" >> "$out_dir/log.txt";printf "IGR divergences calculated.\n"

core_alignment_creator.pl "$out_dir" >> "$out_dir/log.txt"
piggy_core_alignment_creator.pl "$out_dir" >> "$out_dir/log.txt"

if [ ! -d "$out_dir/switched_region_files" ]
then
mkdir "$out_dir/switched_region_files"
fi

switched_region_creator.pl "$out_dir/roary_piggy_combined.tab" "$out_dir/cluster_intergenic_files" "$out_dir" >> "$out_dir/log.txt"
piggy_switched_region_creator.pl "$out_dir/roary_piggy_combined.tab" "$out_dir/cluster_intergenic_files" "$out_dir" >> "$out_dir/log.txt"

if [ ! -d "$out_dir/switched_region_alignment_files" ]
then
Expand All @@ -379,7 +381,7 @@ for switched_region in ${switched_region_array[@]}
do
$blastn_command -query "$out_dir/switched_region_files/${switched_region}.fasta" -subject "$out_dir/switched_region_files/${switched_region}.fasta" -dust no -outfmt 6 -out "$out_dir/blast_out.tab"

blast_hit=$(blast_parser.pl "$out_dir/blast_out.tab" "$out_dir/switched_region_files/${switched_region}.fasta" "$out_dir/switched_region_files/${switched_region}_aligned.fasta")
blast_hit=$(piggy_blast_parser.pl "$out_dir/blast_out.tab" "$out_dir/switched_region_files/${switched_region}.fasta" "$out_dir/switched_region_files/${switched_region}_aligned.fasta")

rm "$out_dir/blast_out.tab"

Expand All @@ -388,7 +390,7 @@ do
$mafft_command --thread "$threads" --retree 2 --maxiterate 0 --adjustdirection --quiet "$out_dir/switched_region_files/${switched_region}.fasta" 1> "$out_dir/switched_region_files/${switched_region}_aligned_tmp.fasta" 2> /dev/null
#$mafft_command --thread "$threads" --localpair --maxiterate 1000 --adjustdirection --quiet "$out_dir/switched_region_files/${switched_region}.fasta" 1> "$out_dir/switched_region_files/${switched_region}_aligned_tmp.fasta" 2> /dev/null

fasta_converter.pl "$out_dir/switched_region_files/${switched_region}_aligned_tmp.fasta" "$out_dir/switched_region_files/${switched_region}_aligned.fasta"
piggy_fasta_converter.pl "$out_dir/switched_region_files/${switched_region}_aligned_tmp.fasta" "$out_dir/switched_region_files/${switched_region}_aligned.fasta"
fi

cp "$out_dir/switched_region_files/${switched_region}_aligned.fasta" "$out_dir/switched_region_alignment_files/${switched_region}_aligned.fasta"
Expand All @@ -398,7 +400,7 @@ printf "Candidate switched IGRs aligned.\n" >> "$out_dir/log.txt";printf "Candid

printf "Calculating candidate switched IGR divergences...\n" >> "$out_dir/log.txt";printf "Calculating candidate switched IGR divergences...\n"

divergence_checker.pl "$out_dir/switched_region_alignment_files" "$out_dir" "switched_region_divergences.csv"
piggy_divergence_checker.pl "$out_dir/switched_region_alignment_files" "$out_dir" "switched_region_divergences.csv"

printf "Candidate switched IGR divergences calculated.\n" >> "$out_dir/log.txt";printf "Candidate switched IGR divergences calculated.\n"

Expand All @@ -408,7 +410,7 @@ then

# Needs R, Rscript, ggplot2, reshape2.

SR_plotter.R "$out_dir" "$nuc_identity" "$len_identity" &> /dev/null
piggy_SR_plotter.R "$out_dir" "$nuc_identity" "$len_identity" &> /dev/null
fi

fi
Expand All @@ -419,10 +421,30 @@ then

# Needs R, Rscript, ggplot2, reshape2.

R_plotter.R "$out_dir" "$roary_dir" &> /dev/null
piggy_R_plotter.R "$out_dir" "$roary_dir" &> /dev/null
fi

# Cleanup
#
# Gather the generated .tif plots into "$out_dir/plots", then delete the
# intermediate working directories and files left under "$out_dir".

#rm -r "$out_dir/cluster_intergenic_files"

# -p makes an existing plots/ folder a non-error while still reporting genuine
# failures (e.g. permissions), which a blanket "&> /dev/null" would hide.
mkdir -p "$out_dir/plots"

# Move plots one by one: if no .tif file is present the glob stays unexpanded,
# and the -e test skips that literal pattern instead of handing it to mv.
for plot_file in "$out_dir/"*".tif"
do
if [ -e "$plot_file" ]
then
mv "$plot_file" "$out_dir/plots"
fi
done

# -f: an intermediate may be absent (presumably when an optional stage did not
# run - confirm against the earlier stages); a missing path is not an error.
# ${out_dir:?} aborts rather than deleting from "/" if out_dir is ever empty.
rm -rf "${out_dir:?}/isolate_intergenic_files"
rm -rf "${out_dir:?}/cluster_intergenic_files"
rm -rf "${out_dir:?}/cluster_representative_files"
rm -rf "${out_dir:?}/switched_region_files"
rm -rf "${out_dir:?}/isolate_core_IGR_tmp"

rm -f "${out_dir:?}/clusters.txt"
rm -f "${out_dir:?}/isolates.txt"
rm -f "${out_dir:?}/switched_regions.txt"
rm -f "${out_dir:?}/representative_clusters_db.nhr"
rm -f "${out_dir:?}/representative_clusters_db.nin"
rm -f "${out_dir:?}/representative_clusters_db.nsq"
rm -f "${out_dir:?}/representative_clusters_merged.tab"
rm -f "${out_dir:?}/representative_clusters.fasta"
rm -f "${out_dir:?}/representative_clusters_blast_out.tab"
rm -f "${out_dir:?}/representative_clusters_blast_out_modified.tab"
rm -f "${out_dir:?}/IGR_sequences_clustered.fasta"
rm -f "${out_dir:?}/IGR_sequences_clustered.fasta.clstr"

File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
open OUTPUT_REP, ">$out_dir/representative_clusters.fasta";

$count=0;
open INPUT, "$out_dir/output_fasta_clustered.fasta.clstr";
open INPUT, "$out_dir/IGR_sequences_clustered.fasta.clstr";
while(<INPUT>){
$line=$_;
chomp $line;
Expand Down Expand Up @@ -64,7 +64,7 @@
close OUTPUT_REP;

$count=0;
open INPUT, "$out_dir/output_fasta_clustered.fasta.clstr";
open INPUT, "$out_dir/IGR_sequences_clustered.fasta.clstr";
while(<INPUT>){
$line=$_;
chomp $line;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
$max_len=1000;
$max_n_prop=0.1;

open OUTPUT, ">>$out_dir/output_fasta.fasta";
open OUTPUT, ">>$out_dir/IGR_sequences.fasta";

$include=0;
open INPUT, "$in_file";
Expand Down
File renamed without changes.
File renamed without changes.

0 comments on commit c37414b

Please sign in to comment.