Skip to content

Commit

Permalink
make roary-like IGR presence absence csv file
Browse files: browse the repository at this point in the history
  • Loading branch information
harry-thorpe committed Nov 10, 2016
1 parent 362fdec commit 6ffd514
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 15 deletions.
5 changes: 5 additions & 0 deletions bin/.Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
core_switched_region_divergences <- read.csv("/media/harry/extra/ST131/core_switched_region_divergences.csv", stringsAsFactors=FALSE)
View(core_switched_region_divergences)
library(cowplot)
core_switched_region_divergences_subset <- core_switched_region_divergences[(core_switched_region_divergences$Nuc_identity < 0.8 & core_switched_region_divergences$Length_identity < 0.8), ]
core_switched_region_divergences_subset <- core_switched_region_divergences[(core_switched_region_divergences$Nuc_identity < 0.8 | core_switched_region_divergences$Length_identity < 0.8), ]
11 changes: 6 additions & 5 deletions bin/cluster_presence_absence.pl
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,21 @@

@cluster_sorted_array=sort { $cluster_isolate_count_hash{$b} <=> $cluster_isolate_count_hash{$a} } keys %cluster_isolate_count_hash;

print OUTPUT "Cluster,Isolates,Sequences";
print OUTPUT "\"Gene\",\"Non-unique Gene name\",\"Annotation\",\"No. isolates\",\"No. sequences\",\"Avg sequences per isolate\",\"Genome Fragment\",\"Order within Fragment\",\"Accessory Fragment\",\"Accessory Order with Fragment\",\"QC\",\"Min group size nuc\",\"Max group size nuc\",\"Avg group size nuc\"";
foreach $isolate(@isolate_array){
print OUTPUT ",$isolate";
print OUTPUT ",\"$isolate\"";
}
print OUTPUT "\n";

foreach $cluster(@cluster_sorted_array){
$ave_seqs=($cluster_seq_count_hash{$cluster}/$cluster_isolate_count_hash{$cluster});

print OUTPUT "$cluster,$cluster_isolate_count_hash{$cluster},$cluster_seq_count_hash{$cluster}";
print OUTPUT "\"$cluster\",\"\",\"\",\"$cluster_isolate_count_hash{$cluster}\",\"$cluster_seq_count_hash{$cluster}\",\"$ave_seqs\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",\"\"";
foreach $isolate(@isolate_array){
if($cluster_hash{$cluster}{$isolate}){
print OUTPUT ",$cluster_hash{$cluster}{$isolate}";
print OUTPUT ",\"$cluster_hash{$cluster}{$isolate}\"";
}else{
print OUTPUT ",";
print OUTPUT ",\"\"";
}
}
print OUTPUT "\n";
Expand Down
25 changes: 15 additions & 10 deletions bin/roary_igry_combiner.pl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/perl -w

use Text::CSV;
#use Text::CSV;

$csv = Text::CSV->new({ sep_char => ',', binary => 1 });
#$csv = Text::CSV->new({ sep_char => ',', binary => 1 });

$out_dir=$ARGV[0];
$roary_dir=$ARGV[1];
Expand All @@ -13,12 +13,15 @@
while(<INPUT_R>){
$line=$_;
$line=~s/\R//g;
$line=~s/^"//;
$line=~s/"$//;
@line_array=split(/","/, $line);

if($csv->parse($line)){
#if($csv->parse($line)){

@line_array=$csv->fields();
# @line_array=$csv->fields();

if($line =~ /^"Gene",/){
if($line =~ /^Gene","/){
@header_array=@line_array;

$col_count=scalar(@line_array);
Expand All @@ -42,21 +45,23 @@
}
}
}
}
#}
}

open INPUT_I, "$out_dir/IGR_presence_absence.csv";
while(<INPUT_I>){
$line=$_;
chomp $line;
@line_array=split(/,/, $line);
$line=~s/\R//g;
$line=~s/^"//;
$line=~s/"$//;
@line_array=split(/","/, $line);

if($line =~ /^Cluster,Isolates,/){
if($line =~ /^Gene","/){
@header_array=@line_array;

$col_count=scalar(@line_array);

$isolate_sta=3;
$isolate_sta=14;
$isolate_end=$col_count - 1;

#for($i=$isolate_sta; $i<=$isolate_end; $i++){
Expand Down

0 comments on commit 6ffd514

Please sign in to comment.