Skip to content

Commit

Permalink
Merge pull request #102 from k-kom/feature/refgeneindex-protocol
Browse files Browse the repository at this point in the history
Introduce `GeneAnnotationIndex` as an abstraction of `RefGeneIndex`
  • Loading branch information
federkasten committed Jun 23, 2024
2 parents e09551b + e00da13 commit a0971a0
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 21 deletions.
14 changes: 7 additions & 7 deletions src/varity/hgvs_to_vcf.clj
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,18 @@
(hgvs->vcf-variants hgvs gene seq-rdr rgidx))))

(defmethod hgvs->vcf-variants :ref-gene-index
([hgvs seq-rdr rgidx] (hgvs->vcf-variants hgvs nil seq-rdr rgidx))
([{:keys [kind transcript] :as hgvs} gene seq-rdr rgidx]
([hgvs seq-rdr gaidx] (hgvs->vcf-variants hgvs nil seq-rdr gaidx))
([{:keys [kind transcript] :as hgvs} gene seq-rdr gaidx]
(let [convert (case kind
:coding-dna coding-dna-hgvs->vcf-variants
:protein protein-hgvs->vcf-variants
(throw (ex-info "supported HGVS kinds are only `:coding-dna` and `:protein`"
{:type ::unsupported-hgvs-kind
:hgvs-kind kind})))
rgs (if (supported-transcript? transcript)
(rg/ref-genes (str transcript) rgidx)
(rg/ref-genes (str transcript) gaidx)
(if-not (string/blank? gene)
(rg/ref-genes gene rgidx)
(rg/ref-genes gene gaidx)
(throw (ex-info "Transcript (NM_, NR_, ENST, ENSP) or gene must be supplied."
{:type ::ref-gene-clue-not-found}))))]
(if (seq rgs)
Expand Down Expand Up @@ -115,9 +115,9 @@
(protein-hgvs->vcf-variants-with-coding-dna-hgvs hgvs gene seq-rdr rgidx))))

(defmethod protein-hgvs->vcf-variants-with-coding-dna-hgvs :ref-gene-index
([hgvs seq-rdr rgidx] (protein-hgvs->vcf-variants-with-coding-dna-hgvs nil seq-rdr rgidx))
([hgvs gene seq-rdr rgidx]
([hgvs seq-rdr gaidx] (protein-hgvs->vcf-variants-with-coding-dna-hgvs nil seq-rdr gaidx))
([hgvs gene seq-rdr gaidx]
{:pre [(= (:kind hgvs) :protein)]}
(->> (rg/ref-genes gene rgidx)
(->> (rg/ref-genes gene gaidx)
(keep #(prot/->vcf-variants-with-coding-dna-hgvs hgvs seq-rdr %))
(apply concat))))
31 changes: 18 additions & 13 deletions src/varity/ref_gene.clj
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,12 @@
[rgs]
(group-by :name2 rgs))

(defrecord RefGeneIndex [locus ref-seq gene])
(defprotocol GeneAnnotationIndex
(lookup [this ks] "Returns ref-gene records specified by the ks."))

(defrecord RefGeneIndex [locus ref-seq gene]
GeneAnnotationIndex
(lookup [this ks] (get-in this ks)))

(defn index
"Creates refGene index for search."
Expand All @@ -283,24 +288,24 @@
(defn ref-genes
"Searches refGene entries with ref-seq, gene or (chr, pos) using index,
returning results as sequence. See also varity.ref-gene/index."
([s rgidx]
(get-in rgidx (if (re-find #"^ENST|^(NC|LRG|NG|NM|NR|NP)_" s)
([s gaidx]
(lookup gaidx (if (re-find #"^ENST|^(NC|LRG|NG|NM|NR|NP)_" s)
[:ref-seq s]
[:gene s])))
([chr pos rgidx] (ref-genes chr pos rgidx 0))
([chr pos rgidx tx-margin]
([chr pos gaidx] (ref-genes chr pos gaidx 0))
([chr pos gaidx tx-margin]
{:pre [(<= 0 tx-margin max-tx-margin)]}
(let [pos-r (round-int pos pos-index-block)]
(->> (get-in rgidx [:locus
(->> (lookup gaidx [:locus
(normalize-chromosome-key chr)
[pos-r (+ pos-r pos-index-block)]])
(filter (fn [{:keys [tx-start tx-end]}]
(<= (- tx-start tx-margin) pos (+ tx-end tx-margin))))))))

(defn in-any-exon?
"Returns true if chr:pos is located in any ref-gene exon, else false."
[chr pos rgidx]
(->> (ref-genes chr pos rgidx)
[chr pos gaidx]
(->> (ref-genes chr pos gaidx)
(some #(in-exon? pos %))
(true?)))

Expand Down Expand Up @@ -380,12 +385,12 @@

(defn seek-gene-region
"Seeks chr:pos through exon entries in refGene and returns those indices"
([chr pos rgidx]
(seek-gene-region chr pos rgidx nil))
([chr pos rgidx name]
([chr pos gaidx]
(seek-gene-region chr pos gaidx nil))
([chr pos gaidx name]
(->> (if name
(ref-genes name rgidx)
(ref-genes chr pos rgidx))
(ref-genes name gaidx)
(ref-genes chr pos gaidx))
(map (fn [rg]
(let [sgn (case (:strand rg)
:forward identity
Expand Down
10 changes: 9 additions & 1 deletion test/varity/ref_gene_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,15 @@
"KRAS" ["ENST00000311936.8" "ENSP00000000001.8"]

"FOO" '()
"ENSP00000000001.8" '())))
"ENSP00000000001.8" '()))

(testing "ref-genes can handle GeneAnnotationIndex"
(let [gene-annotation {:name2 "KRAS" :tx-start 100 :tx-end 200}
index (reify rg/GeneAnnotationIndex
(lookup [_ _] [gene-annotation]))]
(is (= [gene-annotation] (rg/ref-genes "gene-name" index)))
(is (= [gene-annotation] (rg/ref-genes "chr" 100 index)))
(is (= [gene-annotation] (rg/ref-genes "chr" 100 index 10))))))

(defslowtest regions-slow
(cavia-testing "region-tests"
Expand Down

0 comments on commit a0971a0

Please sign in to comment.