Skip to content

Commit

Permalink
Work on parsing xml
Browse files Browse the repository at this point in the history
  • Loading branch information
krcurtis committed Aug 13, 2022
1 parent 37e803d commit 9c3202b
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 11 deletions.
47 changes: 45 additions & 2 deletions app/Main.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,49 @@
--------------------------------------------------------------------------------
-- Copyright 2022 Fred Hutchinson Cancer Center
-- Load DIVA XML file and generate a corresponding GatingML file
-- WARNING GatingML gate coordinates where the logicle transform was used in the DIVA XML are hopefully close but will not be exact


module Main where

import Lib
import Options.Applicative
import qualified Options.Applicative as O
import Data.Semigroup ((<>))



--------------------------------------------------------------------------------
-- command line argument parsing

data CommandParameters = CommandParameters
{ arg_diva_file :: String
, arg_output_file :: String
}

command_parameters :: O.Parser CommandParameters
command_parameters = CommandParameters
<$> O.strOption
( O.long "input_diva_xml"
<> O.short 'i'
<> O.metavar "FILE"
<> O.help "DIVA formatted XML file" )
<*> O.strOption
( O.long "output_file"
<> O.short 'o'
<> O.metavar "FILE"
<> O.help "output GatingML file" )

main :: IO ()
main = someFunc
main = O.execParser opts >>= run_app
where
opts = O.info (command_parameters O.<**> O.helper)
( O.fullDesc
<> O.progDesc "Extract GatingML gates from DIVA XML"
<> O.header "extract-gating-ml - app for getting gates from DIVA XML files")


--------------------------------------------------------------------------------


run_app :: CommandParameters -> IO ()
run_app = undefined -- TODO
13 changes: 10 additions & 3 deletions extract-gating-ml.cabal
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
cabal-version: 1.12

-- This file has been generated from package.yaml by hpack version 0.33.0.
-- This file has been generated from package.yaml by hpack version 0.34.4.
--
-- see: https://github.com/sol/hpack
--
-- hash: 69a5323e0b29b197520c84ad1a8aaf0bd4b78388ae4fdcf0af9b7ff03a4f9e92

name: extract-gating-ml
version: 0.1.0.0
Expand Down Expand Up @@ -38,6 +36,9 @@ library
src
build-depends:
base >=4.7 && <5
, containers
, hspec
, optparse-applicative >=0.15.1
, text
, xml
default-language: Haskell2010
Expand All @@ -51,7 +52,10 @@ executable extract-gating-ml-exe
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, containers
, extract-gating-ml
, hspec
, optparse-applicative >=0.15.1
, text
, xml
default-language: Haskell2010
Expand All @@ -66,7 +70,10 @@ test-suite extract-gating-ml-test
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
base >=4.7 && <5
, containers
, extract-gating-ml
, hspec
, optparse-applicative >=0.15.1
, text
, xml
default-language: Haskell2010
4 changes: 4 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ dependencies:
- base >= 4.7 && < 5
- xml
- text
- hspec
- optparse-applicative >= 0.15.1
- containers


library:
source-dirs: src
Expand Down
119 changes: 115 additions & 4 deletions src/ParseDiva.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
--------------------------------------------------------------------------------
--- Parse FACSDiva xml gating file for conversion to Gating-ML 2.0



{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# OPTIONS_GHC -Werror=missing-fields #-}


module ParseDiva where

Expand All @@ -13,9 +17,9 @@ module ParseDiva where
import Text.XML.Light
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Maybe (catMaybes)


import Data.Maybe (catMaybes, fromJust, isJust)
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set

{-
Expand Down Expand Up @@ -88,9 +92,27 @@ data DivaGate = DivaGate

type DivaGateSet = [DivaGate]



data DivaInfo = DivaInfo { di_specimens :: [String] -- named specimens, there might be specimen sections without names, which are not included
, di_specimen_tubes :: Map.Map String [DivaTube]
-- what about tube name to DivaTube, this might make sense if tube names don't overlap ...
, di_has_overlap :: Bool -- some tube names overlap
, di_overlapping_tube_names :: [String] --list of tube names that occur in more than one specimen
, di_tube_names :: [String] -- unique listing of the tube names
}
deriving (Show)

data DivaTube = DivaTube { dt_tube_name :: String
, dt_gates :: [DivaGate]
-- TODO compensation
}
deriving (Show)


--------------------------------------------------------------------------------

diva_file = "/home/kcurtis/project-base/software/apps/extract-gating-ml/testdata/PE_2.xml"


simple_name s = QName s Nothing Nothing

Expand All @@ -105,6 +127,18 @@ convert_to = read . strContent




load_root_node :: String -> IO Element
load_root_node filename = do
source <- T.readFile filename
let contents = parseXML source
entries = concatMap (findElements $ simple_name "bdfacs") (onlyElems contents)

if length entries == 1
then return $ entries !! 0
else error $ "ERROR could not load bdfacs root node from" <> filename

{-
gate_contents :: IO ()
gate_contents = do
source <- T.readFile diva_file
Expand All @@ -116,12 +150,38 @@ gate_contents = do
if 1 == length entries
then print . show $ (gates_in_specimens (entries !! 0)) !! 0
else print "ERROR expecting bdfacs root xml node"
-}


specimen_nodes :: Element -> [Element]
specimen_nodes root_node = findElements (simple_name "specimen") root_node

specimen_listing :: Element -> [String]
specimen_listing root_node = catMaybes . map (findAttr (simple_name "name")) $ nodes
where
nodes = specimen_nodes root_node

specimen_mapping :: Element -> Map.Map String Element
specimen_mapping root_node = Map.fromList specimens'
where
specimens = [ (findAttr (simple_name "name") s, s) | s <- specimen_nodes root_node]
specimens' = [ (fromJust name, s) | (name, s) <- specimens, isJust name]



{-
gates_in_specimens :: Element -> [DivaGateSet]
gates_in_specimens root_node = map collect_gate_set specimen_elements
where
specimen_elements = findElements (simple_name "specimen") root_node
-}

gates_in_specimen :: Element -> String -> Maybe DivaGateSet
gates_in_specimen root_node specimen_name = fmap collect_gate_set s
where
mapping = specimen_mapping root_node
s = Map.lookup specimen_name mapping



get_gates_element :: Element -> Element
Expand Down Expand Up @@ -190,3 +250,54 @@ parse_point p = do
x <- fmap read $ findAttr (simple_name "x") p
y <- fmap read $ findAttr (simple_name "y") p
return (x,y)


{-
compensation_in_specimen :: Element -> Maybe [ (String, [Double])]
compensation_in_specimen node = do
instrument_settings <- findElement (simple_name "instrument_settings") node
parameter_nodes <-
instrument_settings_nodes :: Element -> [Element]
instrument_settings_nodes node = findElements (simple_name "instrument_settings") node
tube_nodes :: Element -> [Element]
tube_nodes node = findElements (simple_name "tube") node
-}

parse_diva_tube :: Element -> DivaTube
parse_diva_tube tube_node = DivaTube{..}
where
dt_tube_name = case (findAttr (simple_name "name") tube_node) of
Nothing -> error "ERROR no tube name attribute in node"
Just x -> x
dt_gates = collect_gate_set tube_node

collect_tube_info :: Element -> [DivaTube]
collect_tube_info node = map parse_diva_tube nodes
where
nodes = findElements (simple_name "tube") node


find_duplicates :: (Ord a) => (Set.Set a) -> (Set.Set a) -> [a] -> Set.Set a
find_duplicates so_far dups [] = dups
find_duplicates so_far dups (x:xs) | Set.member x so_far = find_duplicates so_far (Set.insert x dups) xs
find_duplicates so_far dups (x:xs) | otherwise = find_duplicates (Set.insert x so_far) dups xs



load_diva_info :: String -> IO DivaInfo
load_diva_info filename = do
root <- load_root_node filename
let smap = specimen_mapping root
di_specimens = map fst $ Map.toList smap
di_specimen_tubes = Map.fromList [ (s, collect_tube_info e) | (s,e) <- Map.toList smap]
all_tubes = (map dt_tube_name . concat . map snd . Map.toList ) di_specimen_tubes
di_overlapping_tube_names = Set.toList $ find_duplicates Set.empty Set.empty all_tubes
di_has_overlap = length di_overlapping_tube_names > 0
di_tube_names = Set.toList . Set.fromList $ all_tubes
return DivaInfo {..}
4 changes: 2 additions & 2 deletions test/Spec.hs
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
main :: IO ()
main = putStrLn "Test suite not yet implemented"
{-# OPTIONS_GHC -F -pgmF hspec-discover #-}

0 comments on commit 9c3202b

Please sign in to comment.