-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Library for parsing, processing and visualization of taxonomy data
--   
--   Haskell cabal Taxonomy library contains tools, parsers, datastructures
--   and visualisation for the NCBI (National Center for Biotechnology
--   Information) Taxonomy datasources.
--   
--   It can utilize information from the <a>Entrez</a> REST interface via
--   <a>EntrezHTTP</a>, as well as from the files of the Taxonomy database
--   <a>dump</a>.
--   
--   Input data is parsed into a FGL based datastructure, which enables a
--   wealth of processing steps like node distances, retrieval of parent
--   nodes or extraction of subtrees.
--   
--   Trees can be visualised via dot-format (<a>graphviz</a>) or via
--   json-format (<a>http://d3js.org/d3js</a>).
--   
--   The <a>TaxonomyTools</a> package contains tools based on this package.
@package Taxonomy
@version 1.0.2


-- | This module contains data structures for taxonomy data
module Bio.TaxonomyData

-- | SimpleTaxon only contains the most relevant fields of a taxonomy
--   entry. For all annotated fields use the Taxon datatype and its
--   associated functions
data SimpleTaxon
SimpleTaxon :: Int -> Text -> Int -> Rank -> SimpleTaxon
[simpleTaxId] :: SimpleTaxon -> Int
[simpleScientificName] :: SimpleTaxon -> Text
[simpleParentTaxId] :: SimpleTaxon -> Int
[simpleRank] :: SimpleTaxon -> Rank

-- | Datastructure for tree comparisons
data CompareTaxon
CompareTaxon :: Text -> Rank -> [Int] -> CompareTaxon
[compareScientificName] :: CompareTaxon -> Text
[compareRank] :: CompareTaxon -> Rank
[inTree] :: CompareTaxon -> [Int]

-- | Data structure for Entrez taxonomy fetch result
data Taxon
Taxon :: Int -> String -> Int -> Rank -> String -> TaxGenCode -> TaxGenCode -> String -> [LineageTaxon] -> String -> String -> String -> Taxon
[taxonTaxId] :: Taxon -> Int
[taxonScientificName] :: Taxon -> String
[taxonParentTaxId] :: Taxon -> Int
[taxonRank] :: Taxon -> Rank
[division] :: Taxon -> String
[geneticCode] :: Taxon -> TaxGenCode
[mitoGeneticCode] :: Taxon -> TaxGenCode
[lineage] :: Taxon -> String
[lineageEx] :: Taxon -> [LineageTaxon]
[createDate] :: Taxon -> String
[updateDate] :: Taxon -> String
[pubDate] :: Taxon -> String
data TaxonName
TaxonName :: String -> String -> TaxonName
[classCDE] :: TaxonName -> String
[dispName] :: TaxonName -> String

-- | Lineage Taxons denote all parent Taxonomy nodes of a node retrieved by
--   Entrez fetch
data LineageTaxon
LineageTaxon :: Int -> String -> Rank -> LineageTaxon
[lineageTaxId] :: LineageTaxon -> Int
[lineageScienticName] :: LineageTaxon -> String
[lineageRank] :: LineageTaxon -> Rank

-- | NCBI Taxonomy database dump hierarchical data structure as defined in
--   <a>ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump_readme.txt</a>
data NCBITaxDump
NCBITaxDump :: [TaxCitation] -> [TaxDelNode] -> [TaxDivision] -> [TaxGenCode] -> [TaxMergedNode] -> [TaxName] -> [TaxNode] -> NCBITaxDump
[taxCitations] :: NCBITaxDump -> [TaxCitation]
[taxDelNodes] :: NCBITaxDump -> [TaxDelNode]
[taxDivisions] :: NCBITaxDump -> [TaxDivision]
[taxGenCodes] :: NCBITaxDump -> [TaxGenCode]
[taxMergedNodes] :: NCBITaxDump -> [TaxMergedNode]
[taxNames] :: NCBITaxDump -> [TaxName]
[taxNodes] :: NCBITaxDump -> [TaxNode]

-- | Datastructure for entries of Taxonomy database dump citations file
data TaxCitation
TaxCitation :: Int -> Maybe String -> Maybe Int -> Maybe Int -> Maybe String -> Maybe String -> Maybe [Int] -> TaxCitation
[citId] :: TaxCitation -> Int
[citKey] :: TaxCitation -> Maybe String
[pubmedId] :: TaxCitation -> Maybe Int
[medlineId] :: TaxCitation -> Maybe Int
[url] :: TaxCitation -> Maybe String
[text] :: TaxCitation -> Maybe String
[taxIdList] :: TaxCitation -> Maybe [Int]

-- | Datastructure for entries of Taxonomy database dump deleted nodes file
data TaxDelNode
TaxDelNode :: Int -> TaxDelNode
[delTaxId] :: TaxDelNode -> Int

-- | Datastructure for entries of Taxonomy database dump division file
data TaxDivision
TaxDivision :: Int -> String -> String -> Maybe String -> TaxDivision
[divisionId] :: TaxDivision -> Int
[divisionCDE] :: TaxDivision -> String
[divisonName] :: TaxDivision -> String
[divisionComments] :: TaxDivision -> Maybe String

-- | Datastructure for entries of Taxonomy database dump gencode file
data TaxGenCode
TaxGenCode :: Int -> Maybe String -> String -> String -> String -> TaxGenCode
[geneticCodeId] :: TaxGenCode -> Int
[abbreviation] :: TaxGenCode -> Maybe String
[geneCodeName] :: TaxGenCode -> String
[cde] :: TaxGenCode -> String
[starts] :: TaxGenCode -> String

-- | Datastructure for entries of Taxonomy database dump mergednodes file
data TaxMergedNode
TaxMergedNode :: Int -> Int -> TaxMergedNode
[oldTaxId] :: TaxMergedNode -> Int
[newTaxId] :: TaxMergedNode -> Int

-- | Datastructure for entries of Taxonomy database dump names file
data TaxName
TaxName :: Int -> Text -> Text -> Text -> TaxName
[nameTaxId] :: TaxName -> Int
[nameTxt] :: TaxName -> Text
[uniqueName] :: TaxName -> Text
[nameClass] :: TaxName -> Text

-- | Taxonomic ranks: NCBI uses the uncommon Speciessubgroup
data Rank
Norank :: Rank
Form :: Rank
Variety :: Rank
Infraspecies :: Rank
Subspecies :: Rank
Speciessubgroup :: Rank
Species :: Rank
Speciesgroup :: Rank
Superspecies :: Rank
Series :: Rank
Section :: Rank
Subgenus :: Rank
Genus :: Rank
Subtribe :: Rank
Tribe :: Rank
Supertribe :: Rank
Subfamily :: Rank
Family :: Rank
Superfamily :: Rank
Parvorder :: Rank
Infraorder :: Rank
Suborder :: Rank
Order :: Rank
Superorder :: Rank
Magnorder :: Rank
Cohort :: Rank
Legion :: Rank
Parvclass :: Rank
Infraclass :: Rank
Subclass :: Rank
Class :: Rank
Superclass :: Rank
Microphylum :: Rank
Infraphylum :: Rank
Subphylum :: Rank
Phylum :: Rank
Superphylum :: Rank
Infrakingdom :: Rank
Subkingdom :: Rank
Kingdom :: Rank
Superkingdom :: Rank
Domain :: Rank
readsRank :: String -> [(Rank, String)]

-- | Datastructure for entries of Taxonomy database dump nodes file
data TaxNode
TaxNode :: Int -> Int -> Rank -> Maybe String -> String -> Bool -> String -> Bool -> String -> Bool -> Bool -> Bool -> Maybe String -> TaxNode
[taxId] :: TaxNode -> Int
[parentTaxId] :: TaxNode -> Int
[rank] :: TaxNode -> Rank
[emblCode] :: TaxNode -> Maybe String
[nodeDivisionId] :: TaxNode -> String
[inheritedDivFlag] :: TaxNode -> Bool
[nodeGeneticCodeId] :: TaxNode -> String
[inheritedGCFlag] :: TaxNode -> Bool
[mitochondrialGeneticCodeId] :: TaxNode -> String
[inheritedMGCFlag] :: TaxNode -> Bool
[genBankHiddenFlag] :: TaxNode -> Bool
[hiddenSubtreeRootFlag] :: TaxNode -> Bool
[nodeComments] :: TaxNode -> Maybe String

-- | Simple Gene2Accession table
data SimpleGene2Accession
SimpleGene2Accession :: Int -> String -> SimpleGene2Accession
[simpleTaxIdEntry] :: SimpleGene2Accession -> Int
[simpleGenomicNucleotideAccessionVersion] :: SimpleGene2Accession -> String

-- | Datastructure for Gene2Accession table
data Gene2Accession
Gene2Accession :: Int -> Int -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> String -> Gene2Accession
[taxIdEntry] :: Gene2Accession -> Int
[geneID] :: Gene2Accession -> Int
[status] :: Gene2Accession -> String
[rnaNucleotideAccessionVersion] :: Gene2Accession -> String
[rnaNucleotideGi] :: Gene2Accession -> String
[proteinAccessionVersion] :: Gene2Accession -> String
[proteinGi] :: Gene2Accession -> String
[genomicNucleotideAccessionVersion] :: Gene2Accession -> String
[genomicNucleotideGi] :: Gene2Accession -> String
[startPositionOnTheGenomicAccession] :: Gene2Accession -> String
[endPositionOnTheGenomicAccession] :: Gene2Accession -> String
[orientation] :: Gene2Accession -> String
[assembly] :: Gene2Accession -> String
[maturePeptideAccessionVersion] :: Gene2Accession -> String
[maturePeptideGi] :: Gene2Accession -> String
simpleTaxonJSONValue :: Gr SimpleTaxon Double -> Node -> Value
instance GHC.Read.Read Bio.TaxonomyData.Gene2Accession
instance GHC.Classes.Eq Bio.TaxonomyData.Gene2Accession
instance GHC.Show.Show Bio.TaxonomyData.Gene2Accession
instance GHC.Read.Read Bio.TaxonomyData.SimpleGene2Accession
instance GHC.Classes.Eq Bio.TaxonomyData.SimpleGene2Accession
instance GHC.Show.Show Bio.TaxonomyData.SimpleGene2Accession
instance GHC.Classes.Eq Bio.TaxonomyData.NCBITaxDump
instance GHC.Read.Read Bio.TaxonomyData.NCBITaxDump
instance GHC.Show.Show Bio.TaxonomyData.NCBITaxDump
instance GHC.Classes.Eq Bio.TaxonomyData.TaxNode
instance GHC.Read.Read Bio.TaxonomyData.TaxNode
instance GHC.Show.Show Bio.TaxonomyData.TaxNode
instance GHC.Classes.Eq Bio.TaxonomyData.SimpleTaxon
instance GHC.Read.Read Bio.TaxonomyData.SimpleTaxon
instance GHC.Show.Show Bio.TaxonomyData.SimpleTaxon
instance GHC.Classes.Eq Bio.TaxonomyData.CompareTaxon
instance GHC.Read.Read Bio.TaxonomyData.CompareTaxon
instance GHC.Show.Show Bio.TaxonomyData.CompareTaxon
instance GHC.Classes.Eq Bio.TaxonomyData.Taxon
instance GHC.Show.Show Bio.TaxonomyData.Taxon
instance GHC.Classes.Eq Bio.TaxonomyData.LineageTaxon
instance GHC.Show.Show Bio.TaxonomyData.LineageTaxon
instance GHC.Enum.Enum Bio.TaxonomyData.Rank
instance GHC.Enum.Bounded Bio.TaxonomyData.Rank
instance GHC.Show.Show Bio.TaxonomyData.Rank
instance GHC.Classes.Ord Bio.TaxonomyData.Rank
instance GHC.Classes.Eq Bio.TaxonomyData.Rank
instance GHC.Classes.Eq Bio.TaxonomyData.TaxName
instance GHC.Read.Read Bio.TaxonomyData.TaxName
instance GHC.Show.Show Bio.TaxonomyData.TaxName
instance GHC.Classes.Eq Bio.TaxonomyData.TaxMergedNode
instance GHC.Read.Read Bio.TaxonomyData.TaxMergedNode
instance GHC.Show.Show Bio.TaxonomyData.TaxMergedNode
instance GHC.Classes.Eq Bio.TaxonomyData.TaxGenCode
instance GHC.Read.Read Bio.TaxonomyData.TaxGenCode
instance GHC.Show.Show Bio.TaxonomyData.TaxGenCode
instance GHC.Classes.Eq Bio.TaxonomyData.TaxDivision
instance GHC.Read.Read Bio.TaxonomyData.TaxDivision
instance GHC.Show.Show Bio.TaxonomyData.TaxDivision
instance GHC.Classes.Eq Bio.TaxonomyData.TaxDelNode
instance GHC.Read.Read Bio.TaxonomyData.TaxDelNode
instance GHC.Show.Show Bio.TaxonomyData.TaxDelNode
instance GHC.Classes.Eq Bio.TaxonomyData.TaxCitation
instance GHC.Read.Read Bio.TaxonomyData.TaxCitation
instance GHC.Show.Show Bio.TaxonomyData.TaxCitation
instance GHC.Classes.Eq Bio.TaxonomyData.TaxonName
instance GHC.Show.Show Bio.TaxonomyData.TaxonName
instance GHC.Read.Read Bio.TaxonomyData.Rank
instance Data.Aeson.Types.ToJSON.ToJSON (Data.Graph.Inductive.PatriciaTree.Gr Bio.TaxonomyData.SimpleTaxon GHC.Types.Double)


-- | Functions for parsing, processing and visualization of taxonomy data.
--   
--   <h3>Usage example:</h3>
--   
--   <ul>
--   <li>Read in taxonomy data<pre>eitherTaxTree &lt;- readNamedTaxonomy
--   "/path/to/NCBI_taxonomydump_directory"</pre></li>
--   </ul>
--   
--   <ul>
--   <li>Process data<pre>let subtree = extractTaxonomySubTreebyLevel [562]
--   (fromRight eitherTaxTree) (Just 4)</pre></li>
--   <li>Visualize result<pre>writeDotTree "/path/to/dotdirectory" True subtree</pre></li>
--   </ul>
module Bio.Taxonomy

-- | NCBI taxonomy dump nodes and names in the input directory path are
--   parsed and a SimpleTaxon tree is generated.
readTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))

-- | NCBI taxonomy dump nodes and names in the input directory path are
--   parsed and a SimpleTaxon tree with scientific names for each node is
--   generated.
readNamedTaxonomy :: String -> IO (Either ParseError (Gr SimpleTaxon Double))

-- | NCBI taxonomy dump data provided as input string is parsed and a
--   SimpleTaxon tree is generated.
parseTaxonomy :: String -> Either ParseError (Gr SimpleTaxon Double)

-- | parse NCBITaxCitations from input string
parseNCBITaxCitations :: String -> Either ParseError [TaxCitation]

-- | parse NCBITaxCitations from input filePath
readNCBITaxCitations :: String -> IO (Either ParseError [TaxCitation])

-- | parse NCBITaxDelNodes from input string
parseNCBITaxDelNodes :: String -> Either ParseError [TaxDelNode]

-- | parse NCBITaxDelNodes from input filePath
readNCBITaxDelNodes :: String -> IO (Either ParseError [TaxDelNode])

-- | parse NCBITaxDivisions from input string
parseNCBITaxDivisions :: String -> Either ParseError [TaxDivision]

-- | parse NCBITaxDivisions from input filePath
readNCBITaxDivisions :: String -> IO (Either ParseError [TaxDivision])

-- | parse NCBITaxGenCodes from input string
parseNCBITaxGenCodes :: String -> Either ParseError [TaxGenCode]

-- | parse NCBITaxGenCodes from input filePath
readNCBITaxGenCodes :: String -> IO (Either ParseError [TaxGenCode])

-- | parse NCBITaxMergedNodes from input string
parseNCBITaxMergedNodes :: String -> Either ParseError [TaxMergedNode]

-- | parse NCBITaxMergedNodes from input filePath
readNCBITaxMergedNodes :: String -> IO (Either ParseError [TaxMergedNode])

-- | parse NCBITaxNames from input string
parseNCBITaxNames :: String -> Either ParseError [TaxName]

-- | parse NCBITaxNames from input filePath
readNCBITaxNames :: String -> IO (Either ParseError [TaxName])

-- | parse NCBITaxNodes from input string
parseNCBITaxNodes :: String -> Either ParseError TaxNode

-- | parse NCBITaxNodes from input filePath
readNCBITaxNodes :: String -> IO (Either ParseError [TaxNode])

-- | parse NCBISimpleTaxons from input string
parseNCBISimpleTaxons :: String -> Either ParseError SimpleTaxon

-- | parse NCBISimpleTaxons from input filePath
readNCBISimpleTaxons :: String -> IO (Either ParseError [SimpleTaxon])

-- | Parse the input as NCBITax datatype
readNCBITaxonomyDatabase :: String -> IO (Either [String] NCBITaxDump)

-- | Extract a subtree corresponding to input node paths to root. Only
--   nodes in level number distance to root are included. Used in
--   Ids2TreeCompare tool.
compareSubTrees :: [Gr SimpleTaxon Double] -> (Int, Gr CompareTaxon Double)

-- | Extract a subtree corresponding to input node paths to root. Only
--   nodes in level number distance to root are included. Used in Ids2Tree
--   tool.
extractTaxonomySubTreebyLevel :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double

-- | Extract a subtree corresponding to input node paths to root. Only
--   nodes in level number distance to root are included. Used in Ids2Tree
--   tool.
extractTaxonomySubTreebyLevelNew :: [Node] -> Gr SimpleTaxon Double -> Maybe Int -> Gr SimpleTaxon Double

-- | Extract a subtree corresponding to input node paths to root. If a Rank
--   is provided, all nodes with a rank less than or equal to it are omitted
extractTaxonomySubTreebyRank :: [Node] -> Gr SimpleTaxon Double -> Maybe Rank -> Gr SimpleTaxon Double

-- | Returns path between 2 maybe nodes. Used in TreeDistance tool.
safeNodePath :: Maybe Node -> Gr SimpleTaxon Double -> Maybe Node -> Either String [Node]

-- | Extract parent node with specified Rank
getParentbyRank :: Node -> Gr SimpleTaxon Double -> Maybe Rank -> Maybe (Node, SimpleTaxon)

-- | Draw tree comparison graph in dot format. Used in Ids2TreeCompare
--   tool.
drawTaxonomyComparison :: Bool -> (Int, Gr CompareTaxon Double) -> String

-- | Draw graph in dot format. Used in Ids2Tree tool.
drawTaxonomy :: Bool -> Gr SimpleTaxon Double -> String

-- | Write tree representation either as dot or json to provided file path
writeTree :: String -> String -> Bool -> Gr SimpleTaxon Double -> IO ()

-- | Write tree representation as dot to provided file path. Graphviz tools
--   like dot can be applied to the written .dot file to generate e.g.
--   svg-format images.
writeDotTree :: String -> Bool -> Gr SimpleTaxon Double -> IO ()

-- | Write tree representation as json to provided file path. You can
--   visualize the result for example with D3.js.
writeJsonTree :: String -> Gr SimpleTaxon Double -> IO ()
