aboutsummaryrefslogtreecommitdiffhomepage
path: root/scaffolder/src/Text/Edifact/Fetcher
diff options
context:
space:
mode:
Diffstat (limited to 'scaffolder/src/Text/Edifact/Fetcher')
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Commons.hs87
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Composites.hs31
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Configuration.hs43
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Messages.hs84
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Segments.hs41
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Simples.hs27
6 files changed, 313 insertions, 0 deletions
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Commons.hs b/scaffolder/src/Text/Edifact/Fetcher/Commons.hs
new file mode 100644
index 0000000..1a6a058
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Commons.hs
@@ -0,0 +1,87 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Commons where
4
5import Text.Edifact.Fetcher.Configuration
6import Text.Edifact.Scaffolder.Commons
7
8import Control.Monad ((>=>))
9import Control.Monad.Error.Class (MonadError, catchError)
10import Control.Monad.IO.Class (MonadIO)
11import Control.Monad.Reader (ReaderT, runReaderT)
12import Control.Monad.Trans.Class (lift)
13import Data.ByteString (ByteString)
14import Data.Char (toLower)
15import Data.Text as T (unpack, pack)
16import Data.Text.IO as TIO (readFile, writeFile)
17import Formatting
18import System.Directory (doesFileExist)
19import Text.Pandoc as Pandoc hiding (Format,
20 getOutputFile)
21import Text.Parsec (Parsec, runParser)
22
23type Fetcher = ReaderT FetchingEnv PandocIO
24
25runFetcher :: Fetcher () -> FetchingEnv -> IO ()
26runFetcher f = Pandoc.runIOorExplode . runReaderT f
27
28getOutputFile :: Format String (a -> String) -> FilePath -> a -> Fetcher FilePath
29getOutputFile f d c = do
30 home <- getSpecificationHome
31 pure (formatToString (string % "/" % string % "/" % f) home d c)
32
33getUrl :: Format String (a -> String) -> a -> Fetcher String
34getUrl f c = do
35 rev <- getTargetRevision
36 pure (formatToString ("https://service.unece.org/trade/untdid/" % fRevisionLower % f) rev c)
37
38getSpecificationHome :: Fetcher FilePath
39getSpecificationHome = do
40 home <- getHome
41 rev <- getTargetRevision
42 pure (home </> formatToString fRevision rev)
43
44htmlToFile :: String -> (ByteString -> Text) -> FilePath -> Fetcher ()
45htmlToFile url decoder outputFile = () <$ tryCacheOrHtml decoder url outputFile
46
47htmlToFileWithParser :: (Monoid a, Monoid u) => String -> (ByteString -> Text) -> FilePath -> Parsec String u a -> Fetcher a
48htmlToFileWithParser url decoder outputFile parser = do
49 specification <- tryCacheOrHtml decoder url outputFile
50 either (error . show) pure
51 (runParser parser mempty "" (T.unpack specification))
52
53tryCacheOrHtml :: (ByteString -> Text) -> String -> FilePath -> Fetcher Text
54tryCacheOrHtml decoder url path = do
55 fileExists' <- liftIO $ doesFileExist path
56 content <- if fileExists'
57 then liftIO $ TIO.readFile path
58 else readHtmlFromURL decoder url >>= writePlain def
59 content <$ liftIO (TIO.writeFile path content)
60
61readHtmlFromURL :: (ByteString -> Text) -> String -> Fetcher Pandoc
62readHtmlFromURL decoder = lift . (openURL >=> readHtml def . decoder . fst) . pack
63
64lower :: Format r (String -> r)
65lower = mapf (fmap toLower) string
66
67fRevision :: Format r (Revision -> r)
68fRevision = mapf getRevision string
69
70fRevisionLower :: Format r (Revision -> r)
71fRevisionLower = mapf getRevision lower
72
73fMessageCodeLower :: Format r (MessageCode -> r)
74fMessageCodeLower = mapf getMessageCode lower
75
76fSegmentCodeLower :: Format r (SegmentCode -> r)
77fSegmentCodeLower = mapf getSegmentCode lower
78
79fCompositeCodeLower :: Format r (CompositeCode -> r)
80fCompositeCodeLower = mapf getCompositeCode lower
81
82fSimpleCodeLower :: Format r (SimpleCode -> r)
83fSimpleCodeLower = mapf getSimpleCode lower
84
85retry :: (MonadIO m, MonadError b m) => Int -> m a -> m a
86retry n f | n > 1 = f `catchError` const (say "Retrying" >> retry (n-1) f)
87 | otherwise = f
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Composites.hs b/scaffolder/src/Text/Edifact/Fetcher/Composites.hs
new file mode 100644
index 0000000..8f94cc9
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Composites.hs
@@ -0,0 +1,31 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Composites
4 ( fetchComposites
5 , compositesDirectory
6 ) where
7
8import Text.Edifact.Fetcher.Commons
9import Text.Edifact.Scaffolder.Commons
10
11import Text.Edifact.Scaffolder.Composites.Specification (listSimples)
12
13import Data.List as L (nub,
14 sort)
15import Data.Text.Encoding as TE (decodeUtf8)
16import Formatting
17
18compositesDirectory :: FilePath
19compositesDirectory = "composites"
20
21fetchComposites :: ([CompositeCode], [SimpleCode]) -> Fetcher [SimpleCode]
22fetchComposites (composites, segments) =
23 let compactSimpleCodes = L.nub . L.sort . mappend segments . concat
24 in compactSimpleCodes <$> traverse (retry 3 . fetchComposite) composites
25
26fetchComposite :: CompositeCode -> Fetcher [SimpleCode]
27fetchComposite code = do
28 say ("Fetching composite " % fCompositeCode) code
29 url <- getUrl ("/trcd/trcd" % fCompositeCodeLower % ".htm") code
30 outputFile <- getOutputFile (fCompositeCodeLower % ".txt") compositesDirectory code
31 htmlToFileWithParser url TE.decodeUtf8 outputFile (snd <$> listSimples)
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Configuration.hs b/scaffolder/src/Text/Edifact/Fetcher/Configuration.hs
new file mode 100644
index 0000000..a074641
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Configuration.hs
@@ -0,0 +1,43 @@
1{-# LANGUAGE FlexibleContexts #-}
2{-# LANGUAGE OverloadedStrings #-}
3
4module Text.Edifact.Fetcher.Configuration
5 ( -- * Fetching environment
6 FetchingEnv(..)
7 -- * Parsing helpers
8 , readSelectMessages
9 -- * Shortcuts for reading the environment
10 , getTargetRevision
11 , getHome
12 , getSelectedMessages
13 ) where
14
15import Text.Edifact.Scaffolder.Commons (MessageCode, Revision,
16 messageCodeParser)
17
18import Control.Monad.Reader.Class (MonadReader, asks)
19import Data.List.NonEmpty (NonEmpty, nonEmpty)
20import Text.Parsec (char, parse, sepBy1)
21
22data FetchingEnv = FetchingEnv { fetchingRevision :: Revision
23 , specificationHome :: FilePath
24 , selectedMessages :: Maybe (NonEmpty MessageCode)
25 }
26
27getTargetRevision :: MonadReader FetchingEnv m => m Revision
28getTargetRevision = asks fetchingRevision
29
30getHome :: MonadReader FetchingEnv m => m FilePath
31getHome = asks specificationHome
32
33getSelectedMessages :: MonadReader FetchingEnv m => m (Maybe (NonEmpty MessageCode))
34getSelectedMessages = asks selectedMessages
35
36readSelectMessages :: Maybe String -> Maybe (NonEmpty MessageCode)
37readSelectMessages value =
38 let tryParse p s = toMaybe . parse p s
39 toMaybe (Right v) = Just v
40 toMaybe _ = Nothing
41 messageCodesParser = messageCodeParser `sepBy1` comma
42 comma = char ','
43 in value >>= tryParse messageCodesParser "" >>= nonEmpty
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Messages.hs b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs
new file mode 100644
index 0000000..9daf98a
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs
@@ -0,0 +1,84 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Messages
4 ( fetchMessages
5 , messagesDirectory
6 ) where
7
8import Text.Edifact.Fetcher.Commons
9import Text.Edifact.Fetcher.Configuration
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.Messages.Specification (listSegments)
13
14import Data.Foldable (toList)
15import Data.List as L (nub, sort)
16import Data.Maybe (mapMaybe)
17import Data.Text as T (map)
18import Data.Text.Encoding as TE (decodeLatin1,
19 decodeUtf8)
20import Formatting
21import Text.Pandoc as Pandoc hiding (Format,
22 getOutputFile)
23import Text.Pandoc.Walk (query)
24import Text.Parsec (parse)
25
26messagesDirectory :: FilePath
27messagesDirectory = "messages"
28
29parseMessageCode :: Text -> Maybe MessageCode
30parseMessageCode =
31 let toMaybe (Right v) = Just v
32 toMaybe _ = Nothing
33 in toMaybe . parse messageCodeParser ""
34
35scanInlineForMessageCode :: Inline -> Maybe MessageCode
36scanInlineForMessageCode (Str label) = parseMessageCode label
37scanInlineForMessageCode _ = Nothing
38
39-- The trick here is to reverse the usage of UNH which is mandatory on every single message
40listAllMessages :: Fetcher [MessageCode]
41listAllMessages =
42 let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines
43 filterLink _ = []
44 extractMessageCodes = query filterLink
45 loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH"
46 in extractMessageCodes <$> loadUNHUsages
47
48listMessages :: Fetcher [MessageCode]
49listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList)
50
51fetchMessages :: Fetcher [SegmentCode]
52fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage
53
54fetchMessage :: MessageCode -> Fetcher [SegmentCode]
55fetchMessage code = do
56 retry 3 (fetchMessageDefinition code)
57 retry 3 (fetchMessageSummary code)
58
59fetchMessageDefinition :: MessageCode -> Fetcher ()
60fetchMessageDefinition code = do
61 say ("Fetching message " % fMessageCode % " definition") code
62 url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code
63 outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code
64 htmlToFile url TE.decodeUtf8 outputFile
65
66fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode]
67fetchMessageSummary code = do
68 say ("Fetching message " % fMessageCode % " summary") code
69 url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code
70 outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code
71 let decoder = cleanupAsciiArt . TE.decodeLatin1
72 htmlToFileWithParser url decoder outputFile listSegments
73
74-- This might not be the proper way to do it...
75-- Use Data.Text.Encoding.decodeUtf8With instead?
76cleanupAsciiArt :: Text -> Text
77cleanupAsciiArt =
78 let f 'Ä' = '-'
79 f '¿' = '+'
80 f '³' = '|'
81 f 'Ù' = '+'
82 f 'Á' = '+'
83 f c = c
84 in T.map f
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Segments.hs b/scaffolder/src/Text/Edifact/Fetcher/Segments.hs
new file mode 100644
index 0000000..dda1d88
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Segments.hs
@@ -0,0 +1,41 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Segments
4 ( fetchSegments
5 , segmentsDirectory
6 ) where
7
8import Text.Edifact.Fetcher.Commons
9import Text.Edifact.Scaffolder.Commons
10
11import Text.Edifact.Scaffolder.Segments.Specification (listCompositesAndSimples)
12import Text.Edifact.Scaffolder.Segments.Types (Element,
13 getComposite,
14 getSimple)
15
16import Data.Bifunctor (bimap)
17import Data.List as L (nub,
18 partition,
19 sort)
20import Data.Maybe (isJust,
21 mapMaybe)
22import Data.Text.Encoding as TE (decodeUtf8)
23import Formatting
24
25segmentsDirectory :: FilePath
26segmentsDirectory = "segments"
27
28fetchSegments :: [SegmentCode] -> Fetcher ([CompositeCode], [SimpleCode])
29fetchSegments = fmap (partitionElements . L.nub . L.sort . concat) . traverse (retry 3 . fetchSegment)
30
31partitionElements :: [Element] -> ([CompositeCode], [SimpleCode])
32partitionElements =
33 let isComposite = isJust . getComposite
34 in bimap (mapMaybe getComposite) (mapMaybe getSimple) . partition isComposite
35
36fetchSegment :: SegmentCode -> Fetcher [Element]
37fetchSegment code = do
38 say ("Fetching segment " % fSegmentCode) code
39 url <- getUrl ("/trsd/trsd" % fSegmentCodeLower % ".htm") code
40 outputFile <- getOutputFile (fSegmentCodeLower % ".txt") segmentsDirectory code
41 htmlToFileWithParser url TE.decodeUtf8 outputFile (snd <$> listCompositesAndSimples)
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Simples.hs b/scaffolder/src/Text/Edifact/Fetcher/Simples.hs
new file mode 100644
index 0000000..47951ad
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Simples.hs
@@ -0,0 +1,27 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Simples
4 ( fetchSimples
5 , simplesDirectory
6 ) where
7
8import Text.Edifact.Fetcher.Commons
9import Text.Edifact.Scaffolder.Commons
10
11
12import Data.Foldable (traverse_)
13import Data.Text.Encoding as TE (decodeUtf8)
14import Formatting
15
16simplesDirectory :: FilePath
17simplesDirectory = "simples"
18
19fetchSimples :: [SimpleCode] -> Fetcher ()
20fetchSimples = traverse_ (retry 3 . fetchSimple)
21
22fetchSimple :: SimpleCode -> Fetcher ()
23fetchSimple code = do
24 say ("Fetching simple " % fSimpleCode) code
25 url <- getUrl ("/uncl/uncl" % fSimpleCodeLower % ".htm") code
26 outputFile <- getOutputFile (fSimpleCodeLower % ".txt") simplesDirectory code
27 htmlToFile url TE.decodeUtf8 outputFile