aboutsummaryrefslogtreecommitdiffhomepage
path: root/scaffolder/src/Text/Edifact/Fetcher/Messages.hs
diff options
context:
space:
mode:
Diffstat (limited to 'scaffolder/src/Text/Edifact/Fetcher/Messages.hs')
-rw-r--r--scaffolder/src/Text/Edifact/Fetcher/Messages.hs84
1 files changed, 84 insertions, 0 deletions
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Messages.hs b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs
new file mode 100644
index 0000000..9daf98a
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs
@@ -0,0 +1,84 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Fetcher.Messages
4 ( fetchMessages
5 , messagesDirectory
6 ) where
7
8import Text.Edifact.Fetcher.Commons
9import Text.Edifact.Fetcher.Configuration
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.Messages.Specification (listSegments)
13
14import Data.Foldable (toList)
15import Data.List as L (nub, sort)
16import Data.Maybe (mapMaybe)
17import Data.Text as T (map)
18import Data.Text.Encoding as TE (decodeLatin1,
19 decodeUtf8)
20import Formatting
21import Text.Pandoc as Pandoc hiding (Format,
22 getOutputFile)
23import Text.Pandoc.Walk (query)
24import Text.Parsec (parse)
25
26messagesDirectory :: FilePath
27messagesDirectory = "messages"
28
29parseMessageCode :: Text -> Maybe MessageCode
30parseMessageCode =
31 let toMaybe (Right v) = Just v
32 toMaybe _ = Nothing
33 in toMaybe . parse messageCodeParser ""
34
35scanInlineForMessageCode :: Inline -> Maybe MessageCode
36scanInlineForMessageCode (Str label) = parseMessageCode label
37scanInlineForMessageCode _ = Nothing
38
39-- The trick here is to reverse the usage of UNH which is mandatory on every single message
40listAllMessages :: Fetcher [MessageCode]
41listAllMessages =
42 let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines
43 filterLink _ = []
44 extractMessageCodes = query filterLink
45 loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH"
46 in extractMessageCodes <$> loadUNHUsages
47
48listMessages :: Fetcher [MessageCode]
49listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList)
50
51fetchMessages :: Fetcher [SegmentCode]
52fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage
53
54fetchMessage :: MessageCode -> Fetcher [SegmentCode]
55fetchMessage code = do
56 retry 3 (fetchMessageDefinition code)
57 retry 3 (fetchMessageSummary code)
58
59fetchMessageDefinition :: MessageCode -> Fetcher ()
60fetchMessageDefinition code = do
61 say ("Fetching message " % fMessageCode % " definition") code
62 url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code
63 outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code
64 htmlToFile url TE.decodeUtf8 outputFile
65
66fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode]
67fetchMessageSummary code = do
68 say ("Fetching message " % fMessageCode % " summary") code
69 url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code
70 outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code
71 let decoder = cleanupAsciiArt . TE.decodeLatin1
72 htmlToFileWithParser url decoder outputFile listSegments
73
74-- This might not be the proper way to do it...
75-- Use Data.Text.Encoding.decodeUtf8With instead?
76cleanupAsciiArt :: Text -> Text
77cleanupAsciiArt =
78 let f 'Ä' = '-'
79 f '¿' = '+'
80 f '³' = '|'
81 f 'Ù' = '+'
82 f 'Á' = '+'
83 f c = c
84 in T.map f