1 {-# LANGUAGE OverloadedStrings #-}
3 module Text.Edifact.Fetcher.Messages
8 import Text.Edifact.Fetcher.Commons
9 import Text.Edifact.Fetcher.Configuration
10 import Text.Edifact.Scaffolder.Commons
12 import Text.Edifact.Scaffolder.Messages.Specification (listSegments)
14 import Data.Foldable (toList)
15 import Data.List as L (nub, sort)
16 import Data.Maybe (mapMaybe)
17 import Data.Text as T (map)
18 import Data.Text.Encoding as TE (decodeLatin1,
21 import Text.Pandoc as Pandoc hiding (Format,
23 import Text.Pandoc.Walk (query)
24 import Text.Parsec (parse)
26 messagesDirectory :: FilePath
27 messagesDirectory = "messages"
29 parseMessageCode :: Text -> Maybe MessageCode
31 let toMaybe (Right v) = Just v
33 in toMaybe . parse messageCodeParser ""
35 scanInlineForMessageCode :: Inline -> Maybe MessageCode
36 scanInlineForMessageCode (Str label) = parseMessageCode label
37 scanInlineForMessageCode _ = Nothing
39 -- The trick here is to reverse the usage of UNH which is mandatory on every single message
40 listAllMessages :: Fetcher [MessageCode]
42 let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines
44 extractMessageCodes = query filterLink
45 loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH"
46 in extractMessageCodes <$> loadUNHUsages
48 listMessages :: Fetcher [MessageCode]
49 listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList)
51 fetchMessages :: Fetcher [SegmentCode]
52 fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage
54 fetchMessage :: MessageCode -> Fetcher [SegmentCode]
55 fetchMessage code = do
56 retry 3 (fetchMessageDefinition code)
57 retry 3 (fetchMessageSummary code)
59 fetchMessageDefinition :: MessageCode -> Fetcher ()
60 fetchMessageDefinition code = do
61 say ("Fetching message " % fMessageCode % " definition") code
62 url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code
63 outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code
64 htmlToFile url TE.decodeUtf8 outputFile
66 fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode]
67 fetchMessageSummary code = do
68 say ("Fetching message " % fMessageCode % " summary") code
69 url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code
70 outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code
71 let decoder = cleanupAsciiArt . TE.decodeLatin1
72 htmlToFileWithParser url decoder outputFile listSegments
74 -- This might not be the proper way to do it...
75 -- Use Data.Text.Encoding.decodeUtf8With instead?
76 cleanupAsciiArt :: Text -> Text