]> git.immae.eu Git - github/fretlink/edi-parser.git/blob - scaffolder/src/Text/Edifact/Fetcher/Messages.hs
Release code as open source
[github/fretlink/edi-parser.git] / scaffolder / src / Text / Edifact / Fetcher / Messages.hs
1 {-# LANGUAGE OverloadedStrings #-}
2
3 module Text.Edifact.Fetcher.Messages
4 ( fetchMessages
5 , messagesDirectory
6 ) where
7
8 import Text.Edifact.Fetcher.Commons
9 import Text.Edifact.Fetcher.Configuration
10 import Text.Edifact.Scaffolder.Commons
11
12 import Text.Edifact.Scaffolder.Messages.Specification (listSegments)
13
14 import Data.Foldable (toList)
15 import Data.List as L (nub, sort)
16 import Data.Maybe (mapMaybe)
17 import Data.Text as T (map)
18 import Data.Text.Encoding as TE (decodeLatin1,
19 decodeUtf8)
20 import Formatting
21 import Text.Pandoc as Pandoc hiding (Format,
22 getOutputFile)
23 import Text.Pandoc.Walk (query)
24 import Text.Parsec (parse)
25
26 messagesDirectory :: FilePath
27 messagesDirectory = "messages"
28
29 parseMessageCode :: Text -> Maybe MessageCode
30 parseMessageCode =
31 let toMaybe (Right v) = Just v
32 toMaybe _ = Nothing
33 in toMaybe . parse messageCodeParser ""
34
35 scanInlineForMessageCode :: Inline -> Maybe MessageCode
36 scanInlineForMessageCode (Str label) = parseMessageCode label
37 scanInlineForMessageCode _ = Nothing
38
39 -- The trick here is to reverse the usage of UNH which is mandatory on every single message
40 listAllMessages :: Fetcher [MessageCode]
41 listAllMessages =
42 let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines
43 filterLink _ = []
44 extractMessageCodes = query filterLink
45 loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH"
46 in extractMessageCodes <$> loadUNHUsages
47
48 listMessages :: Fetcher [MessageCode]
49 listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList)
50
51 fetchMessages :: Fetcher [SegmentCode]
52 fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage
53
54 fetchMessage :: MessageCode -> Fetcher [SegmentCode]
55 fetchMessage code = do
56 retry 3 (fetchMessageDefinition code)
57 retry 3 (fetchMessageSummary code)
58
59 fetchMessageDefinition :: MessageCode -> Fetcher ()
60 fetchMessageDefinition code = do
61 say ("Fetching message " % fMessageCode % " definition") code
62 url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code
63 outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code
64 htmlToFile url TE.decodeUtf8 outputFile
65
66 fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode]
67 fetchMessageSummary code = do
68 say ("Fetching message " % fMessageCode % " summary") code
69 url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code
70 outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code
71 let decoder = cleanupAsciiArt . TE.decodeLatin1
72 htmlToFileWithParser url decoder outputFile listSegments
73
74 -- This might not be the proper way to do it...
75 -- Use Data.Text.Encoding.decodeUtf8With instead?
76 cleanupAsciiArt :: Text -> Text
77 cleanupAsciiArt =
78 let f 'Ä' = '-'
79 f '¿' = '+'
80 f '³' = '|'
81 f 'Ù' = '+'
82 f 'Á' = '+'
83 f c = c
84 in T.map f