diff options
author | Frédéric Menou <frederic.menou@fretlink.com> | 2016-12-08 10:19:15 +0200 |
---|---|---|
committer | Ismaël Bouya <ismael.bouya@fretlink.com> | 2022-05-17 18:01:51 +0200 |
commit | a9d77a20008efe82862cc1adbfa7a6d4f09f8ff7 (patch) | |
tree | adf3186fdccaeef19151026cdfbd38a530cf9ecb /scaffolder/src/Text/Edifact/Fetcher/Messages.hs | |
download | edi-parser-master.tar.gz edi-parser-master.tar.zst edi-parser-master.zip |
Diffstat (limited to 'scaffolder/src/Text/Edifact/Fetcher/Messages.hs')
-rw-r--r-- | scaffolder/src/Text/Edifact/Fetcher/Messages.hs | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/scaffolder/src/Text/Edifact/Fetcher/Messages.hs b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs new file mode 100644 index 0000000..9daf98a --- /dev/null +++ b/scaffolder/src/Text/Edifact/Fetcher/Messages.hs | |||
@@ -0,0 +1,84 @@ | |||
1 | {-# LANGUAGE OverloadedStrings #-} | ||
2 | |||
3 | module Text.Edifact.Fetcher.Messages | ||
4 | ( fetchMessages | ||
5 | , messagesDirectory | ||
6 | ) where | ||
7 | |||
8 | import Text.Edifact.Fetcher.Commons | ||
9 | import Text.Edifact.Fetcher.Configuration | ||
10 | import Text.Edifact.Scaffolder.Commons | ||
11 | |||
12 | import Text.Edifact.Scaffolder.Messages.Specification (listSegments) | ||
13 | |||
14 | import Data.Foldable (toList) | ||
15 | import Data.List as L (nub, sort) | ||
16 | import Data.Maybe (mapMaybe) | ||
17 | import Data.Text as T (map) | ||
18 | import Data.Text.Encoding as TE (decodeLatin1, | ||
19 | decodeUtf8) | ||
20 | import Formatting | ||
21 | import Text.Pandoc as Pandoc hiding (Format, | ||
22 | getOutputFile) | ||
23 | import Text.Pandoc.Walk (query) | ||
24 | import Text.Parsec (parse) | ||
25 | |||
26 | messagesDirectory :: FilePath | ||
27 | messagesDirectory = "messages" | ||
28 | |||
29 | parseMessageCode :: Text -> Maybe MessageCode | ||
30 | parseMessageCode = | ||
31 | let toMaybe (Right v) = Just v | ||
32 | toMaybe _ = Nothing | ||
33 | in toMaybe . parse messageCodeParser "" | ||
34 | |||
35 | scanInlineForMessageCode :: Inline -> Maybe MessageCode | ||
36 | scanInlineForMessageCode (Str label) = parseMessageCode label | ||
37 | scanInlineForMessageCode _ = Nothing | ||
38 | |||
39 | -- The trick here is to reverse the usage of UNH which is mandatory on every single message | ||
40 | listAllMessages :: Fetcher [MessageCode] | ||
41 | listAllMessages = | ||
42 | let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines | ||
43 | filterLink _ = [] | ||
44 | extractMessageCodes = query filterLink | ||
45 | loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH" | ||
46 | in extractMessageCodes <$> loadUNHUsages | ||
47 | |||
48 | listMessages :: Fetcher [MessageCode] | ||
49 | listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList) | ||
50 | |||
51 | fetchMessages :: Fetcher [SegmentCode] | ||
52 | fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage | ||
53 | |||
54 | fetchMessage :: MessageCode -> Fetcher [SegmentCode] | ||
55 | fetchMessage code = do | ||
56 | retry 3 (fetchMessageDefinition code) | ||
57 | retry 3 (fetchMessageSummary code) | ||
58 | |||
59 | fetchMessageDefinition :: MessageCode -> Fetcher () | ||
60 | fetchMessageDefinition code = do | ||
61 | say ("Fetching message " % fMessageCode % " definition") code | ||
62 | url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code | ||
63 | outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code | ||
64 | htmlToFile url TE.decodeUtf8 outputFile | ||
65 | |||
66 | fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode] | ||
67 | fetchMessageSummary code = do | ||
68 | say ("Fetching message " % fMessageCode % " summary") code | ||
69 | url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code | ||
70 | outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code | ||
71 | let decoder = cleanupAsciiArt . TE.decodeLatin1 | ||
72 | htmlToFileWithParser url decoder outputFile listSegments | ||
73 | |||
74 | -- This might not be the proper way to do it... | ||
75 | -- Use Data.Text.Encoding.decodeUtf8With instead? | ||
76 | cleanupAsciiArt :: Text -> Text | ||
77 | cleanupAsciiArt = | ||
78 | let f 'Ä' = '-' | ||
79 | f '¿' = '+' | ||
80 | f '³' = '|' | ||
81 | f 'Ù' = '+' | ||
82 | f 'Á' = '+' | ||
83 | f c = c | ||
84 | in T.map f | ||