blob: 9daf98ab11da2db8eca69e0895a16db1b64df466 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
{-# LANGUAGE OverloadedStrings #-}
module Text.Edifact.Fetcher.Messages
( fetchMessages
, messagesDirectory
) where
import Text.Edifact.Fetcher.Commons
import Text.Edifact.Fetcher.Configuration
import Text.Edifact.Scaffolder.Commons
import Text.Edifact.Scaffolder.Messages.Specification (listSegments)
import Data.Foldable (toList)
import Data.List as L (nub, sort)
import Data.Maybe (mapMaybe)
import Data.Text as T (map)
import Data.Text.Encoding as TE (decodeLatin1,
decodeUtf8)
import Formatting
import Text.Pandoc as Pandoc hiding (Format,
getOutputFile)
import Text.Pandoc.Walk (query)
import Text.Parsec (parse)
messagesDirectory :: FilePath
messagesDirectory = "messages"
parseMessageCode :: Text -> Maybe MessageCode
parseMessageCode =
let toMaybe (Right v) = Just v
toMaybe _ = Nothing
in toMaybe . parse messageCodeParser ""
scanInlineForMessageCode :: Inline -> Maybe MessageCode
scanInlineForMessageCode (Str label) = parseMessageCode label
scanInlineForMessageCode _ = Nothing
-- The trick here is to reverse the usage of UNH which is mandatory on every single message
listAllMessages :: Fetcher [MessageCode]
listAllMessages =
let filterLink (Link _ inlines _) = mapMaybe scanInlineForMessageCode inlines
filterLink _ = []
extractMessageCodes = query filterLink
loadUNHUsages = readHtmlFromURL TE.decodeUtf8 =<< getUrl ("/trsd/cseg" % fSegmentCodeLower % ".htm") "UNH"
in extractMessageCodes <$> loadUNHUsages
listMessages :: Fetcher [MessageCode]
listMessages = getSelectedMessages >>= maybe listAllMessages (pure . toList)
fetchMessages :: Fetcher [SegmentCode]
fetchMessages = listMessages >>= fmap (L.nub . L.sort . concat) . traverse fetchMessage
fetchMessage :: MessageCode -> Fetcher [SegmentCode]
fetchMessage code = do
retry 3 (fetchMessageDefinition code)
retry 3 (fetchMessageSummary code)
fetchMessageDefinition :: MessageCode -> Fetcher ()
fetchMessageDefinition code = do
say ("Fetching message " % fMessageCode % " definition") code
url <- getUrl ("/trmd/" % fMessageCodeLower % "_d.htm") code
outputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory code
htmlToFile url TE.decodeUtf8 outputFile
fetchMessageSummary :: MessageCode -> Fetcher [SegmentCode]
fetchMessageSummary code = do
say ("Fetching message " % fMessageCode % " summary") code
url <- getUrl ("/trmd/" % fMessageCodeLower % "_s.htm") code
outputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory code
let decoder = cleanupAsciiArt . TE.decodeLatin1
htmlToFileWithParser url decoder outputFile listSegments
-- This might not be the proper way to do it...
-- Use Data.Text.Encoding.decodeUtf8With instead?
cleanupAsciiArt :: Text -> Text
cleanupAsciiArt =
let f 'Ä' = '-'
f '¿' = '+'
f '³' = '|'
f 'Ù' = '+'
f 'Á' = '+'
f c = c
in T.map f
|