aboutsummaryrefslogtreecommitdiffhomepage
path: root/scaffolder/src/Text/Edifact/BundleReader
diff options
context:
space:
mode:
Diffstat (limited to 'scaffolder/src/Text/Edifact/BundleReader')
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs38
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Commons.hs90
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Composites.hs43
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Configuration.hs30
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Extractor.hs75
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Messages.hs61
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Segments.hs55
-rw-r--r--scaffolder/src/Text/Edifact/BundleReader/Simples.hs43
8 files changed, 435 insertions, 0 deletions
diff --git a/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs b/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs
new file mode 100644
index 0000000..7dc92fd
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs
@@ -0,0 +1,38 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.BundleReader.CodedSimples
4 ( readCodedSimples
5 ) where
6
7import Text.Edifact.BundleReader.Commons
8import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..))
9import Text.Edifact.BundleReader.Simples (simplesDirectory)
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.CodedSimples.Specification
13
14import Control.Monad (when)
15import Control.Monad.Reader (asks)
16import Data.ByteString as BS (ByteString,
17 readFile)
18import Formatting
19
20readCodedSimples :: [ByteString] -> [SimpleCode] -> BundleReader ()
21readCodedSimples contents simples = do
22 let parsedFile path = parseFile simples =<< liftIO (BS.readFile path)
23 parsedString = parseFile simples
24 files <- asks codedSimplesFiles
25 mapM_ parsedFile files
26 mapM_ parsedString contents
27
28parseFile :: [SimpleCode] -> ByteString -> BundleReader [SimpleCode]
29parseFile simples content =
30 let chunks = tail $ splitFileByDash 70 $ decodeContent content
31 in traverse (parseChunk simples) chunks
32
33parseChunk :: [SimpleCode] -> Text -> BundleReader SimpleCode
34parseChunk simples chunk = do
35 parsed <- parseOrFail chunk specificationParser
36 outputFile <- getOutputFile (fSimpleCodeLower % ".txt") simplesDirectory (fst parsed)
37 when (fst parsed `elem` simples) $ toFile chunk outputFile
38 pure $ fst parsed
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Commons.hs b/scaffolder/src/Text/Edifact/BundleReader/Commons.hs
new file mode 100644
index 0000000..0c8334a
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Commons.hs
@@ -0,0 +1,90 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.BundleReader.Commons where
4
5import Text.Edifact.BundleReader.Configuration
6import Text.Edifact.Scaffolder.Commons
7
8import Control.Monad.Reader (ReaderT, runReaderT)
9import Data.ByteString (ByteString)
10import Data.Char (toLower)
11import Data.List.Split (splitWhen)
12import Data.Text as T (isInfixOf, lines,
13 map, null,
14 replicate, strip,
15 unlines, unpack)
16import Data.Text.Encoding as TE (decodeLatin1,
17 decodeUtf8')
18import Data.Text.IO as TIO (writeFile)
19import Formatting
20import Text.Parsec (Parsec, runParser)
21
22type BundleReader = ReaderT BundleReaderEnv IO
23
24decodeContent :: ByteString -> Text
25decodeContent content = either (const $ cleanupAsciiArt $ decodeLatin1 content) id (decodeUtf8' content)
26
27splitFileByDash :: Int -> Text -> [Text]
28splitFileByDash n =
29 let separator = T.replicate n "-"
30 isNotEmpty = not . T.null . T.strip
31 in
32 filter isNotEmpty . fmap T.unlines . splitWhen (separator `T.isInfixOf`) . T.lines
33
34runBundleReader :: BundleReader () -> BundleReaderEnv -> IO ()
35runBundleReader = runReaderT
36
37getOutputFile :: Format String (a -> String) -> FilePath -> a -> BundleReader FilePath
38getOutputFile f d c = do
39 home <- getSpecificationHome
40 pure (formatToString (string % "/" % string % "/" % f) home d c)
41
42getSpecificationHome :: BundleReader FilePath
43getSpecificationHome = do
44 home <- getHome
45 rev <- getTargetRevision
46 pure (home </> formatToString fRevision rev)
47
48toFile :: Text -> FilePath -> BundleReader ()
49toFile specification outputFile = liftIO (TIO.writeFile outputFile specification)
50
51parseOrFail :: (Monoid u) => Text -> Parsec String u a -> BundleReader a
52parseOrFail specification parser = either (error . (\a -> show specification <> show a)) pure (runParser parser mempty "" (T.unpack specification))
53
54toFileWithParser :: (Monoid a, Monoid u) => Text -> FilePath -> Parsec String u a -> BundleReader a
55toFileWithParser specification outputFile parser = do
56 liftIO (TIO.writeFile outputFile specification)
57 either (error . show) pure (runParser parser mempty "" (T.unpack specification))
58
59lower :: Format r (String -> r)
60lower = mapf (fmap toLower) string
61
62fRevision :: Format r (Revision -> r)
63fRevision = mapf getRevision string
64
65fRevisionLower :: Format r (Revision -> r)
66fRevisionLower = mapf getRevision lower
67
68fMessageCodeLower :: Format r (MessageCode -> r)
69fMessageCodeLower = mapf getMessageCode lower
70
71fSegmentCodeLower :: Format r (SegmentCode -> r)
72fSegmentCodeLower = mapf getSegmentCode lower
73
74fCompositeCodeLower :: Format r (CompositeCode -> r)
75fCompositeCodeLower = mapf getCompositeCode lower
76
77fSimpleCodeLower :: Format r (SimpleCode -> r)
78fSimpleCodeLower = mapf getSimpleCode lower
79
80-- This might not be the proper way to do it...
81-- Use Data.Text.Encoding.decodeUtf8With instead?
82cleanupAsciiArt :: Text -> Text
83cleanupAsciiArt =
84 let f 'Ä' = '-'
85 f '¿' = '+'
86 f '³' = '|'
87 f 'Ù' = '+'
88 f 'Á' = '+'
89 f c = c
90 in T.map f
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Composites.hs b/scaffolder/src/Text/Edifact/BundleReader/Composites.hs
new file mode 100644
index 0000000..2880b95
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Composites.hs
@@ -0,0 +1,43 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.BundleReader.Composites
4 ( readComposites
5 , compositesDirectory
6 ) where
7
8import Text.Edifact.BundleReader.Commons
9import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..))
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.Composites.Specification (listSimples)
13
14import Control.Monad (when)
15import Control.Monad.Reader (asks)
16import Data.ByteString as BS (ByteString,
17 readFile)
18import Formatting
19
20compositesDirectory :: FilePath
21compositesDirectory = "composites"
22
23readComposites :: [ByteString] -> ([CompositeCode], [SimpleCode]) -> BundleReader [SimpleCode]
24readComposites contents (composites, simples) = do
25 let parsedFile path = parseFile composites =<< liftIO (BS.readFile path)
26 parsedString = parseFile composites
27 files <- asks compositesFiles
28 parsedFiles <- traverse parsedFile files
29 parsedStrings <- traverse parsedString contents
30 let filtered = mappend simples $ concatMap snd $ filter (\s -> fst s `elem` composites) $ concat (parsedFiles <> parsedStrings)
31 pure filtered
32
33parseFile :: [CompositeCode] -> ByteString -> BundleReader [(CompositeCode, [SimpleCode])]
34parseFile composites content =
35 let chunks = tail $ splitFileByDash 70 $ decodeContent content
36 in traverse (parseChunk composites) chunks
37
38parseChunk :: [CompositeCode] -> Text -> BundleReader (CompositeCode, [SimpleCode])
39parseChunk composites chunk = do
40 parsed <- parseOrFail chunk listSimples
41 outputFile <- getOutputFile (fCompositeCodeLower % ".txt") compositesDirectory (fst parsed)
42 when (fst parsed `elem` composites) $ toFile chunk outputFile
43 pure parsed
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs b/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs
new file mode 100644
index 0000000..0609c03
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs
@@ -0,0 +1,30 @@
1{-# LANGUAGE FlexibleContexts #-}
2
3module Text.Edifact.BundleReader.Configuration
4 ( -- * Parsing environment
5 BundleReaderEnv(..)
6 -- * Shortcuts for reading the environment
7 , getTargetRevision
8 , getHome
9 ) where
10
11import Text.Edifact.Scaffolder.Commons (MessageCode, Revision)
12
13import Control.Monad.Reader.Class (MonadReader, asks)
14
15data BundleReaderEnv = BundleReaderEnv { parserRevision :: Revision
16 , specificationHome :: FilePath
17 , bundle :: [FilePath]
18 , messageNames :: [MessageCode]
19 , messagesFiles :: [FilePath]
20 , segmentsFiles :: [FilePath]
21 , compositesFiles :: [FilePath]
22 , simplesFiles :: [FilePath]
23 , codedSimplesFiles :: [FilePath]
24 }
25
26getTargetRevision :: MonadReader BundleReaderEnv m => m Revision
27getTargetRevision = asks parserRevision
28
29getHome :: MonadReader BundleReaderEnv m => m FilePath
30getHome = asks specificationHome
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs
new file mode 100644
index 0000000..f4be7e9
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs
@@ -0,0 +1,75 @@
1{-# LANGUAGE NamedFieldPuns #-}
2{-# LANGUAGE OverloadedStrings #-}
3
4module Text.Edifact.BundleReader.Extractor
5 ( FileContents(..)
6 , readZip
7 ) where
8
9import Text.Edifact.Scaffolder.Commons (Revision (..))
10
11import Codec.Archive.Zip
12import Data.ByteString as B (ByteString, isInfixOf,
13 isPrefixOf, readFile)
14import Data.ByteString.Lazy as BS (fromStrict, toStrict)
15import Data.Char (toLower)
16import Data.List as L (intercalate, isPrefixOf)
17import Data.List.Split (splitOn)
18import Data.Maybe (maybeToList)
19
20data FileContent =
21 FileContent
22 { fileType :: FileType
23 , fileContent :: ByteString
24 }
25
26data FileType = Message | Segment | Composite | Simple | CodedSimple deriving Eq
27
28data FileContents =
29 FileContents
30 { messages :: [ByteString]
31 , segments :: [ByteString]
32 , composites :: [ByteString]
33 , simples :: [ByteString]
34 , codedSimples :: [ByteString]
35 }
36
37readZip :: Revision -> FilePath -> IO FileContents
38readZip specification f = toFileContents . parseFile (getExtension f) (getName f) specification <$> B.readFile f
39
40toFileContents :: [FileContent] -> FileContents
41toFileContents t = FileContents
42 { messages = fileContent <$> filter ((==) Message . fileType) t
43 , segments = fileContent <$> filter ((==) Segment . fileType) t
44 , composites = fileContent <$> filter ((==) Composite . fileType) t
45 , simples = fileContent <$> filter ((==) Simple . fileType) t
46 , codedSimples = fileContent <$> filter ((==) CodedSimple . fileType) t
47 }
48
49getName :: FilePath -> String
50getName = intercalate "." . init . splitOn "." . last . splitOn "/"
51
52getExtension :: FilePath -> String
53getExtension = fmap toLower . last . splitOn "."
54
55parseFile :: String -> String -> Revision -> ByteString -> [FileContent]
56parseFile "zip" _ specification content = unzipAndRead specification content
57parseFile extension name specification content
58 | ("d" <> extension) == (toLower <$> getRevision specification) = maybeToList $ identifyFile name content
59parseFile _ _ _ _ = []
60
61unzipAndRead :: Revision -> ByteString -> [FileContent]
62unzipAndRead specification content = let
63 archive = zEntries $ toArchive $ BS.fromStrict content
64 toContents e@Entry{eRelativePath} = parseFile (getExtension eRelativePath) (getName eRelativePath) specification (BS.toStrict $ fromEntry e)
65 in
66 concatMap toContents archive
67
68identifyFile :: String -> ByteString -> Maybe FileContent
69identifyFile name content
70 | " Message Type : " `isInfixOf` content = pure $ FileContent Message content
71 | "2. Composite specifications" `B.isPrefixOf` content = pure $ FileContent Composite content
72 | "2. Segment specifications" `B.isPrefixOf` content = pure $ FileContent Segment content
73 | "2. Data element specifications" `B.isPrefixOf` content = pure $ FileContent Simple content
74 | "UNCL" `L.isPrefixOf` name = pure $ FileContent CodedSimple content
75identifyFile _ _ = Nothing
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Messages.hs b/scaffolder/src/Text/Edifact/BundleReader/Messages.hs
new file mode 100644
index 0000000..5537d28
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Messages.hs
@@ -0,0 +1,61 @@
1{-# LANGUAGE OverloadedStrings #-}
2{-# LANGUAGE TupleSections #-}
3
4module Text.Edifact.BundleReader.Messages
5 ( readMessages
6 , messagesDirectory
7 ) where
8
9import Text.Edifact.BundleReader.Commons (BundleReader,
10 decodeContent,
11 fMessageCodeLower,
12 getOutputFile,
13 parseOrFail,
14 toFile)
15import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..))
16import Text.Edifact.Scaffolder.Commons
17import Text.Edifact.Scaffolder.Messages.Specification (listSegments, messageNameParser)
18
19import Control.Monad (when)
20import Control.Monad.Reader (asks)
21import Data.ByteString as BS (ByteString,
22 readFile)
23import qualified Data.Text as T (isPrefixOf,
24 lines,
25 unlines)
26import Formatting
27
28messagesDirectory :: FilePath
29messagesDirectory = "messages"
30
31readMessages :: [ByteString] -> BundleReader [(MessageCode, [SegmentCode])]
32readMessages contents = do
33 selectedMessages <- asks messageNames
34 let parsedFile path = parseFile selectedMessages =<< liftIO (BS.readFile path)
35 parsedString = parseFile selectedMessages
36 parsedFiles <- traverse parsedFile =<< asks messagesFiles
37 parsedStrings <- traverse parsedString contents
38 let filtered = parsedFiles <> filter (\s -> null selectedMessages || fst s `elem` selectedMessages) parsedStrings
39 pure filtered
40
41parseFile :: [MessageCode] -> ByteString -> BundleReader (MessageCode, [SegmentCode])
42parseFile selectedMessages content = do
43 let (definition, summary) = splitFile $ decodeContent content
44 messageCode <- parseOrFail definition messageNameParser
45 summaryOutputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory messageCode
46 definitionOutputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory messageCode
47 when (messageCode `elem` selectedMessages) $ toFile definition definitionOutputFile
48 when (messageCode `elem` selectedMessages) $ toFile summary summaryOutputFile
49 (messageCode,) <$> parseOrFail summary listSegments
50
51splitFile :: Text -> (Text, Text)
52splitFile content = let
53 separatorBefore = "4.3 Message structure"
54 separatorAfter = "Annex"
55 textBefore = takeWhile (not . T.isPrefixOf separatorBefore) $ T.lines content
56 textInsideAndAfter = dropWhile (not . T.isPrefixOf separatorBefore) $ T.lines content
57 textAfter = dropWhile (not . T.isPrefixOf separatorAfter) textInsideAndAfter
58 textSummary = T.unlines $ takeWhile (not . T.isPrefixOf separatorAfter) textInsideAndAfter
59 textDefinition = T.unlines $ textBefore <> [separatorBefore, "", "See summary file", ""] <> textAfter
60 in
61 (textDefinition, textSummary)
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Segments.hs b/scaffolder/src/Text/Edifact/BundleReader/Segments.hs
new file mode 100644
index 0000000..6b71266
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Segments.hs
@@ -0,0 +1,55 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.BundleReader.Segments
4 ( readSegments
5 , segmentsDirectory
6 ) where
7
8import Text.Edifact.BundleReader.Commons
9import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..))
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.Segments.Specification (listCompositesAndSimples)
13import Text.Edifact.Scaffolder.Segments.Types (Element,
14 getComposite,
15 getSimple)
16
17import Control.Monad (when)
18import Control.Monad.Reader (asks)
19import Data.Bifunctor (bimap)
20import Data.ByteString as BS (ByteString,
21 readFile)
22import Data.List as L (partition)
23import Data.Maybe (isJust,
24 mapMaybe)
25import Formatting
26
27segmentsDirectory :: FilePath
28segmentsDirectory = "segments"
29
30readSegments :: [ByteString] -> [SegmentCode] -> BundleReader ([CompositeCode], [SimpleCode])
31readSegments contents segments = do
32 let parsedFile path = parseFile segments =<< liftIO (BS.readFile path)
33 parsedString = parseFile segments
34 files <- asks segmentsFiles
35 parsedFiles <- traverse parsedFile files
36 parsedStrings <- traverse parsedString contents
37 let filtered = concatMap snd $ filter (\s -> fst s `elem` segments) $ concat (parsedFiles <> parsedStrings)
38 pure $ partitionElements filtered
39
40parseFile :: [SegmentCode] -> ByteString -> BundleReader [(SegmentCode, [Element])]
41parseFile segments content =
42 let chunks = tail $ splitFileByDash 70 $ decodeContent content
43 in traverse (parseChunk segments) chunks
44
45parseChunk :: [SegmentCode] -> Text -> BundleReader (SegmentCode, [Element])
46parseChunk segments chunk = do
47 parsed <- parseOrFail chunk listCompositesAndSimples
48 outputFile <- getOutputFile (fSegmentCodeLower % ".txt") segmentsDirectory (fst parsed)
49 when (fst parsed `elem` segments) $ toFile chunk outputFile
50 pure parsed
51
52partitionElements :: [Element] -> ([CompositeCode], [SimpleCode])
53partitionElements =
54 let isComposite = isJust . getComposite
55 in bimap (mapMaybe getComposite) (mapMaybe getSimple) . partition isComposite
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Simples.hs b/scaffolder/src/Text/Edifact/BundleReader/Simples.hs
new file mode 100644
index 0000000..df7d662
--- /dev/null
+++ b/scaffolder/src/Text/Edifact/BundleReader/Simples.hs
@@ -0,0 +1,43 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.BundleReader.Simples
4 ( readSimples
5 , simplesDirectory
6 ) where
7
8import Text.Edifact.BundleReader.Commons
9import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..))
10import Text.Edifact.Scaffolder.Commons
11
12import Text.Edifact.Scaffolder.Simples.Specification
13
14import Control.Monad (when)
15import Control.Monad.Reader (asks)
16import Data.ByteString as BS (ByteString,
17 readFile)
18import Formatting
19
20simplesDirectory :: FilePath
21simplesDirectory = "simples"
22
23readSimples :: [ByteString] -> [SimpleCode] -> BundleReader [SimpleCode]
24readSimples contents simples = do
25 let parsedFile path = parseFile simples =<< liftIO (BS.readFile path)
26 parsedString = parseFile simples
27 files <- asks simplesFiles
28 parsedFiles <- traverse parsedFile files
29 parsedStrings <- traverse parsedString contents
30 let filtered = filter (`elem` simples) $ concat (parsedFiles <> parsedStrings)
31 pure filtered
32
33parseFile :: [SimpleCode] -> ByteString -> BundleReader [SimpleCode]
34parseFile simples content =
35 let chunks = tail $ splitFileByDash 70 $ decodeContent content
36 in traverse (parseChunk simples) chunks
37
38parseChunk :: [SimpleCode] -> Text -> BundleReader SimpleCode
39parseChunk simples chunk = do
40 parsed <- parseOrFail chunk specificationParser
41 outputFile <- getOutputFile (fSimpleCodeLower % ".txt") simplesDirectory (fst parsed)
42 when (fst parsed `elem` simples) $ toFile chunk outputFile
43 pure $ fst parsed