diff options
Diffstat (limited to 'scaffolder/src/Text/Edifact/BundleReader/Extractor.hs')
-rw-r--r-- | scaffolder/src/Text/Edifact/BundleReader/Extractor.hs | 75 |
1 files changed, 75 insertions, 0 deletions
diff --git a/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs new file mode 100644 index 0000000..f4be7e9 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs | |||
@@ -0,0 +1,75 @@ | |||
1 | {-# LANGUAGE NamedFieldPuns #-} | ||
2 | {-# LANGUAGE OverloadedStrings #-} | ||
3 | |||
4 | module Text.Edifact.BundleReader.Extractor | ||
5 | ( FileContents(..) | ||
6 | , readZip | ||
7 | ) where | ||
8 | |||
9 | import Text.Edifact.Scaffolder.Commons (Revision (..)) | ||
10 | |||
11 | import Codec.Archive.Zip | ||
12 | import Data.ByteString as B (ByteString, isInfixOf, | ||
13 | isPrefixOf, readFile) | ||
14 | import Data.ByteString.Lazy as BS (fromStrict, toStrict) | ||
15 | import Data.Char (toLower) | ||
16 | import Data.List as L (intercalate, isPrefixOf) | ||
17 | import Data.List.Split (splitOn) | ||
18 | import Data.Maybe (maybeToList) | ||
19 | |||
20 | data FileContent = | ||
21 | FileContent | ||
22 | { fileType :: FileType | ||
23 | , fileContent :: ByteString | ||
24 | } | ||
25 | |||
26 | data FileType = Message | Segment | Composite | Simple | CodedSimple deriving Eq | ||
27 | |||
28 | data FileContents = | ||
29 | FileContents | ||
30 | { messages :: [ByteString] | ||
31 | , segments :: [ByteString] | ||
32 | , composites :: [ByteString] | ||
33 | , simples :: [ByteString] | ||
34 | , codedSimples :: [ByteString] | ||
35 | } | ||
36 | |||
37 | readZip :: Revision -> FilePath -> IO FileContents | ||
38 | readZip specification f = toFileContents . parseFile (getExtension f) (getName f) specification <$> B.readFile f | ||
39 | |||
40 | toFileContents :: [FileContent] -> FileContents | ||
41 | toFileContents t = FileContents | ||
42 | { messages = fileContent <$> filter ((==) Message . fileType) t | ||
43 | , segments = fileContent <$> filter ((==) Segment . fileType) t | ||
44 | , composites = fileContent <$> filter ((==) Composite . fileType) t | ||
45 | , simples = fileContent <$> filter ((==) Simple . fileType) t | ||
46 | , codedSimples = fileContent <$> filter ((==) CodedSimple . fileType) t | ||
47 | } | ||
48 | |||
49 | getName :: FilePath -> String | ||
50 | getName = intercalate "." . init . splitOn "." . last . splitOn "/" | ||
51 | |||
52 | getExtension :: FilePath -> String | ||
53 | getExtension = fmap toLower . last . splitOn "." | ||
54 | |||
55 | parseFile :: String -> String -> Revision -> ByteString -> [FileContent] | ||
56 | parseFile "zip" _ specification content = unzipAndRead specification content | ||
57 | parseFile extension name specification content | ||
58 | | ("d" <> extension) == (toLower <$> getRevision specification) = maybeToList $ identifyFile name content | ||
59 | parseFile _ _ _ _ = [] | ||
60 | |||
61 | unzipAndRead :: Revision -> ByteString -> [FileContent] | ||
62 | unzipAndRead specification content = let | ||
63 | archive = zEntries $ toArchive $ BS.fromStrict content | ||
64 | toContents e@Entry{eRelativePath} = parseFile (getExtension eRelativePath) (getName eRelativePath) specification (BS.toStrict $ fromEntry e) | ||
65 | in | ||
66 | concatMap toContents archive | ||
67 | |||
68 | identifyFile :: String -> ByteString -> Maybe FileContent | ||
69 | identifyFile name content | ||
70 | | " Message Type : " `isInfixOf` content = pure $ FileContent Message content | ||
71 | | "2. Composite specifications" `B.isPrefixOf` content = pure $ FileContent Composite content | ||
72 | | "2. Segment specifications" `B.isPrefixOf` content = pure $ FileContent Segment content | ||
73 | | "2. Data element specifications" `B.isPrefixOf` content = pure $ FileContent Simple content | ||
74 | | "UNCL" `L.isPrefixOf` name = pure $ FileContent CodedSimple content | ||
75 | identifyFile _ _ = Nothing | ||