]> git.immae.eu Git - github/fretlink/edi-parser.git/blob - scaffolder/src/Text/Edifact/BundleReader/Extractor.hs
Release code as open source
[github/fretlink/edi-parser.git] / scaffolder / src / Text / Edifact / BundleReader / Extractor.hs
1 {-# LANGUAGE NamedFieldPuns #-}
2 {-# LANGUAGE OverloadedStrings #-}
3
4 module Text.Edifact.BundleReader.Extractor
5 ( FileContents(..)
6 , readZip
7 ) where
8
9 import Text.Edifact.Scaffolder.Commons (Revision (..))
10
11 import Codec.Archive.Zip
12 import Data.ByteString as B (ByteString, isInfixOf,
13 isPrefixOf, readFile)
14 import Data.ByteString.Lazy as BS (fromStrict, toStrict)
15 import Data.Char (toLower)
16 import Data.List as L (intercalate, isPrefixOf)
17 import Data.List.Split (splitOn)
18 import Data.Maybe (maybeToList)
19
20 data FileContent =
21 FileContent
22 { fileType :: FileType
23 , fileContent :: ByteString
24 }
25
26 data FileType = Message | Segment | Composite | Simple | CodedSimple deriving Eq
27
28 data FileContents =
29 FileContents
30 { messages :: [ByteString]
31 , segments :: [ByteString]
32 , composites :: [ByteString]
33 , simples :: [ByteString]
34 , codedSimples :: [ByteString]
35 }
36
37 readZip :: Revision -> FilePath -> IO FileContents
38 readZip specification f = toFileContents . parseFile (getExtension f) (getName f) specification <$> B.readFile f
39
40 toFileContents :: [FileContent] -> FileContents
41 toFileContents t = FileContents
42 { messages = fileContent <$> filter ((==) Message . fileType) t
43 , segments = fileContent <$> filter ((==) Segment . fileType) t
44 , composites = fileContent <$> filter ((==) Composite . fileType) t
45 , simples = fileContent <$> filter ((==) Simple . fileType) t
46 , codedSimples = fileContent <$> filter ((==) CodedSimple . fileType) t
47 }
48
49 getName :: FilePath -> String
50 getName = intercalate "." . init . splitOn "." . last . splitOn "/"
51
52 getExtension :: FilePath -> String
53 getExtension = fmap toLower . last . splitOn "."
54
55 parseFile :: String -> String -> Revision -> ByteString -> [FileContent]
56 parseFile "zip" _ specification content = unzipAndRead specification content
57 parseFile extension name specification content
58 | ("d" <> extension) == (toLower <$> getRevision specification) = maybeToList $ identifyFile name content
59 parseFile _ _ _ _ = []
60
61 unzipAndRead :: Revision -> ByteString -> [FileContent]
62 unzipAndRead specification content = let
63 archive = zEntries $ toArchive $ BS.fromStrict content
64 toContents e@Entry{eRelativePath} = parseFile (getExtension eRelativePath) (getName eRelativePath) specification (BS.toStrict $ fromEntry e)
65 in
66 concatMap toContents archive
67
68 identifyFile :: String -> ByteString -> Maybe FileContent
69 identifyFile name content
70 | " Message Type : " `isInfixOf` content = pure $ FileContent Message content
71 | "2. Composite specifications" `B.isPrefixOf` content = pure $ FileContent Composite content
72 | "2. Segment specifications" `B.isPrefixOf` content = pure $ FileContent Segment content
73 | "2. Data element specifications" `B.isPrefixOf` content = pure $ FileContent Simple content
74 | "UNCL" `L.isPrefixOf` name = pure $ FileContent CodedSimple content
75 identifyFile _ _ = Nothing