1 {-# LANGUAGE NamedFieldPuns #-}
2 {-# LANGUAGE OverloadedStrings #-}
4 module Text.Edifact.BundleReader.Extractor
9 import Text.Edifact.Scaffolder.Commons (Revision (..))
11 import Codec.Archive.Zip
12 import Data.ByteString as B (ByteString, isInfixOf,
14 import Data.ByteString.Lazy as BS (fromStrict, toStrict)
15 import Data.Char (toLower)
16 import Data.List as L (intercalate, isPrefixOf)
17 import Data.List.Split (splitOn)
18 import Data.Maybe (maybeToList)
22 { fileType :: FileType
23 , fileContent :: ByteString
26 data FileType = Message | Segment | Composite | Simple | CodedSimple deriving Eq
30 { messages :: [ByteString]
31 , segments :: [ByteString]
32 , composites :: [ByteString]
33 , simples :: [ByteString]
34 , codedSimples :: [ByteString]
37 readZip :: Revision -> FilePath -> IO FileContents
38 readZip specification f = toFileContents . parseFile (getExtension f) (getName f) specification <$> B.readFile f
40 toFileContents :: [FileContent] -> FileContents
41 toFileContents t = FileContents
42 { messages = fileContent <$> filter ((==) Message . fileType) t
43 , segments = fileContent <$> filter ((==) Segment . fileType) t
44 , composites = fileContent <$> filter ((==) Composite . fileType) t
45 , simples = fileContent <$> filter ((==) Simple . fileType) t
46 , codedSimples = fileContent <$> filter ((==) CodedSimple . fileType) t
49 getName :: FilePath -> String
50 getName = intercalate "." . init . splitOn "." . last . splitOn "/"
52 getExtension :: FilePath -> String
53 getExtension = fmap toLower . last . splitOn "."
55 parseFile :: String -> String -> Revision -> ByteString -> [FileContent]
56 parseFile "zip" _ specification content = unzipAndRead specification content
57 parseFile extension name specification content
58 | ("d" <> extension) == (toLower <$> getRevision specification) = maybeToList $ identifyFile name content
59 parseFile _ _ _ _ = []
61 unzipAndRead :: Revision -> ByteString -> [FileContent]
62 unzipAndRead specification content = let
63 archive = zEntries $ toArchive $ BS.fromStrict content
64 toContents e@Entry{eRelativePath} = parseFile (getExtension eRelativePath) (getName eRelativePath) specification (BS.toStrict $ fromEntry e)
66 concatMap toContents archive
68 identifyFile :: String -> ByteString -> Maybe FileContent
69 identifyFile name content
70 | " Message Type : " `isInfixOf` content = pure $ FileContent Message content
71 | "2. Composite specifications" `B.isPrefixOf` content = pure $ FileContent Composite content
72 | "2. Segment specifications" `B.isPrefixOf` content = pure $ FileContent Segment content
73 | "2. Data element specifications" `B.isPrefixOf` content = pure $ FileContent Simple content
74 | "UNCL" `L.isPrefixOf` name = pure $ FileContent CodedSimple content
75 identifyFile _ _ = Nothing