From a9d77a20008efe82862cc1adbfa7a6d4f09f8ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Menou?= Date: Thu, 8 Dec 2016 10:19:15 +0200 Subject: Release code as open source --- .../src/Text/Edifact/BundleReader/CodedSimples.hs | 38 +++++++++ .../src/Text/Edifact/BundleReader/Commons.hs | 90 ++++++++++++++++++++++ .../src/Text/Edifact/BundleReader/Composites.hs | 43 +++++++++++ .../src/Text/Edifact/BundleReader/Configuration.hs | 30 ++++++++ .../src/Text/Edifact/BundleReader/Extractor.hs | 75 ++++++++++++++++++ .../src/Text/Edifact/BundleReader/Messages.hs | 61 +++++++++++++++ .../src/Text/Edifact/BundleReader/Segments.hs | 55 +++++++++++++ .../src/Text/Edifact/BundleReader/Simples.hs | 43 +++++++++++ 8 files changed, 435 insertions(+) create mode 100644 scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Commons.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Composites.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Configuration.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Extractor.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Messages.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Segments.hs create mode 100644 scaffolder/src/Text/Edifact/BundleReader/Simples.hs (limited to 'scaffolder/src/Text/Edifact/BundleReader') diff --git a/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs b/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs new file mode 100644 index 0000000..7dc92fd --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/CodedSimples.hs @@ -0,0 +1,38 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.CodedSimples + ( readCodedSimples + ) where + +import Text.Edifact.BundleReader.Commons +import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..)) +import Text.Edifact.BundleReader.Simples (simplesDirectory) +import Text.Edifact.Scaffolder.Commons + +import Text.Edifact.Scaffolder.CodedSimples.Specification + +import Control.Monad (when) +import Control.Monad.Reader (asks) +import Data.ByteString as BS (ByteString, + readFile) +import Formatting + +readCodedSimples :: [ByteString] -> [SimpleCode] -> BundleReader () +readCodedSimples contents simples = do + let parsedFile path = parseFile simples =<< liftIO (BS.readFile path) + parsedString = parseFile simples + files <- asks codedSimplesFiles + mapM_ parsedFile files + mapM_ parsedString contents + +parseFile :: [SimpleCode] -> ByteString -> BundleReader [SimpleCode] +parseFile simples content = + let chunks = tail $ splitFileByDash 70 $ decodeContent content + in traverse (parseChunk simples) chunks + +parseChunk :: [SimpleCode] -> Text -> BundleReader SimpleCode +parseChunk simples chunk = do + parsed <- parseOrFail chunk specificationParser + outputFile <- getOutputFile (fSimpleCodeLower % ".txt") simplesDirectory (fst parsed) + when (fst parsed `elem` simples) $ toFile chunk outputFile + pure $ fst parsed diff --git a/scaffolder/src/Text/Edifact/BundleReader/Commons.hs b/scaffolder/src/Text/Edifact/BundleReader/Commons.hs new file mode 100644 index 0000000..0c8334a --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Commons.hs @@ -0,0 +1,90 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.Commons where + +import Text.Edifact.BundleReader.Configuration +import Text.Edifact.Scaffolder.Commons + +import Control.Monad.Reader (ReaderT, runReaderT) +import Data.ByteString (ByteString) +import Data.Char (toLower) +import Data.List.Split (splitWhen) +import Data.Text as T (isInfixOf, lines, + map, null, + replicate, strip, + unlines, unpack) +import Data.Text.Encoding as TE (decodeLatin1, + decodeUtf8') +import Data.Text.IO as TIO (writeFile) +import Formatting +import Text.Parsec (Parsec, runParser) + +type BundleReader = ReaderT BundleReaderEnv IO + +decodeContent :: ByteString -> Text +decodeContent content = either (const $ cleanupAsciiArt $ decodeLatin1 content) id (decodeUtf8' content) + +splitFileByDash :: Int -> Text -> [Text] +splitFileByDash n = + let separator = T.replicate n "-" + isNotEmpty = not . T.null . T.strip + in + filter isNotEmpty . fmap T.unlines . splitWhen (separator `T.isInfixOf`) . T.lines + +runBundleReader :: BundleReader () -> BundleReaderEnv -> IO () +runBundleReader = runReaderT + +getOutputFile :: Format String (a -> String) -> FilePath -> a -> BundleReader FilePath +getOutputFile f d c = do + home <- getSpecificationHome + pure (formatToString (string % "/" % string % "/" % f) home d c) + +getSpecificationHome :: BundleReader FilePath +getSpecificationHome = do + home <- getHome + rev <- getTargetRevision + pure (home formatToString fRevision rev) + +toFile :: Text -> FilePath -> BundleReader () +toFile specification outputFile = liftIO (TIO.writeFile outputFile specification) + +parseOrFail :: (Monoid u) => Text -> Parsec String u a -> BundleReader a +parseOrFail specification parser = either (error . (\a -> show specification <> show a)) pure (runParser parser mempty "" (T.unpack specification)) + +toFileWithParser :: (Monoid a, Monoid u) => Text -> FilePath -> Parsec String u a -> BundleReader a +toFileWithParser specification outputFile parser = do + liftIO (TIO.writeFile outputFile specification) + either (error . show) pure (runParser parser mempty "" (T.unpack specification)) + +lower :: Format r (String -> r) +lower = mapf (fmap toLower) string + +fRevision :: Format r (Revision -> r) +fRevision = mapf getRevision string + +fRevisionLower :: Format r (Revision -> r) +fRevisionLower = mapf getRevision lower + +fMessageCodeLower :: Format r (MessageCode -> r) +fMessageCodeLower = mapf getMessageCode lower + +fSegmentCodeLower :: Format r (SegmentCode -> r) +fSegmentCodeLower = mapf getSegmentCode lower + +fCompositeCodeLower :: Format r (CompositeCode -> r) +fCompositeCodeLower = mapf getCompositeCode lower + +fSimpleCodeLower :: Format r (SimpleCode -> r) +fSimpleCodeLower = mapf getSimpleCode lower + +-- This might not be the proper way to do it... +-- Use Data.Text.Encoding.decodeUtf8With instead? +cleanupAsciiArt :: Text -> Text +cleanupAsciiArt = + let f 'Ä' = '-' + f '¿' = '+' + f '³' = '|' + f 'Ù' = '+' + f 'Á' = '+' + f c = c + in T.map f diff --git a/scaffolder/src/Text/Edifact/BundleReader/Composites.hs b/scaffolder/src/Text/Edifact/BundleReader/Composites.hs new file mode 100644 index 0000000..2880b95 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Composites.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.Composites + ( readComposites + , compositesDirectory + ) where + +import Text.Edifact.BundleReader.Commons +import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..)) +import Text.Edifact.Scaffolder.Commons + +import Text.Edifact.Scaffolder.Composites.Specification (listSimples) + +import Control.Monad (when) +import Control.Monad.Reader (asks) +import Data.ByteString as BS (ByteString, + readFile) +import Formatting + +compositesDirectory :: FilePath +compositesDirectory = "composites" + +readComposites :: [ByteString] -> ([CompositeCode], [SimpleCode]) -> BundleReader [SimpleCode] +readComposites contents (composites, simples) = do + let parsedFile path = parseFile composites =<< liftIO (BS.readFile path) + parsedString = parseFile composites + files <- asks compositesFiles + parsedFiles <- traverse parsedFile files + parsedStrings <- traverse parsedString contents + let filtered = mappend simples $ concatMap snd $ filter (\s -> fst s `elem` composites) $ concat (parsedFiles <> parsedStrings) + pure filtered + +parseFile :: [CompositeCode] -> ByteString -> BundleReader [(CompositeCode, [SimpleCode])] +parseFile composites content = + let chunks = tail $ splitFileByDash 70 $ decodeContent content + in traverse (parseChunk composites) chunks + +parseChunk :: [CompositeCode] -> Text -> BundleReader (CompositeCode, [SimpleCode]) +parseChunk composites chunk = do + parsed <- parseOrFail chunk listSimples + outputFile <- getOutputFile (fCompositeCodeLower % ".txt") compositesDirectory (fst parsed) + when (fst parsed `elem` composites) $ toFile chunk outputFile + pure parsed diff --git a/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs b/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs new file mode 100644 index 0000000..0609c03 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Configuration.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE FlexibleContexts #-} + +module Text.Edifact.BundleReader.Configuration + ( -- * Parsing environment + BundleReaderEnv(..) + -- * Shortcuts for reading the environment + , getTargetRevision + , getHome + ) where + +import Text.Edifact.Scaffolder.Commons (MessageCode, Revision) + +import Control.Monad.Reader.Class (MonadReader, asks) + +data BundleReaderEnv = BundleReaderEnv { parserRevision :: Revision + , specificationHome :: FilePath + , bundle :: [FilePath] + , messageNames :: [MessageCode] + , messagesFiles :: [FilePath] + , segmentsFiles :: [FilePath] + , compositesFiles :: [FilePath] + , simplesFiles :: [FilePath] + , codedSimplesFiles :: [FilePath] + } + +getTargetRevision :: MonadReader BundleReaderEnv m => m Revision +getTargetRevision = asks parserRevision + +getHome :: MonadReader BundleReaderEnv m => m FilePath +getHome = asks specificationHome diff --git a/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs new file mode 100644 index 0000000..f4be7e9 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Extractor.hs @@ -0,0 +1,75 @@ +{-# LANGUAGE NamedFieldPuns #-} +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.Extractor + ( FileContents(..) + , readZip + ) where + +import Text.Edifact.Scaffolder.Commons (Revision (..)) + +import Codec.Archive.Zip +import Data.ByteString as B (ByteString, isInfixOf, + isPrefixOf, readFile) +import Data.ByteString.Lazy as BS (fromStrict, toStrict) +import Data.Char (toLower) +import Data.List as L (intercalate, isPrefixOf) +import Data.List.Split (splitOn) +import Data.Maybe (maybeToList) + +data FileContent = + FileContent + { fileType :: FileType + , fileContent :: ByteString + } + +data FileType = Message | Segment | Composite | Simple | CodedSimple deriving Eq + +data FileContents = + FileContents + { messages :: [ByteString] + , segments :: [ByteString] + , composites :: [ByteString] + , simples :: [ByteString] + , codedSimples :: [ByteString] + } + +readZip :: Revision -> FilePath -> IO FileContents +readZip specification f = toFileContents . parseFile (getExtension f) (getName f) specification <$> B.readFile f + +toFileContents :: [FileContent] -> FileContents +toFileContents t = FileContents + { messages = fileContent <$> filter ((==) Message . fileType) t + , segments = fileContent <$> filter ((==) Segment . fileType) t + , composites = fileContent <$> filter ((==) Composite . fileType) t + , simples = fileContent <$> filter ((==) Simple . fileType) t + , codedSimples = fileContent <$> filter ((==) CodedSimple . fileType) t + } + +getName :: FilePath -> String +getName = intercalate "." . init . splitOn "." . last . splitOn "/" + +getExtension :: FilePath -> String +getExtension = fmap toLower . last . splitOn "." + +parseFile :: String -> String -> Revision -> ByteString -> [FileContent] +parseFile "zip" _ specification content = unzipAndRead specification content +parseFile extension name specification content + | ("d" <> extension) == (toLower <$> getRevision specification) = maybeToList $ identifyFile name content +parseFile _ _ _ _ = [] + +unzipAndRead :: Revision -> ByteString -> [FileContent] +unzipAndRead specification content = let + archive = zEntries $ toArchive $ BS.fromStrict content + toContents e@Entry{eRelativePath} = parseFile (getExtension eRelativePath) (getName eRelativePath) specification (BS.toStrict $ fromEntry e) + in + concatMap toContents archive + +identifyFile :: String -> ByteString -> Maybe FileContent +identifyFile name content + | " Message Type : " `isInfixOf` content = pure $ FileContent Message content + | "2. Composite specifications" `B.isPrefixOf` content = pure $ FileContent Composite content + | "2. Segment specifications" `B.isPrefixOf` content = pure $ FileContent Segment content + | "2. Data element specifications" `B.isPrefixOf` content = pure $ FileContent Simple content + | "UNCL" `L.isPrefixOf` name = pure $ FileContent CodedSimple content +identifyFile _ _ = Nothing diff --git a/scaffolder/src/Text/Edifact/BundleReader/Messages.hs b/scaffolder/src/Text/Edifact/BundleReader/Messages.hs new file mode 100644 index 0000000..5537d28 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Messages.hs @@ -0,0 +1,61 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +module Text.Edifact.BundleReader.Messages + ( readMessages + , messagesDirectory + ) where + +import Text.Edifact.BundleReader.Commons (BundleReader, + decodeContent, + fMessageCodeLower, + getOutputFile, + parseOrFail, + toFile) +import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..)) +import Text.Edifact.Scaffolder.Commons +import Text.Edifact.Scaffolder.Messages.Specification (listSegments, messageNameParser) + +import Control.Monad (when) +import Control.Monad.Reader (asks) +import Data.ByteString as BS (ByteString, + readFile) +import qualified Data.Text as T (isPrefixOf, + lines, + unlines) +import Formatting + +messagesDirectory :: FilePath +messagesDirectory = "messages" + +readMessages :: [ByteString] -> BundleReader [(MessageCode, [SegmentCode])] +readMessages contents = do + selectedMessages <- asks messageNames + let parsedFile path = parseFile selectedMessages =<< liftIO (BS.readFile path) + parsedString = parseFile selectedMessages + parsedFiles <- traverse parsedFile =<< asks messagesFiles + parsedStrings <- traverse parsedString contents + let filtered = parsedFiles <> filter (\s -> null selectedMessages || fst s `elem` selectedMessages) parsedStrings + pure filtered + +parseFile :: [MessageCode] -> ByteString -> BundleReader (MessageCode, [SegmentCode]) +parseFile selectedMessages content = do + let (definition, summary) = splitFile $ decodeContent content + messageCode <- parseOrFail definition messageNameParser + summaryOutputFile <- getOutputFile (fMessageCodeLower % "_s.txt") messagesDirectory messageCode + definitionOutputFile <- getOutputFile (fMessageCodeLower % "_d.txt") messagesDirectory messageCode + when (messageCode `elem` selectedMessages) $ toFile definition definitionOutputFile + when (messageCode `elem` selectedMessages) $ toFile summary summaryOutputFile + (messageCode,) <$> parseOrFail summary listSegments + +splitFile :: Text -> (Text, Text) +splitFile content = let + separatorBefore = "4.3 Message structure" + separatorAfter = "Annex" + textBefore = takeWhile (not . T.isPrefixOf separatorBefore) $ T.lines content + textInsideAndAfter = dropWhile (not . T.isPrefixOf separatorBefore) $ T.lines content + textAfter = dropWhile (not . T.isPrefixOf separatorAfter) textInsideAndAfter + textSummary = T.unlines $ takeWhile (not . T.isPrefixOf separatorAfter) textInsideAndAfter + textDefinition = T.unlines $ textBefore <> [separatorBefore, "", "See summary file", ""] <> textAfter + in + (textDefinition, textSummary) diff --git a/scaffolder/src/Text/Edifact/BundleReader/Segments.hs b/scaffolder/src/Text/Edifact/BundleReader/Segments.hs new file mode 100644 index 0000000..6b71266 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Segments.hs @@ -0,0 +1,55 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.Segments + ( readSegments + , segmentsDirectory + ) where + +import Text.Edifact.BundleReader.Commons +import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..)) +import Text.Edifact.Scaffolder.Commons + +import Text.Edifact.Scaffolder.Segments.Specification (listCompositesAndSimples) +import Text.Edifact.Scaffolder.Segments.Types (Element, + getComposite, + getSimple) + +import Control.Monad (when) +import Control.Monad.Reader (asks) +import Data.Bifunctor (bimap) +import Data.ByteString as BS (ByteString, + readFile) +import Data.List as L (partition) +import Data.Maybe (isJust, + mapMaybe) +import Formatting + +segmentsDirectory :: FilePath +segmentsDirectory = "segments" + +readSegments :: [ByteString] -> [SegmentCode] -> BundleReader ([CompositeCode], [SimpleCode]) +readSegments contents segments = do + let parsedFile path = parseFile segments =<< liftIO (BS.readFile path) + parsedString = parseFile segments + files <- asks segmentsFiles + parsedFiles <- traverse parsedFile files + parsedStrings <- traverse parsedString contents + let filtered = concatMap snd $ filter (\s -> fst s `elem` segments) $ concat (parsedFiles <> parsedStrings) + pure $ partitionElements filtered + +parseFile :: [SegmentCode] -> ByteString -> BundleReader [(SegmentCode, [Element])] +parseFile segments content = + let chunks = tail $ splitFileByDash 70 $ decodeContent content + in traverse (parseChunk segments) chunks + +parseChunk :: [SegmentCode] -> Text -> BundleReader (SegmentCode, [Element]) +parseChunk segments chunk = do + parsed <- parseOrFail chunk listCompositesAndSimples + outputFile <- getOutputFile (fSegmentCodeLower % ".txt") segmentsDirectory (fst parsed) + when (fst parsed `elem` segments) $ toFile chunk outputFile + pure parsed + +partitionElements :: [Element] -> ([CompositeCode], [SimpleCode]) +partitionElements = + let isComposite = isJust . getComposite + in bimap (mapMaybe getComposite) (mapMaybe getSimple) . partition isComposite diff --git a/scaffolder/src/Text/Edifact/BundleReader/Simples.hs b/scaffolder/src/Text/Edifact/BundleReader/Simples.hs new file mode 100644 index 0000000..df7d662 --- /dev/null +++ b/scaffolder/src/Text/Edifact/BundleReader/Simples.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.BundleReader.Simples + ( readSimples + , simplesDirectory + ) where + +import Text.Edifact.BundleReader.Commons +import Text.Edifact.BundleReader.Configuration (BundleReaderEnv (..)) +import Text.Edifact.Scaffolder.Commons + +import Text.Edifact.Scaffolder.Simples.Specification + +import Control.Monad (when) +import Control.Monad.Reader (asks) +import Data.ByteString as BS (ByteString, + readFile) +import Formatting + +simplesDirectory :: FilePath +simplesDirectory = "simples" + +readSimples :: [ByteString] -> [SimpleCode] -> BundleReader [SimpleCode] +readSimples contents simples = do + let parsedFile path = parseFile simples =<< liftIO (BS.readFile path) + parsedString = parseFile simples + files <- asks simplesFiles + parsedFiles <- traverse parsedFile files + parsedStrings <- traverse parsedString contents + let filtered = filter (`elem` simples) $ concat (parsedFiles <> parsedStrings) + pure filtered + +parseFile :: [SimpleCode] -> ByteString -> BundleReader [SimpleCode] +parseFile simples content = + let chunks = tail $ splitFileByDash 70 $ decodeContent content + in traverse (parseChunk simples) chunks + +parseChunk :: [SimpleCode] -> Text -> BundleReader SimpleCode +parseChunk simples chunk = do + parsed <- parseOrFail chunk specificationParser + outputFile <- getOutputFile (fSimpleCodeLower % ".txt") simplesDirectory (fst parsed) + when (fst parsed `elem` simples) $ toFile chunk outputFile + pure $ fst parsed -- cgit v1.2.3