From a9d77a20008efe82862cc1adbfa7a6d4f09f8ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Menou?= Date: Thu, 8 Dec 2016 10:19:15 +0200 Subject: Release code as open source --- core/src/Text/Edifact/Common.hs | 39 ++++ core/src/Text/Edifact/Common/Composites.hs | 135 +++++++++++++ core/src/Text/Edifact/Common/Segments.hs | 10 + core/src/Text/Edifact/Common/Segments/UNA.hs | 34 ++++ core/src/Text/Edifact/Common/Segments/UNB.hs | 63 +++++++ core/src/Text/Edifact/Common/Segments/UNH.hs | 44 +++++ core/src/Text/Edifact/Common/Segments/UNS.hs | 27 +++ core/src/Text/Edifact/Common/Segments/UNT.hs | 30 +++ core/src/Text/Edifact/Common/Segments/UNZ.hs | 24 +++ core/src/Text/Edifact/Common/Simples.hs | 271 +++++++++++++++++++++++++++ core/src/Text/Edifact/Inspect.hs | 108 +++++++++++ core/src/Text/Edifact/Parsing.hs | 72 +++++++ core/src/Text/Edifact/Parsing/Combinators.hs | 230 +++++++++++++++++++++++ core/src/Text/Edifact/Parsing/Commons.hs | 173 +++++++++++++++++ core/src/Text/Edifact/Parsing/Primitives.hs | 127 +++++++++++++ core/src/Text/Edifact/Types.hs | 124 ++++++++++++ 16 files changed, 1511 insertions(+) create mode 100644 core/src/Text/Edifact/Common.hs create mode 100644 core/src/Text/Edifact/Common/Composites.hs create mode 100644 core/src/Text/Edifact/Common/Segments.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNA.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNB.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNH.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNS.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNT.hs create mode 100644 core/src/Text/Edifact/Common/Segments/UNZ.hs create mode 100644 core/src/Text/Edifact/Common/Simples.hs create mode 100644 core/src/Text/Edifact/Inspect.hs create mode 100644 core/src/Text/Edifact/Parsing.hs create mode 100644 core/src/Text/Edifact/Parsing/Combinators.hs create mode 100644 core/src/Text/Edifact/Parsing/Commons.hs create mode 100644 core/src/Text/Edifact/Parsing/Primitives.hs create mode 100644 core/src/Text/Edifact/Types.hs (limited to 'core/src/Text/Edifact') diff --git a/core/src/Text/Edifact/Common.hs b/core/src/Text/Edifact/Common.hs new file mode 100644 index 0000000..c938d48 --- /dev/null +++ b/core/src/Text/Edifact/Common.hs @@ -0,0 +1,39 @@ +{-| +Module : Text.Edifact.Common +Description : Common syntax + +This module should handle the main revision of the Edifact specification, which +covers UN* segments. + +Currently it barely covers the revision 3 and this has not been scaffolded. + +One future evolution will be to scaffold the various revisions from a more +general specification. + -} +module Text.Edifact.Common + ( + -- * Routine + parseFull + + -- * Reexports + , ParseError + , Parser + , Text + ) where + +import Text.Edifact.Parsing +import Text.Edifact.Parsing.Commons + +import Text.Edifact.Common.Segments + +import Data.Text (Text) + +parseFull :: Parser value -> Text -> Either ParseError value +parseFull = parse . fullSyntaxParser + +fullSyntaxParser :: Parser a -> Parser a +fullSyntaxParser messageParser = + segmentUNA >> + tries [ segmentUNB *> messageParser <* segmentUNZ + , messageParser + ] diff --git a/core/src/Text/Edifact/Common/Composites.hs b/core/src/Text/Edifact/Common/Composites.hs new file mode 100644 index 0000000..39f61e7 --- /dev/null +++ b/core/src/Text/Edifact/Common/Composites.hs @@ -0,0 +1,135 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Composites + ( compositeS001 + , compositeS002 + , compositeS003 + , compositeS004 + , compositeS005 + , compositeS009 + , compositeS010 + ) where + +import Text.Edifact.Common.Simples (simple0001, simple0002, + simple0004, simple0007, + simple0008, simple0010, + simple0014, simple0017, + simple0019, simple0022, + simple0025, simple0042, + simple0046, simple0051, + simple0052, simple0054, + simple0057, simple0065, + simple0070, simple0073, + simple0080, simple0133) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > 010 0001 Syntax identifier M a4 +-- > 020 0002 Syntax version number M an1 +-- > 030 0080 Service code list directory version number C an..6 +-- > 040 0133 Character encoding, coded C an..3 +-- +-- Dependencies: 'simple0001', 'simple0002', 'simple0080', 'simple0133'. +compositeS001 :: Parser Value +compositeS001 = + composite "S001" + [ "010" .@ mandatory simple0001 + , "020" .@ mandatory simple0002 + , "030" .@ optional simple0080 + , "040" .@ optional simple0133 + ] + +-- | Derived from this specification: +-- +-- > 010 0004 Interchange sender identification M an..35 +-- > 020 0007 Identification code qualifier C an..4 +-- > 030 0008 Interchange sender internal identification C an..35 +-- > 040 0042 Interchange sender internal sub-identification C an..35 +-- +-- Dependencies: 'simple0004', 'simple0007', 'simple0008', 'simple0042'. +compositeS002 :: Parser Value +compositeS002 = + composite "S002" + [ "010" .@ mandatory simple0004 + , "020" .@ optional simple0007 + , "030" .@ optional simple0008 + , "040" .@ optional simple0042 + ] + +-- | Derived from this specification: +-- +-- > 010 0010 Interchange recipient identification M an..35 +-- > 020 0007 Identification code qualifier C an..4 +-- > 030 0014 Interchange recipient internal identification C an..35 +-- > 040 0046 Interchange recipient internal sub-identification C an..35 +-- +-- Dependencies: 'simple0007', 'simple0010', 'simple0014', 'simple0046'. +compositeS003 :: Parser Value +compositeS003 = + composite "S003" + [ "010" .@ mandatory simple0010 + , "020" .@ optional simple0007 + , "030" .@ optional simple0014 + , "040" .@ optional simple0046 + ] + +-- | Derived from this specification: +-- +-- > 010 0017 Date M n6 +-- > 020 0019 Time M n4 +-- +-- Dependencies: 'simple0017', 'simple0019'. +compositeS004 :: Parser Value +compositeS004 = + composite "S004" + [ "010" .@ mandatory simple0017 + , "020" .@ mandatory simple0019 + ] + +-- | Derived from this specification: +-- +-- > 010 0022 Recipient reference/password M an..14 +-- > 020 0025 Recipient reference/password qualifier C an2 +-- +-- Dependencies: 'simple0022', 'simple0025'. +compositeS005 :: Parser Value +compositeS005 = + composite "S005" + [ "010" .@ mandatory simple0022 + , "020" .@ optional simple0025 + ] + +-- | Derived from this specification: +-- +-- > 010 0065 Message type M an..6 +-- > 020 0052 Message version number M an..3 +-- > 030 0054 Message release number M an..3 +-- > 040 0051 Controlling agency M an..2 +-- > 050 0057 Association assigned code C an..6 +-- +-- Dependencies: 'simple0051', 'simple0052', 'simple0054', 'simple0057', 'simple0065'. +compositeS009 :: Parser Value +compositeS009 = + composite "S009" + [ "010" .@ mandatory simple0065 + , "020" .@ mandatory simple0052 + , "030" .@ mandatory simple0054 + , "040" .@ mandatory simple0051 + , "050" .@ optional simple0057 + ] + +-- | Derived from this specification: +-- +-- > 010 0070 Sequence of transfers M n..2 +-- > 020 0073 First and last transfer C a1 +-- +-- Dependencies: 'simple0070', 'simple0073'. +compositeS010 :: Parser Value +compositeS010 = + composite "S010" + [ "010" .@ mandatory simple0070 + , "020" .@ optional simple0073 + ] diff --git a/core/src/Text/Edifact/Common/Segments.hs b/core/src/Text/Edifact/Common/Segments.hs new file mode 100644 index 0000000..e73719f --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments.hs @@ -0,0 +1,10 @@ +module Text.Edifact.Common.Segments + ( module S + ) where + +import Text.Edifact.Common.Segments.UNA as S +import Text.Edifact.Common.Segments.UNB as S +import Text.Edifact.Common.Segments.UNH as S +import Text.Edifact.Common.Segments.UNS as S +import Text.Edifact.Common.Segments.UNT as S +import Text.Edifact.Common.Segments.UNZ as S diff --git a/core/src/Text/Edifact/Common/Segments/UNA.hs b/core/src/Text/Edifact/Common/Segments/UNA.hs new file mode 100644 index 0000000..1b20a9f --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNA.hs @@ -0,0 +1,34 @@ +module Text.Edifact.Common.Segments.UNA + ( segmentUNA + ) where + +import Text.Edifact.Parsing +import Text.Edifact.Parsing.Commons (updateSyntax) +import Text.Edifact.Types (Syntax (..), defaultSyntax) + +import Control.Monad (void) +import Text.Parsec (anyChar, char, endOfLine, + optionMaybe, string, try) +import qualified Text.Parsec as P (optional) + +segmentUNA :: Parser () +segmentUNA = + let segmentParser = string "UNA" *> parseSyntax <* P.optional endOfLine + nothing = pure () + in optionMaybe (try segmentParser) >>= maybe nothing updateSyntax + +parseSyntax :: Parser Syntax +parseSyntax = do + compositeSeparator' <- anyChar + elementSeparator' <- anyChar + decimalSign' <- anyChar + escape' <- anyChar + void $ char ' ' -- reserved, not used + segmentSeparator' <- anyChar + pure defaultSyntax + { compositeSeparator = compositeSeparator' + , elementSeparator = elementSeparator' + , decimalSign = decimalSign' + , escape = escape' + , segmentSeparator = segmentSeparator' + } diff --git a/core/src/Text/Edifact/Common/Segments/UNB.hs b/core/src/Text/Edifact/Common/Segments/UNB.hs new file mode 100644 index 0000000..55f1eea --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNB.hs @@ -0,0 +1,63 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Segments.UNB + ( segmentUNB + ) where + +import Text.Edifact.Common.Composites (compositeS001, compositeS002, + compositeS003, compositeS004, + compositeS005) +import Text.Edifact.Common.Simples (simple0020, simple0026, + simple0029, simple0031, + simple0032, simple0035) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > Pos Segment M/C Repeat Repr. Notes +-- > 010 S001 SYNTAX IDENTIFIER M 1 +-- > 0001 Syntax identifier M a4 +-- > 0002 Syntax version number M an1 +-- > 0080 Service code list directory version number C an..6 +-- > 0133 Character encoding, coded C an..3 +-- > 020 S002 INTERCHANGE SENDER M 1 +-- > 0004 Interchange sender identification M an..35 +-- > 0007 Identification code qualifier C an..4 +-- > 0008 Interchange sender internal identification C an..35 +-- > 0042 Interchange sender internal sub-identification C an..35 +-- > 030 S003 INTERCHANGE RECIPIENT M 1 +-- > 0010 Interchange recipient identification M an..35 +-- > 0007 Identification code qualifier C an..4 +-- > 0014 Interchange recipient internal identification C an..35 +-- > 0046 Interchange recipient internal sub-identification C an..35 +-- > 040 S004 DATE AND TIME OF PREPARATION M 1 +-- > 0017 Date M n8 +-- > 0019 Time M n4 +-- > 050 0020 Interchange control reference M 1 an..14 +-- > 060 S005 RECIPIENT'S REFERENCE/PASSWORD DETAILS C 1 +-- > 0022 Recipient reference/password M an..14 +-- > 0025 Recipient reference/password qualifier C an2 +-- > 070 0026 Application reference C 1 an..14 +-- > 080 0029 Processing priority code C 1 a1 +-- > 090 0031 Acknowledgement request C 1 n1 +-- > 100 0032 Interchange agreement identifier C 1 an..35 +-- > 110 0035 Test indicator C 1 n1 +-- +-- Dependencies: 'compositeS001', 'compositeS002', 'compositeS003', 'compositeS004', 'compositeS005', 'simple0020', 'simple0026', 'simple0029', 'simple0031', 'simple0032', 'simple0035'. +segmentUNB :: Parser Value +segmentUNB = + segment "UNB" + [ "010" .@ mandatory compositeS001 + , "020" .@ mandatory compositeS002 + , "030" .@ mandatory compositeS003 + , "040" .@ mandatory compositeS004 + , "050" .@ mandatory simple0020 + , "060" .@ optional compositeS005 + , "070" .@ optional simple0026 + , "080" .@ optional simple0029 + , "090" .@ optional simple0031 + , "100" .@ optional simple0032 + , "110" .@ optional simple0035 + ] diff --git a/core/src/Text/Edifact/Common/Segments/UNH.hs b/core/src/Text/Edifact/Common/Segments/UNH.hs new file mode 100644 index 0000000..61cb6b3 --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNH.hs @@ -0,0 +1,44 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Segments.UNH + ( segmentUNH + ) where + +import Text.Edifact.Common.Composites (compositeS009, compositeS010) +import Text.Edifact.Common.Simples (simple0062, simple0068) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > Change indicators +-- > +-- > UNH MESSAGE HEADER +-- > +-- > Function: To head, identify and specify a message. +-- > +-- > 010 0062 MESSAGE REFERENCE NUMBER M an..14 +-- > +-- > 020 S009 MESSAGE IDENTIFIER M +-- > 0065 Message type M an..6 +-- > 0052 Message version number M an..3 +-- > 0054 Message release number M an..3 +-- > 0051 Controlling agency M an..2 +-- > 0057 Association assigned code C an..6 +-- > +-- > 030 0068 COMMON ACCESS REFERENCE C an..35 +-- > +-- > 040 S010 STATUS OF THE TRANSFER C +-- > 0070 Sequence of transfers M n..2 +-- > 0073 First and last transfer C a1 +-- +-- Dependencies: 'compositeS009', 'compositeS010', 'simple0062', 'simple0068'. +segmentUNH :: Parser Value +segmentUNH = + segment "UNH" + [ "010" .@ mandatory simple0062 + , "020" .@ mandatory compositeS009 + , "030" .@ optional simple0068 + , "040" .@ optional compositeS010 + ] diff --git a/core/src/Text/Edifact/Common/Segments/UNS.hs b/core/src/Text/Edifact/Common/Segments/UNS.hs new file mode 100644 index 0000000..3d9b395 --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNS.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Segments.UNS + ( segmentUNS + ) where + +import Text.Edifact.Common.Simples (simple0081) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > Change indicators +-- > +-- > UNS SECTION CONTROL +-- > +-- > Function: To separate Header, Detail and Summary sections of a message +-- > +-- > 010 0081 SECTION IDENTIFICATION M a1 +-- +-- Dependencies: 'simple0081'. +segmentUNS :: Parser Value +segmentUNS = + segment "UNS" + [ "010" .@ mandatory simple0081 + ] diff --git a/core/src/Text/Edifact/Common/Segments/UNT.hs b/core/src/Text/Edifact/Common/Segments/UNT.hs new file mode 100644 index 0000000..e91f9cf --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNT.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Segments.UNT + ( segmentUNT + ) where + +import Text.Edifact.Common.Simples (simple0062, simple0074) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > Change indicators +-- > +-- > UNT MESSAGE TRAILER +-- > +-- > Function: To end and check the completeness of a message. +-- > +-- > 010 0074 NUMBER OF SEGMENTS IN THE MESSAGE M n..6 +-- > +-- > 020 0062 MESSAGE REFERENCE NUMBER M an..14 +-- +-- Dependencies: 'simple0062', 'simple0074'. +segmentUNT :: Parser Value +segmentUNT = + segment "UNT" + [ "010" .@ mandatory simple0074 + , "020" .@ mandatory simple0062 + ] diff --git a/core/src/Text/Edifact/Common/Segments/UNZ.hs b/core/src/Text/Edifact/Common/Segments/UNZ.hs new file mode 100644 index 0000000..0566860 --- /dev/null +++ b/core/src/Text/Edifact/Common/Segments/UNZ.hs @@ -0,0 +1,24 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Segments.UNZ + ( segmentUNZ + ) where + +import Text.Edifact.Common.Simples (simple0020, simple0036) + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > Pos Segment M/C Repeat Repr. Notes +-- > 010 0036 Interchange control count M 1 n..6 +-- > 020 0020 Interchange control reference M 1 an..14 +-- +-- Dependencies: 'simple0020', 'simple0036'. +segmentUNZ :: Parser Value +segmentUNZ = + segment "UNZ" + [ "010" .@ mandatory simple0036 + , "030" .@ mandatory simple0020 + ] diff --git a/core/src/Text/Edifact/Common/Simples.hs b/core/src/Text/Edifact/Common/Simples.hs new file mode 100644 index 0000000..537b128 --- /dev/null +++ b/core/src/Text/Edifact/Common/Simples.hs @@ -0,0 +1,271 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Common.Simples + ( simple0001 + , simple0002 + , simple0004 + , simple0007 + , simple0008 + , simple0010 + , simple0014 + , simple0017 + , simple0019 + , simple0020 + , simple0022 + , simple0025 + , simple0026 + , simple0029 + , simple0031 + , simple0032 + , simple0035 + , simple0036 + , simple0042 + , simple0046 + , simple0051 + , simple0052 + , simple0054 + , simple0057 + , simple0062 + , simple0065 + , simple0068 + , simple0070 + , simple0073 + , simple0074 + , simple0080 + , simple0081 + , simple0133 + ) where + +import Text.Edifact.Parsing +import Text.Edifact.Types (Value) + +-- | Derived from this specification: +-- +-- > 0001 Syntax identifier +-- > Repr: a4 +simple0001 :: Parser Value +simple0001 = simple "0001" (alpha `exactly` 4) + +-- | Derived from this specification: +-- +-- > 0002 Syntax version number +-- > Repr: an1 +simple0002 :: Parser Value +simple0002 = simple "0002" (alphaNumeric `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0004 Interchange sender identification +-- > Repr: an..35 +simple0004 :: Parser Value +simple0004 = simple "0004" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0007 Identification code qualifier +-- > Repr: an..4 +simple0007 :: Parser Value +simple0007 = simple "0007" (alphaNumeric `upTo` 4) + +-- | Derived from this specification: +-- +-- > 0008 Interchange sender internal identification +-- > Repr: an..35 +simple0008 :: Parser Value +simple0008 = simple "0008" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0010 Interchange recipient identification +-- > Repr: an..35 +simple0010 :: Parser Value +simple0010 = simple "0010" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0014 Interchange recipient internal identification +-- > Repr: an..35 +simple0014 :: Parser Value +simple0014 = simple "0014" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0017 Date +-- > Repr: n6 +simple0017 :: Parser Value +simple0017 = simple "0017" (numeric `exactly` 6) + +-- | Derived from this specification: +-- +-- > 0019 Time +-- > Repr: n4 +simple0019 :: Parser Value +simple0019 = simple "0019" (numeric `exactly` 4) + +-- | Derived from this specification: +-- +-- > 0020 Interchange control reference +-- > Repr: an..14 +simple0020 :: Parser Value +simple0020 = simple "0020" (alphaNumeric `upTo` 14) + +-- | Derived from this specification: +-- +-- > 0022 Recipient reference/password +-- > Repr: an..14 +simple0022 :: Parser Value +simple0022 = simple "0022" (alphaNumeric `upTo` 14) + +-- | Derived from this specification: +-- +-- > 0025 Recipient reference/password qualifier +-- > Repr: an2 +simple0025 :: Parser Value +simple0025 = simple "0025" (alphaNumeric `exactly` 2) + +-- | Derived from this specification: +-- +-- > 0026 Application reference +-- > Repr: an..14 +simple0026 :: Parser Value +simple0026 = simple "0026" (alphaNumeric `upTo` 14) + +-- | Derived from this specification: +-- +-- > 0029 Processing priority code +-- > Repr: a1 +simple0029 :: Parser Value +simple0029 = simple "0029" (alphaNumeric `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0031 Acknowledgement request +-- > Repr: n1 +simple0031 :: Parser Value +simple0031 = simple "0031" (numeric `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0032 Interchange agreement identifier +-- > Repr: an..35 +simple0032 :: Parser Value +simple0032 = simple "0032" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0035 Test indicator +-- > Repr: n1 +simple0035 :: Parser Value +simple0035 = simple "0035" (numeric `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0036 Interchange control count +-- > Repr: n..6 +simple0036 :: Parser Value +simple0036 = simple "0036" (numeric `upTo` 6) + +-- | Derived from this specification: +-- +-- > 0042 Interchange sender internal sub-identification +-- > Repr: an..35 +simple0042 :: Parser Value +simple0042 = simple "0042" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0046 Interchange recipient internal sub-identification +-- > Repr: an..35 +simple0046 :: Parser Value +simple0046 = simple "0046" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0051 Controlling agency +-- > Repr: an..2 +simple0051 :: Parser Value +simple0051 = simple "0051" (alphaNumeric `upTo` 2) + +-- | Derived from this specification: +-- +-- > 0052 Message version number +-- > Repr: an..3 +simple0052 :: Parser Value +simple0052 = simple "0052" (alphaNumeric `upTo` 3) + +-- | Derived from this specification: +-- +-- > 0054 Message release number +-- > Repr: an..3 +simple0054 :: Parser Value +simple0054 = simple "0054" (alphaNumeric `upTo` 3) + +-- | Derived from this specification: +-- +-- > 0057 Association assigned code +-- > Repr: an..6 +simple0057 :: Parser Value +simple0057 = simple "0057" (alphaNumeric `upTo` 6) + +-- | Derived from this specification: +-- +-- > 0062 MESSAGE REFERENCE NUMBER +-- > Repr: an..14 +simple0062 :: Parser Value +simple0062 = simple "0062" (alphaNumeric `upTo` 14) + +-- | Derived from this specification: +-- +-- > 0065 Message type +-- > Repr: an..6 +simple0065 :: Parser Value +simple0065 = simple "0065" (alphaNumeric `upTo` 6) + +-- | Derived from this specification: +-- +-- > 0068 COMMON ACCESS REFERENCE +-- > Repr: an..35 +simple0068 :: Parser Value +simple0068 = simple "0068" (alphaNumeric `upTo` 35) + +-- | Derived from this specification: +-- +-- > 0070 Sequence of transfers +-- > Repr: n..2 +simple0070 :: Parser Value +simple0070 = simple "0070" (numeric `upTo` 2) + +-- | Derived from this specification: +-- +-- > 0073 First and last transfer +-- > Repr: a1 +simple0073 :: Parser Value +simple0073 = simple "0073" (alpha `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0074 NUMBER OF SEGMENTS IN THE MESSAGE +-- > Repr: n..6 +simple0074 :: Parser Value +simple0074 = simple "0074" (numeric `upTo` 6) + +-- | Derived from this specification: +-- +-- > 0080 Service code list directory version number +-- > Repr: an..6 +simple0080 :: Parser Value +simple0080 = simple "0080" (alphaNumeric `upTo` 6) + +-- | Derived from this specification: +-- +-- > 0081 SECTION IDENTIFICATION +-- > Repr: a1 +simple0081 :: Parser Value +simple0081 = simple "0081" (alpha `exactly` 1) + +-- | Derived from this specification: +-- +-- > 0133 Character encoding, coded +-- > Repr: an..3 +simple0133 :: Parser Value +simple0133 = simple "0133" (alphaNumeric `upTo` 3) diff --git a/core/src/Text/Edifact/Inspect.hs b/core/src/Text/Edifact/Inspect.hs new file mode 100644 index 0000000..fb3755a --- /dev/null +++ b/core/src/Text/Edifact/Inspect.hs @@ -0,0 +1,108 @@ +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} + +module Text.Edifact.Inspect + ( inspect + ) where + +import Text.Edifact.Types + +import Control.Monad.Reader (Reader, ask, local, runReader) +import Data.Maybe (catMaybes) +import Data.String (IsString) +import Data.Text (Text) +import Formatting + +type Indent = Int + +type Rendering = Reader Indent + +indent :: Rendering a -> Rendering a +indent = local (+1) + +getIndentation :: Rendering Int +getIndentation = ask + +inspect :: Value -> Text +inspect = renderInspection . valueRenderer + +renderInspection :: Rendering a -> a +renderInspection r = runReader r 0 + +valueRenderer :: Value -> Rendering Text +valueRenderer (Simple _ primitive) = primitiveRenderer primitive +valueRenderer (Composite _ values) = sformat inBrackets . commaSeparated . catMaybes <$> traverse positionRenderer values +valueRenderer (Segment code values) = indentedPrefix fSegmentCode code " " . spaceSeparated . catMaybes =<< traverse positionRenderer values +valueRenderer (Group code values) = indentedPrefix fGroupCode code "\n" . lineSeparated =<< indent (traverse silentPositionRenderer values) +valueRenderer (Message code values) = indentedPrefix fMessageCode code "\n" . lineSeparated =<< indent (traverse silentPositionRenderer values) + +inBrackets :: Format r (Text -> r) +inBrackets = "[" % stext % "]" + +indentedPrefix :: Format (String -> Text -> Text) (code -> String -> Text -> Text) -> code -> String -> Text -> Rendering Text +indentedPrefix codeFormatter code sep t = do + i <- getIndentation + let prefix = replicate (i * 2) ' ' + pure (sformat (string % codeFormatter % string % stext) prefix code sep t) + +fMessageCode :: Format r (MessageCode -> r) +fMessageCode = mapf getMessageCode ("message " % string) + +fGroupCode :: Format r (GroupCode -> r) +fGroupCode = mapf getGroupCode ("Segment Group - " % string) + +fSegmentCode :: Format r (SegmentCode -> r) +fSegmentCode = mapf getSegmentCode string + +positionRenderer :: (Position, Maybe Value) -> Rendering (Maybe Text) +positionRenderer (pos, value) = fmap (flip (sformat (stext % fPosition)) pos) <$> traverse valueRenderer value + +silentPositionRenderer :: (Position, [Value]) -> Rendering Text +silentPositionRenderer (_, value) = lineSeparated <$> traverse valueRenderer value + +fPosition :: Format r (Position -> r) +fPosition = mapf getPosition ("@" % string) + +primitiveRenderer :: Primitive -> Rendering Text +primitiveRenderer (String t) = pure (sformat ("\"" % stext % "\"") t) +primitiveRenderer (Number s) = pure (sformat shown s) + +newtype CommaSeparated = CommaSeparated { getCommaSeparated :: Text } deriving newtype (IsString, Eq) + +instance Semigroup CommaSeparated where + t1 <> "" = t1 + "" <> t2 = t2 + t1 <> t2 = CommaSeparated (getCommaSeparated t1 <> "," <> getCommaSeparated t2) + +instance Monoid CommaSeparated where + mempty = "" + +commaSeparated :: Foldable f => f Text -> Text +commaSeparated = getCommaSeparated . foldMap CommaSeparated + +newtype SpaceSeparated = SpaceSeparated { getSpaceSeparated :: Text } deriving newtype (IsString, Eq) + +instance Semigroup SpaceSeparated where + t1 <> "" = t1 + "" <> t2 = t2 + t1 <> t2 = SpaceSeparated (getSpaceSeparated t1 <> " " <> getSpaceSeparated t2) + +instance Monoid SpaceSeparated where + mempty = "" + +spaceSeparated :: Foldable f => f Text -> Text +spaceSeparated = getSpaceSeparated . foldMap SpaceSeparated + +newtype LineSeparated = LineSeparated { getLineSeparated :: Text } deriving newtype (IsString, Eq) + +instance Semigroup LineSeparated where + t1 <> "" = t1 + "" <> t2 = t2 + t1 <> t2 = LineSeparated (getLineSeparated t1 <> "\n" <> getLineSeparated t2) + +instance Monoid LineSeparated where + mempty = "" + +lineSeparated :: Foldable f => f Text -> Text +lineSeparated = getLineSeparated . foldMap LineSeparated diff --git a/core/src/Text/Edifact/Parsing.hs b/core/src/Text/Edifact/Parsing.hs new file mode 100644 index 0000000..0b1ece8 --- /dev/null +++ b/core/src/Text/Edifact/Parsing.hs @@ -0,0 +1,72 @@ +{-| +Module : Text.Edifact.Parsing +Description : Parsing routines and combinators + +This module is there to reexport most of the combinators and helpers required +to parse an Edifact payload. + +For high level combinators, have a look at "Text.Edifact.Parsing.Combinators". + +For low level combinators, have a look at "Text.Edifact.Parsing.Primitives". + -} +module Text.Edifact.Parsing + ( + -- * Parsing routines + parse + + -- * Combinators + -- | See "Text.Edifact.Parsing.Combinators" for more details + + -- ** Values parsers + , message + , segment + , segmentGroup + , composite + , simple + + -- ** Position and strictness + , position + , (.@) + , (@.) + , mandatory + , optional + + -- ** Repetition of segments and segment groups + , repeated + , repeatedAtLeastOnce + , once + , maybeOnce + + -- * Primitives + -- | See "Text.Edifact.Parsing.Primitives" for more details and known limitations. + + -- ** Simple elements definition + , alphaNumeric + , alpha + , numeric + -- ** Cardinality + , exactly + , upTo + , many + + -- * Types + , Parser + -- ** Reexported + , ParseError + ) where + +import Text.Edifact.Parsing.Combinators (composite, mandatory, + maybeOnce, message, once, + optional, position, repeated, + repeatedAtLeastOnce, segment, + segmentGroup, simple, (.@), + (@.)) +import Text.Edifact.Parsing.Commons (Parser, defaultContext) +import Text.Edifact.Parsing.Primitives (alpha, alphaNumeric, exactly, + many, numeric, upTo) + +import Data.Text (Text) +import Text.Parsec (ParseError, runParser) + +parse :: Parser value -> Text -> Either ParseError value +parse p = runParser p defaultContext "" diff --git a/core/src/Text/Edifact/Parsing/Combinators.hs b/core/src/Text/Edifact/Parsing/Combinators.hs new file mode 100644 index 0000000..ce3f4be --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Combinators.hs @@ -0,0 +1,230 @@ +{-# LANGUAGE TupleSections #-} + +{-| +Module : Text.Edifact.Parsing.Combinators +Description : High level combinators + -} +module Text.Edifact.Parsing.Combinators + ( -- * Combinators + -- ** Values parsers + message + , segmentGroup + , segment + , composite + , simple + + -- ** Position and strictness + , position + , (.@) + , (@.) + , mandatory + , optional + + -- ** Repetition of segments and segment groups + , repeated + , repeatedAtLeastOnce + , once + , maybeOnce + ) where + +import Text.Edifact.Parsing.Commons +import Text.Edifact.Types + +import Text.Parsec (lookAhead, many1, optionMaybe, + string, try) +import qualified Text.Parsec as P (many) + +-- | Parses a 'Message'. +-- +-- > messageABCDEF :: Parser Value +-- > messageABCDEF = +-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) +-- > c101 = composite "C101" [ position "010" (mandatory simple1234) +-- > , position "020" (optional simple1234) +-- > ] +-- > segmentABC = segment "ABC" [ position "010" (mandatory c101) +-- > ] +-- > in message "ABCDEF" [ position "0010" (mandatory segmentABC) +-- > ] +message :: MessageCode -> [Parser (Position, [Value])] -> Parser Value +message code ps = + let description = "message " <> show code + in Message code <$> sequence ps description + +-- | Parses a 'Group'. +-- +-- A Segment Group is the way Edifact format represents hierarchy. One can view +-- a segment group as a sub message. A segment group can be repeated like +-- segments. A segment group wraps segments and segment groups. +segmentGroup :: GroupCode -> [Parser (Position, [Value])] -> Parser Value +segmentGroup code ps = + let description = "segment-group " <> show code + in Group code <$> sequence ps description + +-- | Parses a 'Segment'. +-- +-- Following parser: +-- +-- > segmentABC :: Parser Value +-- > segmentABC = +-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) +-- > simple2001 = simple "2001" (alphaNumeric `exactly` 3) +-- > c101 = composite "C101" [ position "010" (mandatory simple1234) +-- > , position "020" (optional simple1234) +-- > , position "030" (optional simple1234) +-- > ] +-- > in segment "ABC" [ position "010" (mandatory simple2001) +-- > , position "020" (optional c101) +-- > ] +-- +-- would parse strings such as: +-- +-- >>> parse segmentABC "ABC+123'" +-- Segment "ABC" [ ("010", Just (Simple "2001" "123")) +-- ] +-- >>> parse segmentABC "ABC+123+abcdefgh'" +-- Segment "ABC" [ ("010", Just (Simple "2001" "123")) +-- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) +-- ] +-- )) +-- ] +-- >>> parse segmentABC "ABC+123+abcdefgh:ijklmno'" +-- Segment "ABC" [ ("010", Just (Simple "2001" "123")) +-- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) +-- , ("020", Just (Simple "1234" "ijklmno")) +-- ] +-- )) +-- ] +segment :: SegmentCode -> [Parser (Position, Maybe Value)] -> Parser Value +segment code parsers = + let go [] = [] <$ parseSegmentSeparator + go (p:ps) = + tries [ [] <$ parseSegmentSeparator + , (:) <$> (parseElementSeparator *> p) + <*> go ps + ] + description = "segment " <> show code + in Segment <$> parseSegmentCode code + <*> go parsers + description + +parseSegmentCode :: SegmentCode -> Parser SegmentCode +parseSegmentCode (SegmentCode code) = + let description = "segment code " <> show code + in SegmentCode <$> string code description + +-- | Parses a 'Composite' element. +-- +-- Following parser: +-- +-- > compositeC101 :: Parser Value +-- > compositeC101 = +-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) +-- > in composite "C101" [ position "010" (mandatory simple1234) +-- > , position "020" (optional simple1234) +-- > , position "030" (optional simple1234) +-- > ] +-- +-- would parse strings such as: +-- +-- >>> parse compositeC101 "abcdefgh" +-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) +-- ] +-- >>> parse compositeC101 "abcdefgh:ijklmno" +-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) +-- , ("020", Just (Simple "1234" "ijklmno")) +-- ] +-- >>> parse compositeC101 "abcdefgh::pqrstu" +-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh")) +-- , ("020", Just (Simple "1234" "")) +-- , ("030", Just (Simple "1234" "pqrstu")) +-- ] +composite :: CompositeCode -> [Parser (Position, Maybe Value)] -> Parser Value +composite code parsers = + let go [] = pure [] + go (p:ps) = do + let parseSeparator = tries [ parseCompositeSeparator + , lookAhead parseElementSeparator + , lookAhead parseSegmentSeparator + ] + (value, continuation) <- tries [ (, ps) <$> p <* parseSeparator + , (, []) <$> p + ] + (:) value <$> go continuation + description = "composite element " <> show code + in Composite code <$> go parsers description + +-- | Parses a 'Simple' element. +-- +-- Following parser would parse strings of size between 0 and 35 characters. +-- +-- > simple1234 :: Parser Value +-- > simple1234 = simple "1234" (alphaNumeric `upTo` 35) +simple :: SimpleCode -> Parser Primitive -> Parser Value +simple code p = + let description = "simple element " <> show code + in Simple code <$> p description + +-- | Makes the parsing of the element optional, which doesn't consume input if the given parser doesn't succeed. +optional :: Parser Value -> Parser (Maybe Value) +optional = optionMaybe + +-- | Makes the parsing of the element mandatory. +mandatory :: Parser Value -> Parser (Maybe Value) +mandatory = fmap Just + +-- | Sets the current 'Position'. This is relevant for segment in a message, for composite or simple element in a segment, and for simple element in a composite. +position :: Position -> Parser (f Value) -> Parser (Position, f Value) +position pos p = + let decorated = (pos,) <$> p + in setCurrentPosition pos *> decorated <* resetCurrentPosition + +-- | Alias to 'position'. +-- +-- > compositeC101 :: Parser Value +-- > compositeC101 = +-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) +-- > in composite "C101" [ "010" .@ mandatory simple1234 +-- > , "020" .@ optional simple1234 +-- > , "030" .@ optional simple1234 +-- > ] +(.@) :: Position -> Parser (f Value) -> Parser (Position, f Value) +(.@) = position + +-- | Flipped alias to 'position'. +-- +-- > compositeC101 :: Parser Value +-- > compositeC101 = +-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35) +-- > in composite "C101" [ mandatory simple1234 @. "010" +-- > , optional simple1234 @. "020" +-- > , optional simple1234 @. "030" +-- > ] +(@.) :: Parser (f Value) -> Position -> Parser (Position, f Value) +(@.) = flip position + +-- | For segments or segment groups, let you express how many occurrences. +repeated :: Int -> Parser a -> Parser [a] +repeated limit p = do + values <- P.many (try p) + let parsed = length values + if parsed > limit + then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed) + else pure values + +-- | For segments or segment groups, let you express how many occurrences with at least one occurrence. +repeatedAtLeastOnce :: Int -> Parser a -> Parser [a] +repeatedAtLeastOnce limit p = do + values <- many1 (try p) + let parsed = length values + if parsed > limit + then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed) + else pure values + +-- | For segments or segment groups, let you express you expect only one occurrence. +once :: Parser a -> Parser [a] +once = fmap pure + +-- | For segments or segment groups, let you express you expect one or no occurrence. +maybeOnce :: Parser a -> Parser [a] +maybeOnce = fmap (maybe [] pure) . optionMaybe diff --git a/core/src/Text/Edifact/Parsing/Commons.hs b/core/src/Text/Edifact/Parsing/Commons.hs new file mode 100644 index 0000000..a1c6150 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Commons.hs @@ -0,0 +1,173 @@ +module Text.Edifact.Parsing.Commons + ( -- * Parsing context + Parser + , Context(..) + , CurrentPosition(..) + , defaultContext + + -- * State combinators + , updateSyntax + , setCurrentPosition + , resetCurrentPosition + + -- * Syntax helpers + -- ** Parsing combinators + , parseCompositeSeparator + , parseElementSeparator + , parseSegmentSeparator + , parseEscape + + -- ** State accessors + -- | Shortcuts to the syntax in current state. Doesn't alter input stream. + , getCompositeSeparator + , getElementSeparator + , getSegmentSeparator + , getDecimalSign + + -- * Context aware failure helpers + , failWithPosition + , () + + -- * Parsec extras + , tries + + -- * Technical combinators + , notYetImplemented + ) where + +import Text.Edifact.Types (Position, Syntax (..), defaultSyntax) + +import Data.Text (Text) +import Text.Parsec (Parsec, char, choice, endOfLine, getState, + modifyState, try, updateState, ()) + +-- | Defines our "Text.Parsec" context. +type Parser = Parsec Text Context + +data Context = + Context + { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment. + , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages. + } + +defaultContext :: Context +defaultContext = Context defaultSyntax Undefined + +-- | Current position in the parser. +-- +-- For now it only stores the current position in a message, a segment group, +-- a segment, or a composite. +-- +-- Future version could store the whole path to improve debugging. +data CurrentPosition = Undefined + | Defined Position + deriving Show + +getSyntax :: Parser Syntax +getSyntax = parsingSyntax <$> getState + +-- | Get current charactor for decimal sign. +-- It doesn't parse nor consume input. +getDecimalSign :: Parser Char +getDecimalSign = decimalSign <$> getSyntax + +-- | Get current charactor for segment separator. +-- It doesn't parse nor consume input. +getSegmentSeparator :: Parser Char +getSegmentSeparator = segmentSeparator <$> getSyntax + +-- | Get current charactor for element separator. +-- It doesn't parse nor consume input. +getElementSeparator :: Parser Char +getElementSeparator = elementSeparator <$> getSyntax + +-- | Get current charactor for composite separator. +-- It doesn't parse nor consume input. +getCompositeSeparator :: Parser Char +getCompositeSeparator = compositeSeparator <$> getSyntax + +-- | This let change the operators used in the parsing. This is designed for the @UNA@ segment. +updateSyntax :: Syntax -> Parser () +updateSyntax s = updateState (\ c -> c { parsingSyntax = s }) + +-- | Read the parser state to extract current position. +-- It doesn't parse nor consume input. +getCurrentPosition :: Parser CurrentPosition +getCurrentPosition = currentPosition <$> getState + +-- | Write the parser state to update current position. +-- It doesn't parse nor consume input. +setCurrentPosition :: Position -> Parser () +setCurrentPosition = updateCurrentPosition . Defined + +-- | Write the parser state to reset current position. +-- It doesn't parse nor consume input. +resetCurrentPosition :: Parser () +resetCurrentPosition = updateCurrentPosition Undefined + +updateCurrentPosition :: CurrentPosition -> Parser () +updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos }) + +-- | Parse current charactor for element separator. +-- It does parse and consume input. +parseElementSeparator :: Parser Char +parseElementSeparator = parseSpecialChar "element separator" elementSeparator + +-- | Parse current charactor for composite separator. +-- It does parse and consume input. +parseCompositeSeparator :: Parser Char +parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator + +-- | Parse current charactor for escape separator. +-- It does parse and consume input. +parseEscape :: Parser Char +parseEscape = parseSpecialChar "escape character" escape + +-- | Parse current charactor for segment separator. +-- It does parse and consume input. +-- +-- It also tries consuming end of line after segment separator if any. +parseSegmentSeparator :: Parser Char +parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine + , parseSpecialChar "segment separator" segmentSeparator + ] + +parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char +parseSpecialChar description reader = do + c <- reader <$> getSyntax + let escape' '\"' = "\\\"" + escape' c' = [c'] + comment = description <> " (\"" <> escape' c <> "\")" + char c comment + +-- | Let you try various parsers, not consuming until success. +tries :: [Parser a] -> Parser a +tries = choice . map try + +-- | Like 'fail', this operator let you annotate a parser if it were to fail. +-- The difference with the standard "Text.Parsec" operator is that it appends +-- the current position if any. +failWithPosition :: String -> Parser a +failWithPosition = withPosition fail + +-- | Like '', this operator let you annotate a parser if it were to fail. +-- The difference with the standard "Text.Parsec" operator is that it appends +-- the current position if any. +() :: Parser a -> String -> Parser a +() = withPosition . () + +-- Same priority as from Text.Parsec +infix 0 + +withPosition :: (String -> Parser a) -> String -> Parser a +withPosition continuation msg = + let mkMessage Undefined = msg + mkMessage (Defined d) = msg <> " at position " <> show d + getMessage = mkMessage <$> getCurrentPosition + in getMessage >>= continuation + +-- | Alias to 'failWithPosition' to convey semantics of work-in-progress when +-- writing a parser. This might be useful if you want to partially support a +-- message. +notYetImplemented :: String -> Parser a +notYetImplemented = failWithPosition diff --git a/core/src/Text/Edifact/Parsing/Primitives.hs b/core/src/Text/Edifact/Parsing/Primitives.hs new file mode 100644 index 0000000..61659c8 --- /dev/null +++ b/core/src/Text/Edifact/Parsing/Primitives.hs @@ -0,0 +1,127 @@ +{-| +Module : Text.Edifact.Parsing.Primitives +Description : Low level combinators + +This module let you build parsers for primitive values, ie. values contained +in a simple element, either text or number. + += Examples + +To parse a text of 3 characters (@an3@ in standard Edifact representation): + +> an3 :: Parser Primitive +> an3 = alphaNumeric `exactly` 3 + +To parse a text of up to 10 characters (@an..10@ in standard Edifact representation): + +> an_10 :: Parser Primitive +> an_10 = alphaNumeric `upTo` 10 + += Known limitations + +Numeric representation is not strictly compatible to the specification. +The specification tells that negative sign (@-@) and decimal sign (@.@) are not +to be counted in the length of the field. + +Therefore the following parser will fail even it's legal according to the +specification: + +> n_3 :: Parser Primitive +> n_3 = numeric `upTo` 3 +> +> parse n_3 "-12.3" + +To be fixed, we have to change the way primitives combinators are built so that +the 'upTo' and 'exactly' combinators are aware of the inner parser. + -} +module Text.Edifact.Parsing.Primitives + ( + -- * Primitives + -- ** Simple elements definition + alphaNumeric + , alpha + , numeric + + -- ** Cardinality + , exactly + , upTo + , many + + ) where + +import Text.Edifact.Parsing.Commons +import Text.Edifact.Types + +import Data.String (fromString) +import qualified Data.Text as T (length) +import Text.Parsec (count, lookAhead, many1, noneOf, + oneOf) +import qualified Text.Parsec as P (many) + +-- | Parser associated with the @an@ notation. +alphaNumeric :: Parser Char +alphaNumeric = do + separators <- sequence [ getSegmentSeparator + , getElementSeparator + , getCompositeSeparator + ] + tries [ parseEscape *> parseSegmentSeparator + , parseEscape *> parseElementSeparator + , parseEscape *> parseCompositeSeparator + , parseEscape *> parseEscape + , noneOf separators + ] + +-- | Parser associated with the @a@ notation. +-- +-- So far it's simply an alias to 'alphaNumeric'. +alpha :: Parser Char +alpha = alphaNumeric + +-- | Parser associated with the @n@ notation. +numeric :: Parser Char +numeric = do + punctuationSign <- getDecimalSign + oneOf (punctuationSign : "0123456789-") + +-- | Combinator to build a parser of primitive which length is unspecified. +-- +-- Correspondance with the Edifact notation: +-- +-- > many alpha # same as a +-- > many numeric # same as n +-- > many alphaNumeric # same as an +many :: Parser Char -> Parser Primitive +many = fmap fromString . many1 + +-- | Combinator to build a parser of primitive which length is capped. +-- +-- Correspondance with the Edifact notation: +-- +-- > alpha `upTo` 3 # same as a..3 +-- > numeric `upTo` 3 # same as n..3 +-- > alphaNumeric `upTo` 3 # same as an..3 +upTo :: Parser Char -> Int -> Parser Primitive +upTo p c = + let check t = + let c' = T.length t + in if c' > c + then failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show c') + else pure (String t) + maybeEmpty = (<$) mempty . lookAhead + in check =<< + tries [ maybeEmpty parseSegmentSeparator + , maybeEmpty parseElementSeparator + , maybeEmpty parseCompositeSeparator + , fromString <$> P.many p + ] + +-- | Combinator to build a parser of primitive which length is fixed. +-- +-- Correspondance with the Edifact notation: +-- +-- > alpha `exactly` 3 # same as a3 +-- > numeric `exactly` 3 # same as n3 +-- > alphaNumeric `exactly` 3 # same as an3 +exactly :: Parser Char -> Int -> Parser Primitive +exactly p c = fromString <$> count c p diff --git a/core/src/Text/Edifact/Types.hs b/core/src/Text/Edifact/Types.hs new file mode 100644 index 0000000..d0bbe0d --- /dev/null +++ b/core/src/Text/Edifact/Types.hs @@ -0,0 +1,124 @@ +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +{-| + Data types to represent Edifact values. + + See "Text.Edifact.Parsing" to learn how to build parsers and use such parsers. + -} +module Text.Edifact.Types + ( -- * Values + Value(..) + , Primitive(..) + , Position(..) + -- ** Element codes + , MessageCode(..) + , GroupCode(..) + , SegmentCode(..) + , CompositeCode(..) + , SimpleCode(..) + -- * Syntax + , Syntax(..) + , defaultSyntax + ) where + +import Data.Scientific (Scientific) +import Data.String (IsString (..)) +import Data.Text (Text) + +-- | Code for a message. +-- +-- Content is expected to match this regexp: @[A-Z]{6}@. +newtype MessageCode = MessageCode { getMessageCode :: String } deriving newtype (Eq, Show, IsString) + +-- | Code for a segment group. +-- +-- It's a code local to the message definition. +newtype GroupCode = GroupCode { getGroupCode :: String } deriving newtype (Eq, Show, IsString) + +-- | Code for a segment. +-- +-- Content is expected to match this regexp: @[A-Z]{3}@. +-- +-- Standard segment codes are expected to match this regexp: @UN[A-Z]@. +newtype SegmentCode = SegmentCode { getSegmentCode :: String } deriving newtype (Eq, Show, IsString) + +-- | Code for a composite element. +-- +-- Content is expected to match this regexp: @C[0-9]{3}@. +-- +-- It can also be used for standalone composites, frequently in standard segment +-- definitions. In this case the codes are expected to match this regexp: @S[0-9]{3}@. +newtype CompositeCode = CompositeCode String deriving newtype (Eq, Show, IsString) + +-- | Code for a simple element. +-- +-- Content is expected to match this regexp: @[0-9]{4}@. +newtype SimpleCode = SimpleCode String deriving newtype (Eq, Show, IsString) + +-- | Annotation of the position of the value relative to the parent value. +-- +-- Content is expected to match this regexp: @[0-9]{3,4}@. +-- +-- Example values: +-- +-- > "010" :: Position +-- > "0210" :: Position +-- +-- See 'Text.Edifact.Parsing.position' for how to parse one. +newtype Position = Position { getPosition :: String } deriving newtype (Eq, Show, IsString) + +-- | Representation of a simple component. +-- +-- When defined by the 'Text.Edifact.Parsing.numeric' combinator, the simple +-- component will produce a 'Number'. +-- +-- When parsed by the 'Text.Edifact.Parsing.alphaNumeric' or +-- 'Text.Edifact.Parsing.alpha' combinators, the simple component will produce a +-- 'Text.Edifact.Types.String' from the raw textual representation. +data Primitive = String Text -- ^ Default representation of a simple component. + | Number Scientific -- ^ Representation of a numerical simple component. + deriving stock (Eq, Show) + +-- | String like primitive values can be constructed via overloaded strings. +-- This is convenient, but might be removed. +instance IsString Primitive where + fromString = String . fromString + +-- | Recursive data structure to represent parsed Edifact values. +data Value = Message MessageCode [(Position, [Value])] + | Group GroupCode [(Position, [Value])] + | Segment SegmentCode [(Position, Maybe Value)] + | Composite CompositeCode [(Position, Maybe Value)] + | Simple SimpleCode Primitive + deriving stock (Show, Eq) + +-- | Defines the special charactors the parser should respect. +-- +-- This is defined in every payload via the @UNA@ segment (first segment expected). +data Syntax = Syntax { compositeSeparator :: Char + , elementSeparator :: Char + , decimalSign :: Char + , escape :: Char + , segmentSeparator :: Char + } + +-- | Default value to initialize the parser. +-- +-- > Syntax { compositeSeparator = ':' +-- > , elementSeparator = '+' +-- > , decimalSign = '.' +-- > , escape = '?' +-- > , segmentSeparator = '\'' +-- > } +-- +-- Those default charactors should be considered as recommended values rather +-- than official default values. +defaultSyntax :: Syntax +defaultSyntax = + Syntax { compositeSeparator = ':' + , elementSeparator = '+' + , decimalSign = '.' + , escape = '?' + , segmentSeparator = '\'' + } -- cgit v1.2.3