aboutsummaryrefslogtreecommitdiffhomepage
path: root/core/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/Text')
-rw-r--r--core/src/Text/Edifact/Common.hs39
-rw-r--r--core/src/Text/Edifact/Common/Composites.hs135
-rw-r--r--core/src/Text/Edifact/Common/Segments.hs10
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNA.hs34
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNB.hs63
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNH.hs44
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNS.hs27
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNT.hs30
-rw-r--r--core/src/Text/Edifact/Common/Segments/UNZ.hs24
-rw-r--r--core/src/Text/Edifact/Common/Simples.hs271
-rw-r--r--core/src/Text/Edifact/Inspect.hs108
-rw-r--r--core/src/Text/Edifact/Parsing.hs72
-rw-r--r--core/src/Text/Edifact/Parsing/Combinators.hs230
-rw-r--r--core/src/Text/Edifact/Parsing/Commons.hs173
-rw-r--r--core/src/Text/Edifact/Parsing/Primitives.hs127
-rw-r--r--core/src/Text/Edifact/Types.hs124
16 files changed, 1511 insertions, 0 deletions
diff --git a/core/src/Text/Edifact/Common.hs b/core/src/Text/Edifact/Common.hs
new file mode 100644
index 0000000..c938d48
--- /dev/null
+++ b/core/src/Text/Edifact/Common.hs
@@ -0,0 +1,39 @@
1{-|
2Module : Text.Edifact.Common
3Description : Common syntax
4
5This module should handle the main revision of the Edifact specification, which
6covers UN* segments.
7
8Currently it barely covers the revision 3 and this has not been scaffolded.
9
10One future evolution will be to scaffold the various revisions from a more
11general specification.
12 -}
13module Text.Edifact.Common
14 (
15 -- * Routine
16 parseFull
17
18 -- * Reexports
19 , ParseError
20 , Parser
21 , Text
22 ) where
23
24import Text.Edifact.Parsing
25import Text.Edifact.Parsing.Commons
26
27import Text.Edifact.Common.Segments
28
29import Data.Text (Text)
30
31parseFull :: Parser value -> Text -> Either ParseError value
32parseFull = parse . fullSyntaxParser
33
34fullSyntaxParser :: Parser a -> Parser a
35fullSyntaxParser messageParser =
36 segmentUNA >>
37 tries [ segmentUNB *> messageParser <* segmentUNZ
38 , messageParser
39 ]
diff --git a/core/src/Text/Edifact/Common/Composites.hs b/core/src/Text/Edifact/Common/Composites.hs
new file mode 100644
index 0000000..39f61e7
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Composites.hs
@@ -0,0 +1,135 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Composites
4 ( compositeS001
5 , compositeS002
6 , compositeS003
7 , compositeS004
8 , compositeS005
9 , compositeS009
10 , compositeS010
11 ) where
12
13import Text.Edifact.Common.Simples (simple0001, simple0002,
14 simple0004, simple0007,
15 simple0008, simple0010,
16 simple0014, simple0017,
17 simple0019, simple0022,
18 simple0025, simple0042,
19 simple0046, simple0051,
20 simple0052, simple0054,
21 simple0057, simple0065,
22 simple0070, simple0073,
23 simple0080, simple0133)
24
25import Text.Edifact.Parsing
26import Text.Edifact.Types (Value)
27
28-- | Derived from this specification:
29--
30-- > 010 0001 Syntax identifier M a4
31-- > 020 0002 Syntax version number M an1
32-- > 030 0080 Service code list directory version number C an..6
33-- > 040 0133 Character encoding, coded C an..3
34--
35-- Dependencies: 'simple0001', 'simple0002', 'simple0080', 'simple0133'.
36compositeS001 :: Parser Value
37compositeS001 =
38 composite "S001"
39 [ "010" .@ mandatory simple0001
40 , "020" .@ mandatory simple0002
41 , "030" .@ optional simple0080
42 , "040" .@ optional simple0133
43 ]
44
45-- | Derived from this specification:
46--
47-- > 010 0004 Interchange sender identification M an..35
48-- > 020 0007 Identification code qualifier C an..4
49-- > 030 0008 Interchange sender internal identification C an..35
50-- > 040 0042 Interchange sender internal sub-identification C an..35
51--
52-- Dependencies: 'simple0004', 'simple0007', 'simple0008', 'simple0042'.
53compositeS002 :: Parser Value
54compositeS002 =
55 composite "S002"
56 [ "010" .@ mandatory simple0004
57 , "020" .@ optional simple0007
58 , "030" .@ optional simple0008
59 , "040" .@ optional simple0042
60 ]
61
62-- | Derived from this specification:
63--
64-- > 010 0010 Interchange recipient identification M an..35
65-- > 020 0007 Identification code qualifier C an..4
66-- > 030 0014 Interchange recipient internal identification C an..35
67-- > 040 0046 Interchange recipient internal sub-identification C an..35
68--
69-- Dependencies: 'simple0007', 'simple0010', 'simple0014', 'simple0046'.
70compositeS003 :: Parser Value
71compositeS003 =
72 composite "S003"
73 [ "010" .@ mandatory simple0010
74 , "020" .@ optional simple0007
75 , "030" .@ optional simple0014
76 , "040" .@ optional simple0046
77 ]
78
79-- | Derived from this specification:
80--
81-- > 010 0017 Date M n6
82-- > 020 0019 Time M n4
83--
84-- Dependencies: 'simple0017', 'simple0019'.
85compositeS004 :: Parser Value
86compositeS004 =
87 composite "S004"
88 [ "010" .@ mandatory simple0017
89 , "020" .@ mandatory simple0019
90 ]
91
92-- | Derived from this specification:
93--
94-- > 010 0022 Recipient reference/password M an..14
95-- > 020 0025 Recipient reference/password qualifier C an2
96--
97-- Dependencies: 'simple0022', 'simple0025'.
98compositeS005 :: Parser Value
99compositeS005 =
100 composite "S005"
101 [ "010" .@ mandatory simple0022
102 , "020" .@ optional simple0025
103 ]
104
105-- | Derived from this specification:
106--
107-- > 010 0065 Message type M an..6
108-- > 020 0052 Message version number M an..3
109-- > 030 0054 Message release number M an..3
110-- > 040 0051 Controlling agency M an..2
111-- > 050 0057 Association assigned code C an..6
112--
113-- Dependencies: 'simple0051', 'simple0052', 'simple0054', 'simple0057', 'simple0065'.
114compositeS009 :: Parser Value
115compositeS009 =
116 composite "S009"
117 [ "010" .@ mandatory simple0065
118 , "020" .@ mandatory simple0052
119 , "030" .@ mandatory simple0054
120 , "040" .@ mandatory simple0051
121 , "050" .@ optional simple0057
122 ]
123
124-- | Derived from this specification:
125--
126-- > 010 0070 Sequence of transfers M n..2
127-- > 020 0073 First and last transfer C a1
128--
129-- Dependencies: 'simple0070', 'simple0073'.
130compositeS010 :: Parser Value
131compositeS010 =
132 composite "S010"
133 [ "010" .@ mandatory simple0070
134 , "020" .@ optional simple0073
135 ]
diff --git a/core/src/Text/Edifact/Common/Segments.hs b/core/src/Text/Edifact/Common/Segments.hs
new file mode 100644
index 0000000..e73719f
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments.hs
@@ -0,0 +1,10 @@
1module Text.Edifact.Common.Segments
2 ( module S
3 ) where
4
5import Text.Edifact.Common.Segments.UNA as S
6import Text.Edifact.Common.Segments.UNB as S
7import Text.Edifact.Common.Segments.UNH as S
8import Text.Edifact.Common.Segments.UNS as S
9import Text.Edifact.Common.Segments.UNT as S
10import Text.Edifact.Common.Segments.UNZ as S
diff --git a/core/src/Text/Edifact/Common/Segments/UNA.hs b/core/src/Text/Edifact/Common/Segments/UNA.hs
new file mode 100644
index 0000000..1b20a9f
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNA.hs
@@ -0,0 +1,34 @@
1module Text.Edifact.Common.Segments.UNA
2 ( segmentUNA
3 ) where
4
5import Text.Edifact.Parsing
6import Text.Edifact.Parsing.Commons (updateSyntax)
7import Text.Edifact.Types (Syntax (..), defaultSyntax)
8
9import Control.Monad (void)
10import Text.Parsec (anyChar, char, endOfLine,
11 optionMaybe, string, try)
12import qualified Text.Parsec as P (optional)
13
14segmentUNA :: Parser ()
15segmentUNA =
16 let segmentParser = string "UNA" *> parseSyntax <* P.optional endOfLine
17 nothing = pure ()
18 in optionMaybe (try segmentParser) >>= maybe nothing updateSyntax
19
20parseSyntax :: Parser Syntax
21parseSyntax = do
22 compositeSeparator' <- anyChar
23 elementSeparator' <- anyChar
24 decimalSign' <- anyChar
25 escape' <- anyChar
26 void $ char ' ' -- reserved, not used
27 segmentSeparator' <- anyChar
28 pure defaultSyntax
29 { compositeSeparator = compositeSeparator'
30 , elementSeparator = elementSeparator'
31 , decimalSign = decimalSign'
32 , escape = escape'
33 , segmentSeparator = segmentSeparator'
34 }
diff --git a/core/src/Text/Edifact/Common/Segments/UNB.hs b/core/src/Text/Edifact/Common/Segments/UNB.hs
new file mode 100644
index 0000000..55f1eea
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNB.hs
@@ -0,0 +1,63 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Segments.UNB
4 ( segmentUNB
5 ) where
6
7import Text.Edifact.Common.Composites (compositeS001, compositeS002,
8 compositeS003, compositeS004,
9 compositeS005)
10import Text.Edifact.Common.Simples (simple0020, simple0026,
11 simple0029, simple0031,
12 simple0032, simple0035)
13
14import Text.Edifact.Parsing
15import Text.Edifact.Types (Value)
16
17-- | Derived from this specification:
18--
19-- > Pos Segment M/C Repeat Repr. Notes
20-- > 010 S001 SYNTAX IDENTIFIER M 1
21-- > 0001 Syntax identifier M a4
22-- > 0002 Syntax version number M an1
23-- > 0080 Service code list directory version number C an..6
24-- > 0133 Character encoding, coded C an..3
25-- > 020 S002 INTERCHANGE SENDER M 1
26-- > 0004 Interchange sender identification M an..35
27-- > 0007 Identification code qualifier C an..4
28-- > 0008 Interchange sender internal identification C an..35
29-- > 0042 Interchange sender internal sub-identification C an..35
30-- > 030 S003 INTERCHANGE RECIPIENT M 1
31-- > 0010 Interchange recipient identification M an..35
32-- > 0007 Identification code qualifier C an..4
33-- > 0014 Interchange recipient internal identification C an..35
34-- > 0046 Interchange recipient internal sub-identification C an..35
35-- > 040 S004 DATE AND TIME OF PREPARATION M 1
36-- > 0017 Date M n8
37-- > 0019 Time M n4
38-- > 050 0020 Interchange control reference M 1 an..14
39-- > 060 S005 RECIPIENT'S REFERENCE/PASSWORD DETAILS C 1
40-- > 0022 Recipient reference/password M an..14
41-- > 0025 Recipient reference/password qualifier C an2
42-- > 070 0026 Application reference C 1 an..14
43-- > 080 0029 Processing priority code C 1 a1
44-- > 090 0031 Acknowledgement request C 1 n1
45-- > 100 0032 Interchange agreement identifier C 1 an..35
46-- > 110 0035 Test indicator C 1 n1
47--
48-- Dependencies: 'compositeS001', 'compositeS002', 'compositeS003', 'compositeS004', 'compositeS005', 'simple0020', 'simple0026', 'simple0029', 'simple0031', 'simple0032', 'simple0035'.
49segmentUNB :: Parser Value
50segmentUNB =
51 segment "UNB"
52 [ "010" .@ mandatory compositeS001
53 , "020" .@ mandatory compositeS002
54 , "030" .@ mandatory compositeS003
55 , "040" .@ mandatory compositeS004
56 , "050" .@ mandatory simple0020
57 , "060" .@ optional compositeS005
58 , "070" .@ optional simple0026
59 , "080" .@ optional simple0029
60 , "090" .@ optional simple0031
61 , "100" .@ optional simple0032
62 , "110" .@ optional simple0035
63 ]
diff --git a/core/src/Text/Edifact/Common/Segments/UNH.hs b/core/src/Text/Edifact/Common/Segments/UNH.hs
new file mode 100644
index 0000000..61cb6b3
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNH.hs
@@ -0,0 +1,44 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Segments.UNH
4 ( segmentUNH
5 ) where
6
7import Text.Edifact.Common.Composites (compositeS009, compositeS010)
8import Text.Edifact.Common.Simples (simple0062, simple0068)
9
10import Text.Edifact.Parsing
11import Text.Edifact.Types (Value)
12
13-- | Derived from this specification:
14--
15-- > Change indicators
16-- >
17-- > UNH MESSAGE HEADER
18-- >
19-- > Function: To head, identify and specify a message.
20-- >
21-- > 010 0062 MESSAGE REFERENCE NUMBER M an..14
22-- >
23-- > 020 S009 MESSAGE IDENTIFIER M
24-- > 0065 Message type M an..6
25-- > 0052 Message version number M an..3
26-- > 0054 Message release number M an..3
27-- > 0051 Controlling agency M an..2
28-- > 0057 Association assigned code C an..6
29-- >
30-- > 030 0068 COMMON ACCESS REFERENCE C an..35
31-- >
32-- > 040 S010 STATUS OF THE TRANSFER C
33-- > 0070 Sequence of transfers M n..2
34-- > 0073 First and last transfer C a1
35--
36-- Dependencies: 'compositeS009', 'compositeS010', 'simple0062', 'simple0068'.
37segmentUNH :: Parser Value
38segmentUNH =
39 segment "UNH"
40 [ "010" .@ mandatory simple0062
41 , "020" .@ mandatory compositeS009
42 , "030" .@ optional simple0068
43 , "040" .@ optional compositeS010
44 ]
diff --git a/core/src/Text/Edifact/Common/Segments/UNS.hs b/core/src/Text/Edifact/Common/Segments/UNS.hs
new file mode 100644
index 0000000..3d9b395
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNS.hs
@@ -0,0 +1,27 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Segments.UNS
4 ( segmentUNS
5 ) where
6
7import Text.Edifact.Common.Simples (simple0081)
8
9import Text.Edifact.Parsing
10import Text.Edifact.Types (Value)
11
12-- | Derived from this specification:
13--
14-- > Change indicators
15-- >
16-- > UNS SECTION CONTROL
17-- >
18-- > Function: To separate Header, Detail and Summary sections of a message
19-- >
20-- > 010 0081 SECTION IDENTIFICATION M a1
21--
22-- Dependencies: 'simple0081'.
23segmentUNS :: Parser Value
24segmentUNS =
25 segment "UNS"
26 [ "010" .@ mandatory simple0081
27 ]
diff --git a/core/src/Text/Edifact/Common/Segments/UNT.hs b/core/src/Text/Edifact/Common/Segments/UNT.hs
new file mode 100644
index 0000000..e91f9cf
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNT.hs
@@ -0,0 +1,30 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Segments.UNT
4 ( segmentUNT
5 ) where
6
7import Text.Edifact.Common.Simples (simple0062, simple0074)
8
9import Text.Edifact.Parsing
10import Text.Edifact.Types (Value)
11
12-- | Derived from this specification:
13--
14-- > Change indicators
15-- >
16-- > UNT MESSAGE TRAILER
17-- >
18-- > Function: To end and check the completeness of a message.
19-- >
20-- > 010 0074 NUMBER OF SEGMENTS IN THE MESSAGE M n..6
21-- >
22-- > 020 0062 MESSAGE REFERENCE NUMBER M an..14
23--
24-- Dependencies: 'simple0062', 'simple0074'.
25segmentUNT :: Parser Value
26segmentUNT =
27 segment "UNT"
28 [ "010" .@ mandatory simple0074
29 , "020" .@ mandatory simple0062
30 ]
diff --git a/core/src/Text/Edifact/Common/Segments/UNZ.hs b/core/src/Text/Edifact/Common/Segments/UNZ.hs
new file mode 100644
index 0000000..0566860
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Segments/UNZ.hs
@@ -0,0 +1,24 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Segments.UNZ
4 ( segmentUNZ
5 ) where
6
7import Text.Edifact.Common.Simples (simple0020, simple0036)
8
9import Text.Edifact.Parsing
10import Text.Edifact.Types (Value)
11
12-- | Derived from this specification:
13--
14-- > Pos Segment M/C Repeat Repr. Notes
15-- > 010 0036 Interchange control count M 1 n..6
16-- > 020 0020 Interchange control reference M 1 an..14
17--
18-- Dependencies: 'simple0020', 'simple0036'.
19segmentUNZ :: Parser Value
20segmentUNZ =
21 segment "UNZ"
22 [ "010" .@ mandatory simple0036
23 , "030" .@ mandatory simple0020
24 ]
diff --git a/core/src/Text/Edifact/Common/Simples.hs b/core/src/Text/Edifact/Common/Simples.hs
new file mode 100644
index 0000000..537b128
--- /dev/null
+++ b/core/src/Text/Edifact/Common/Simples.hs
@@ -0,0 +1,271 @@
1{-# LANGUAGE OverloadedStrings #-}
2
3module Text.Edifact.Common.Simples
4 ( simple0001
5 , simple0002
6 , simple0004
7 , simple0007
8 , simple0008
9 , simple0010
10 , simple0014
11 , simple0017
12 , simple0019
13 , simple0020
14 , simple0022
15 , simple0025
16 , simple0026
17 , simple0029
18 , simple0031
19 , simple0032
20 , simple0035
21 , simple0036
22 , simple0042
23 , simple0046
24 , simple0051
25 , simple0052
26 , simple0054
27 , simple0057
28 , simple0062
29 , simple0065
30 , simple0068
31 , simple0070
32 , simple0073
33 , simple0074
34 , simple0080
35 , simple0081
36 , simple0133
37 ) where
38
39import Text.Edifact.Parsing
40import Text.Edifact.Types (Value)
41
42-- | Derived from this specification:
43--
44-- > 0001 Syntax identifier
45-- > Repr: a4
46simple0001 :: Parser Value
47simple0001 = simple "0001" (alpha `exactly` 4)
48
49-- | Derived from this specification:
50--
51-- > 0002 Syntax version number
52-- > Repr: an1
53simple0002 :: Parser Value
54simple0002 = simple "0002" (alphaNumeric `exactly` 1)
55
56-- | Derived from this specification:
57--
58-- > 0004 Interchange sender identification
59-- > Repr: an..35
60simple0004 :: Parser Value
61simple0004 = simple "0004" (alphaNumeric `upTo` 35)
62
63-- | Derived from this specification:
64--
65-- > 0007 Identification code qualifier
66-- > Repr: an..4
67simple0007 :: Parser Value
68simple0007 = simple "0007" (alphaNumeric `upTo` 4)
69
70-- | Derived from this specification:
71--
72-- > 0008 Interchange sender internal identification
73-- > Repr: an..35
74simple0008 :: Parser Value
75simple0008 = simple "0008" (alphaNumeric `upTo` 35)
76
77-- | Derived from this specification:
78--
79-- > 0010 Interchange recipient identification
80-- > Repr: an..35
81simple0010 :: Parser Value
82simple0010 = simple "0010" (alphaNumeric `upTo` 35)
83
84-- | Derived from this specification:
85--
86-- > 0014 Interchange recipient internal identification
87-- > Repr: an..35
88simple0014 :: Parser Value
89simple0014 = simple "0014" (alphaNumeric `upTo` 35)
90
91-- | Derived from this specification:
92--
93-- > 0017 Date
94-- > Repr: n6
95simple0017 :: Parser Value
96simple0017 = simple "0017" (numeric `exactly` 6)
97
98-- | Derived from this specification:
99--
100-- > 0019 Time
101-- > Repr: n4
102simple0019 :: Parser Value
103simple0019 = simple "0019" (numeric `exactly` 4)
104
105-- | Derived from this specification:
106--
107-- > 0020 Interchange control reference
108-- > Repr: an..14
109simple0020 :: Parser Value
110simple0020 = simple "0020" (alphaNumeric `upTo` 14)
111
112-- | Derived from this specification:
113--
114-- > 0022 Recipient reference/password
115-- > Repr: an..14
116simple0022 :: Parser Value
117simple0022 = simple "0022" (alphaNumeric `upTo` 14)
118
119-- | Derived from this specification:
120--
121-- > 0025 Recipient reference/password qualifier
122-- > Repr: an2
123simple0025 :: Parser Value
124simple0025 = simple "0025" (alphaNumeric `exactly` 2)
125
126-- | Derived from this specification:
127--
128-- > 0026 Application reference
129-- > Repr: an..14
130simple0026 :: Parser Value
131simple0026 = simple "0026" (alphaNumeric `upTo` 14)
132
133-- | Derived from this specification:
134--
135-- > 0029 Processing priority code
136-- > Repr: a1
137simple0029 :: Parser Value
138simple0029 = simple "0029" (alphaNumeric `exactly` 1)
139
140-- | Derived from this specification:
141--
142-- > 0031 Acknowledgement request
143-- > Repr: n1
144simple0031 :: Parser Value
145simple0031 = simple "0031" (numeric `exactly` 1)
146
147-- | Derived from this specification:
148--
149-- > 0032 Interchange agreement identifier
150-- > Repr: an..35
151simple0032 :: Parser Value
152simple0032 = simple "0032" (alphaNumeric `upTo` 35)
153
154-- | Derived from this specification:
155--
156-- > 0035 Test indicator
157-- > Repr: n1
158simple0035 :: Parser Value
159simple0035 = simple "0035" (numeric `exactly` 1)
160
161-- | Derived from this specification:
162--
163-- > 0036 Interchange control count
164-- > Repr: n..6
165simple0036 :: Parser Value
166simple0036 = simple "0036" (numeric `upTo` 6)
167
168-- | Derived from this specification:
169--
170-- > 0042 Interchange sender internal sub-identification
171-- > Repr: an..35
172simple0042 :: Parser Value
173simple0042 = simple "0042" (alphaNumeric `upTo` 35)
174
175-- | Derived from this specification:
176--
177-- > 0046 Interchange recipient internal sub-identification
178-- > Repr: an..35
179simple0046 :: Parser Value
180simple0046 = simple "0046" (alphaNumeric `upTo` 35)
181
182-- | Derived from this specification:
183--
184-- > 0051 Controlling agency
185-- > Repr: an..2
186simple0051 :: Parser Value
187simple0051 = simple "0051" (alphaNumeric `upTo` 2)
188
189-- | Derived from this specification:
190--
191-- > 0052 Message version number
192-- > Repr: an..3
193simple0052 :: Parser Value
194simple0052 = simple "0052" (alphaNumeric `upTo` 3)
195
196-- | Derived from this specification:
197--
198-- > 0054 Message release number
199-- > Repr: an..3
200simple0054 :: Parser Value
201simple0054 = simple "0054" (alphaNumeric `upTo` 3)
202
203-- | Derived from this specification:
204--
205-- > 0057 Association assigned code
206-- > Repr: an..6
207simple0057 :: Parser Value
208simple0057 = simple "0057" (alphaNumeric `upTo` 6)
209
210-- | Derived from this specification:
211--
212-- > 0062 MESSAGE REFERENCE NUMBER
213-- > Repr: an..14
214simple0062 :: Parser Value
215simple0062 = simple "0062" (alphaNumeric `upTo` 14)
216
217-- | Derived from this specification:
218--
219-- > 0065 Message type
220-- > Repr: an..6
221simple0065 :: Parser Value
222simple0065 = simple "0065" (alphaNumeric `upTo` 6)
223
224-- | Derived from this specification:
225--
226-- > 0068 COMMON ACCESS REFERENCE
227-- > Repr: an..35
228simple0068 :: Parser Value
229simple0068 = simple "0068" (alphaNumeric `upTo` 35)
230
231-- | Derived from this specification:
232--
233-- > 0070 Sequence of transfers
234-- > Repr: n..2
235simple0070 :: Parser Value
236simple0070 = simple "0070" (numeric `upTo` 2)
237
238-- | Derived from this specification:
239--
240-- > 0073 First and last transfer
241-- > Repr: a1
242simple0073 :: Parser Value
243simple0073 = simple "0073" (alpha `exactly` 1)
244
245-- | Derived from this specification:
246--
247-- > 0074 NUMBER OF SEGMENTS IN THE MESSAGE
248-- > Repr: n..6
249simple0074 :: Parser Value
250simple0074 = simple "0074" (numeric `upTo` 6)
251
252-- | Derived from this specification:
253--
254-- > 0080 Service code list directory version number
255-- > Repr: an..6
256simple0080 :: Parser Value
257simple0080 = simple "0080" (alphaNumeric `upTo` 6)
258
259-- | Derived from this specification:
260--
261-- > 0081 SECTION IDENTIFICATION
262-- > Repr: a1
263simple0081 :: Parser Value
264simple0081 = simple "0081" (alpha `exactly` 1)
265
266-- | Derived from this specification:
267--
268-- > 0133 Character encoding, coded
269-- > Repr: an..3
270simple0133 :: Parser Value
271simple0133 = simple "0133" (alphaNumeric `upTo` 3)
diff --git a/core/src/Text/Edifact/Inspect.hs b/core/src/Text/Edifact/Inspect.hs
new file mode 100644
index 0000000..fb3755a
--- /dev/null
+++ b/core/src/Text/Edifact/Inspect.hs
@@ -0,0 +1,108 @@
1{-# LANGUAGE DerivingStrategies #-}
2{-# LANGUAGE GeneralizedNewtypeDeriving #-}
3{-# LANGUAGE OverloadedStrings #-}
4
5module Text.Edifact.Inspect
6 ( inspect
7 ) where
8
9import Text.Edifact.Types
10
11import Control.Monad.Reader (Reader, ask, local, runReader)
12import Data.Maybe (catMaybes)
13import Data.String (IsString)
14import Data.Text (Text)
15import Formatting
16
17type Indent = Int
18
19type Rendering = Reader Indent
20
21indent :: Rendering a -> Rendering a
22indent = local (+1)
23
24getIndentation :: Rendering Int
25getIndentation = ask
26
27inspect :: Value -> Text
28inspect = renderInspection . valueRenderer
29
30renderInspection :: Rendering a -> a
31renderInspection r = runReader r 0
32
33valueRenderer :: Value -> Rendering Text
34valueRenderer (Simple _ primitive) = primitiveRenderer primitive
35valueRenderer (Composite _ values) = sformat inBrackets . commaSeparated . catMaybes <$> traverse positionRenderer values
36valueRenderer (Segment code values) = indentedPrefix fSegmentCode code " " . spaceSeparated . catMaybes =<< traverse positionRenderer values
37valueRenderer (Group code values) = indentedPrefix fGroupCode code "\n" . lineSeparated =<< indent (traverse silentPositionRenderer values)
38valueRenderer (Message code values) = indentedPrefix fMessageCode code "\n" . lineSeparated =<< indent (traverse silentPositionRenderer values)
39
40inBrackets :: Format r (Text -> r)
41inBrackets = "[" % stext % "]"
42
43indentedPrefix :: Format (String -> Text -> Text) (code -> String -> Text -> Text) -> code -> String -> Text -> Rendering Text
44indentedPrefix codeFormatter code sep t = do
45 i <- getIndentation
46 let prefix = replicate (i * 2) ' '
47 pure (sformat (string % codeFormatter % string % stext) prefix code sep t)
48
49fMessageCode :: Format r (MessageCode -> r)
50fMessageCode = mapf getMessageCode ("message " % string)
51
52fGroupCode :: Format r (GroupCode -> r)
53fGroupCode = mapf getGroupCode ("Segment Group - " % string)
54
55fSegmentCode :: Format r (SegmentCode -> r)
56fSegmentCode = mapf getSegmentCode string
57
58positionRenderer :: (Position, Maybe Value) -> Rendering (Maybe Text)
59positionRenderer (pos, value) = fmap (flip (sformat (stext % fPosition)) pos) <$> traverse valueRenderer value
60
61silentPositionRenderer :: (Position, [Value]) -> Rendering Text
62silentPositionRenderer (_, value) = lineSeparated <$> traverse valueRenderer value
63
64fPosition :: Format r (Position -> r)
65fPosition = mapf getPosition ("@" % string)
66
67primitiveRenderer :: Primitive -> Rendering Text
68primitiveRenderer (String t) = pure (sformat ("\"" % stext % "\"") t)
69primitiveRenderer (Number s) = pure (sformat shown s)
70
71newtype CommaSeparated = CommaSeparated { getCommaSeparated :: Text } deriving newtype (IsString, Eq)
72
73instance Semigroup CommaSeparated where
74 t1 <> "" = t1
75 "" <> t2 = t2
76 t1 <> t2 = CommaSeparated (getCommaSeparated t1 <> "," <> getCommaSeparated t2)
77
78instance Monoid CommaSeparated where
79 mempty = ""
80
81commaSeparated :: Foldable f => f Text -> Text
82commaSeparated = getCommaSeparated . foldMap CommaSeparated
83
84newtype SpaceSeparated = SpaceSeparated { getSpaceSeparated :: Text } deriving newtype (IsString, Eq)
85
86instance Semigroup SpaceSeparated where
87 t1 <> "" = t1
88 "" <> t2 = t2
89 t1 <> t2 = SpaceSeparated (getSpaceSeparated t1 <> " " <> getSpaceSeparated t2)
90
91instance Monoid SpaceSeparated where
92 mempty = ""
93
94spaceSeparated :: Foldable f => f Text -> Text
95spaceSeparated = getSpaceSeparated . foldMap SpaceSeparated
96
97newtype LineSeparated = LineSeparated { getLineSeparated :: Text } deriving newtype (IsString, Eq)
98
99instance Semigroup LineSeparated where
100 t1 <> "" = t1
101 "" <> t2 = t2
102 t1 <> t2 = LineSeparated (getLineSeparated t1 <> "\n" <> getLineSeparated t2)
103
104instance Monoid LineSeparated where
105 mempty = ""
106
107lineSeparated :: Foldable f => f Text -> Text
108lineSeparated = getLineSeparated . foldMap LineSeparated
diff --git a/core/src/Text/Edifact/Parsing.hs b/core/src/Text/Edifact/Parsing.hs
new file mode 100644
index 0000000..0b1ece8
--- /dev/null
+++ b/core/src/Text/Edifact/Parsing.hs
@@ -0,0 +1,72 @@
1{-|
2Module : Text.Edifact.Parsing
3Description : Parsing routines and combinators
4
5This module is there to reexport most of the combinators and helpers required
6to parse an Edifact payload.
7
8For high level combinators, have a look at "Text.Edifact.Parsing.Combinators".
9
10For low level combinators, have a look at "Text.Edifact.Parsing.Primitives".
11 -}
12module Text.Edifact.Parsing
13 (
14 -- * Parsing routines
15 parse
16
17 -- * Combinators
18 -- | See "Text.Edifact.Parsing.Combinators" for more details
19
20 -- ** Values parsers
21 , message
22 , segment
23 , segmentGroup
24 , composite
25 , simple
26
27 -- ** Position and strictness
28 , position
29 , (.@)
30 , (@.)
31 , mandatory
32 , optional
33
34 -- ** Repetition of segments and segment groups
35 , repeated
36 , repeatedAtLeastOnce
37 , once
38 , maybeOnce
39
40 -- * Primitives
41 -- | See "Text.Edifact.Parsing.Primitives" for more details and known limitations.
42
43 -- ** Simple elements definition
44 , alphaNumeric
45 , alpha
46 , numeric
47 -- ** Cardinality
48 , exactly
49 , upTo
50 , many
51
52 -- * Types
53 , Parser
54 -- ** Reexported
55 , ParseError
56 ) where
57
58import Text.Edifact.Parsing.Combinators (composite, mandatory,
59 maybeOnce, message, once,
60 optional, position, repeated,
61 repeatedAtLeastOnce, segment,
62 segmentGroup, simple, (.@),
63 (@.))
64import Text.Edifact.Parsing.Commons (Parser, defaultContext)
65import Text.Edifact.Parsing.Primitives (alpha, alphaNumeric, exactly,
66 many, numeric, upTo)
67
68import Data.Text (Text)
69import Text.Parsec (ParseError, runParser)
70
71parse :: Parser value -> Text -> Either ParseError value
72parse p = runParser p defaultContext ""
diff --git a/core/src/Text/Edifact/Parsing/Combinators.hs b/core/src/Text/Edifact/Parsing/Combinators.hs
new file mode 100644
index 0000000..ce3f4be
--- /dev/null
+++ b/core/src/Text/Edifact/Parsing/Combinators.hs
@@ -0,0 +1,230 @@
1{-# LANGUAGE TupleSections #-}
2
3{-|
4Module : Text.Edifact.Parsing.Combinators
5Description : High level combinators
6 -}
7module Text.Edifact.Parsing.Combinators
8 ( -- * Combinators
9 -- ** Values parsers
10 message
11 , segmentGroup
12 , segment
13 , composite
14 , simple
15
16 -- ** Position and strictness
17 , position
18 , (.@)
19 , (@.)
20 , mandatory
21 , optional
22
23 -- ** Repetition of segments and segment groups
24 , repeated
25 , repeatedAtLeastOnce
26 , once
27 , maybeOnce
28 ) where
29
30import Text.Edifact.Parsing.Commons
31import Text.Edifact.Types
32
33import Text.Parsec (lookAhead, many1, optionMaybe,
34 string, try)
35import qualified Text.Parsec as P (many)
36
37-- | Parses a 'Message'.
38--
39-- > messageABCDEF :: Parser Value
40-- > messageABCDEF =
41-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35)
42-- > c101 = composite "C101" [ position "010" (mandatory simple1234)
43-- > , position "020" (optional simple1234)
44-- > ]
45-- > segmentABC = segment "ABC" [ position "010" (mandatory c101)
46-- > ]
47-- > in message "ABCDEF" [ position "0010" (mandatory segmentABC)
48-- > ]
49message :: MessageCode -> [Parser (Position, [Value])] -> Parser Value
50message code ps =
51 let description = "message " <> show code
52 in Message code <$> sequence ps <??> description
53
54-- | Parses a 'Group'.
55--
56-- A Segment Group is the way Edifact format represents hierarchy. One can view
57-- a segment group as a sub message. A segment group can be repeated like
58-- segments. A segment group wraps segments and segment groups.
59segmentGroup :: GroupCode -> [Parser (Position, [Value])] -> Parser Value
60segmentGroup code ps =
61 let description = "segment-group " <> show code
62 in Group code <$> sequence ps <??> description
63
64-- | Parses a 'Segment'.
65--
66-- Following parser:
67--
68-- > segmentABC :: Parser Value
69-- > segmentABC =
70-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35)
71-- > simple2001 = simple "2001" (alphaNumeric `exactly` 3)
72-- > c101 = composite "C101" [ position "010" (mandatory simple1234)
73-- > , position "020" (optional simple1234)
74-- > , position "030" (optional simple1234)
75-- > ]
76-- > in segment "ABC" [ position "010" (mandatory simple2001)
77-- > , position "020" (optional c101)
78-- > ]
79--
80-- would parse strings such as:
81--
82-- >>> parse segmentABC "ABC+123'"
83-- Segment "ABC" [ ("010", Just (Simple "2001" "123"))
84-- ]
85-- >>> parse segmentABC "ABC+123+abcdefgh'"
86-- Segment "ABC" [ ("010", Just (Simple "2001" "123"))
87-- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh"))
88-- ]
89-- ))
90-- ]
91-- >>> parse segmentABC "ABC+123+abcdefgh:ijklmno'"
92-- Segment "ABC" [ ("010", Just (Simple "2001" "123"))
93-- , ("020", Just (Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh"))
94-- , ("020", Just (Simple "1234" "ijklmno"))
95-- ]
96-- ))
97-- ]
98segment :: SegmentCode -> [Parser (Position, Maybe Value)] -> Parser Value
99segment code parsers =
100 let go [] = [] <$ parseSegmentSeparator
101 go (p:ps) =
102 tries [ [] <$ parseSegmentSeparator
103 , (:) <$> (parseElementSeparator *> p)
104 <*> go ps
105 ]
106 description = "segment " <> show code
107 in Segment <$> parseSegmentCode code
108 <*> go parsers
109 <??> description
110
111parseSegmentCode :: SegmentCode -> Parser SegmentCode
112parseSegmentCode (SegmentCode code) =
113 let description = "segment code " <> show code
114 in SegmentCode <$> string code <??> description
115
116-- | Parses a 'Composite' element.
117--
118-- Following parser:
119--
120-- > compositeC101 :: Parser Value
121-- > compositeC101 =
122-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35)
123-- > in composite "C101" [ position "010" (mandatory simple1234)
124-- > , position "020" (optional simple1234)
125-- > , position "030" (optional simple1234)
126-- > ]
127--
128-- would parse strings such as:
129--
130-- >>> parse compositeC101 "abcdefgh"
131-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh"))
132-- ]
133-- >>> parse compositeC101 "abcdefgh:ijklmno"
134-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh"))
135-- , ("020", Just (Simple "1234" "ijklmno"))
136-- ]
137-- >>> parse compositeC101 "abcdefgh::pqrstu"
138-- Composite "C101" [ ("010", Just (Simple "1234" "abcdefgh"))
139-- , ("020", Just (Simple "1234" ""))
140-- , ("030", Just (Simple "1234" "pqrstu"))
141-- ]
142composite :: CompositeCode -> [Parser (Position, Maybe Value)] -> Parser Value
143composite code parsers =
144 let go [] = pure []
145 go (p:ps) = do
146 let parseSeparator = tries [ parseCompositeSeparator
147 , lookAhead parseElementSeparator
148 , lookAhead parseSegmentSeparator
149 ]
150 (value, continuation) <- tries [ (, ps) <$> p <* parseSeparator
151 , (, []) <$> p
152 ]
153 (:) value <$> go continuation
154 description = "composite element " <> show code
155 in Composite code <$> go parsers <??> description
156
157-- | Parses a 'Simple' element.
158--
159-- Following parser would parse strings of size between 0 and 35 characters.
160--
161-- > simple1234 :: Parser Value
162-- > simple1234 = simple "1234" (alphaNumeric `upTo` 35)
163simple :: SimpleCode -> Parser Primitive -> Parser Value
164simple code p =
165 let description = "simple element " <> show code
166 in Simple code <$> p <??> description
167
168-- | Makes the parsing of the element optional, which doesn't consume input if the given parser doesn't succeed.
169optional :: Parser Value -> Parser (Maybe Value)
170optional = optionMaybe
171
172-- | Makes the parsing of the element mandatory.
173mandatory :: Parser Value -> Parser (Maybe Value)
174mandatory = fmap Just
175
176-- | Sets the current 'Position'. This is relevant for segment in a message, for composite or simple element in a segment, and for simple element in a composite.
177position :: Position -> Parser (f Value) -> Parser (Position, f Value)
178position pos p =
179 let decorated = (pos,) <$> p
180 in setCurrentPosition pos *> decorated <* resetCurrentPosition
181
182-- | Alias to 'position'.
183--
184-- > compositeC101 :: Parser Value
185-- > compositeC101 =
186-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35)
187-- > in composite "C101" [ "010" .@ mandatory simple1234
188-- > , "020" .@ optional simple1234
189-- > , "030" .@ optional simple1234
190-- > ]
191(.@) :: Position -> Parser (f Value) -> Parser (Position, f Value)
192(.@) = position
193
194-- | Flipped alias to 'position'.
195--
196-- > compositeC101 :: Parser Value
197-- > compositeC101 =
198-- > let simple1234 = simple "1234" (alphaNumeric `upTo` 35)
199-- > in composite "C101" [ mandatory simple1234 @. "010"
200-- > , optional simple1234 @. "020"
201-- > , optional simple1234 @. "030"
202-- > ]
203(@.) :: Parser (f Value) -> Position -> Parser (Position, f Value)
204(@.) = flip position
205
206-- | For segments or segment groups, let you express how many occurrences.
207repeated :: Int -> Parser a -> Parser [a]
208repeated limit p = do
209 values <- P.many (try p)
210 let parsed = length values
211 if parsed > limit
212 then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed)
213 else pure values
214
215-- | For segments or segment groups, let you express how many occurrences with at least one occurrence.
216repeatedAtLeastOnce :: Int -> Parser a -> Parser [a]
217repeatedAtLeastOnce limit p = do
218 values <- many1 (try p)
219 let parsed = length values
220 if parsed > limit
221 then failWithPosition ("expected up to " <> show limit <> " items, but encountered " <> show parsed)
222 else pure values
223
224-- | For segments or segment groups, let you express you expect only one occurrence.
225once :: Parser a -> Parser [a]
226once = fmap pure
227
228-- | For segments or segment groups, let you express you expect one or no occurrence.
229maybeOnce :: Parser a -> Parser [a]
230maybeOnce = fmap (maybe [] pure) . optionMaybe
diff --git a/core/src/Text/Edifact/Parsing/Commons.hs b/core/src/Text/Edifact/Parsing/Commons.hs
new file mode 100644
index 0000000..a1c6150
--- /dev/null
+++ b/core/src/Text/Edifact/Parsing/Commons.hs
@@ -0,0 +1,173 @@
1module Text.Edifact.Parsing.Commons
2 ( -- * Parsing context
3 Parser
4 , Context(..)
5 , CurrentPosition(..)
6 , defaultContext
7
8 -- * State combinators
9 , updateSyntax
10 , setCurrentPosition
11 , resetCurrentPosition
12
13 -- * Syntax helpers
14 -- ** Parsing combinators
15 , parseCompositeSeparator
16 , parseElementSeparator
17 , parseSegmentSeparator
18 , parseEscape
19
20 -- ** State accessors
21 -- | Shortcuts to the syntax in current state. Doesn't alter input stream.
22 , getCompositeSeparator
23 , getElementSeparator
24 , getSegmentSeparator
25 , getDecimalSign
26
27 -- * Context aware failure helpers
28 , failWithPosition
29 , (<??>)
30
31 -- * Parsec extras
32 , tries
33
34 -- * Technical combinators
35 , notYetImplemented
36 ) where
37
38import Text.Edifact.Types (Position, Syntax (..), defaultSyntax)
39
40import Data.Text (Text)
41import Text.Parsec (Parsec, char, choice, endOfLine, getState,
42 modifyState, try, updateState, (<?>))
43
44-- | Defines our "Text.Parsec" context.
45type Parser = Parsec Text Context
46
47data Context =
48 Context
49 { parsingSyntax :: Syntax -- ^ State of the syntax. To be updated on the encounter of the @UNA@ segment.
50 , currentPosition :: CurrentPosition -- ^ Pointer for current position in the parser. Used for enriched parsing error messages.
51 }
52
53defaultContext :: Context
54defaultContext = Context defaultSyntax Undefined
55
56-- | Current position in the parser.
57--
58-- For now it only stores the current position in a message, a segment group,
59-- a segment, or a composite.
60--
61-- Future version could store the whole path to improve debugging.
62data CurrentPosition = Undefined
63 | Defined Position
64 deriving Show
65
66getSyntax :: Parser Syntax
67getSyntax = parsingSyntax <$> getState
68
69-- | Get current charactor for decimal sign.
70-- It doesn't parse nor consume input.
71getDecimalSign :: Parser Char
72getDecimalSign = decimalSign <$> getSyntax
73
74-- | Get current charactor for segment separator.
75-- It doesn't parse nor consume input.
76getSegmentSeparator :: Parser Char
77getSegmentSeparator = segmentSeparator <$> getSyntax
78
79-- | Get current charactor for element separator.
80-- It doesn't parse nor consume input.
81getElementSeparator :: Parser Char
82getElementSeparator = elementSeparator <$> getSyntax
83
84-- | Get current charactor for composite separator.
85-- It doesn't parse nor consume input.
86getCompositeSeparator :: Parser Char
87getCompositeSeparator = compositeSeparator <$> getSyntax
88
89-- | This let change the operators used in the parsing. This is designed for the @UNA@ segment.
90updateSyntax :: Syntax -> Parser ()
91updateSyntax s = updateState (\ c -> c { parsingSyntax = s })
92
93-- | Read the parser state to extract current position.
94-- It doesn't parse nor consume input.
95getCurrentPosition :: Parser CurrentPosition
96getCurrentPosition = currentPosition <$> getState
97
98-- | Write the parser state to update current position.
99-- It doesn't parse nor consume input.
100setCurrentPosition :: Position -> Parser ()
101setCurrentPosition = updateCurrentPosition . Defined
102
103-- | Write the parser state to reset current position.
104-- It doesn't parse nor consume input.
105resetCurrentPosition :: Parser ()
106resetCurrentPosition = updateCurrentPosition Undefined
107
108updateCurrentPosition :: CurrentPosition -> Parser ()
109updateCurrentPosition pos = modifyState (\s -> s { currentPosition = pos })
110
111-- | Parse current charactor for element separator.
112-- It does parse and consume input.
113parseElementSeparator :: Parser Char
114parseElementSeparator = parseSpecialChar "element separator" elementSeparator
115
116-- | Parse current charactor for composite separator.
117-- It does parse and consume input.
118parseCompositeSeparator :: Parser Char
119parseCompositeSeparator = parseSpecialChar "composite separator" compositeSeparator
120
121-- | Parse current charactor for escape separator.
122-- It does parse and consume input.
123parseEscape :: Parser Char
124parseEscape = parseSpecialChar "escape character" escape
125
126-- | Parse current charactor for segment separator.
127-- It does parse and consume input.
128--
129-- It also tries consuming end of line after segment separator if any.
130parseSegmentSeparator :: Parser Char
131parseSegmentSeparator = tries [ parseSpecialChar "segment separator" segmentSeparator <* endOfLine
132 , parseSpecialChar "segment separator" segmentSeparator
133 ]
134
135parseSpecialChar :: String -> (Syntax -> Char) -> Parser Char
136parseSpecialChar description reader = do
137 c <- reader <$> getSyntax
138 let escape' '\"' = "\\\""
139 escape' c' = [c']
140 comment = description <> " (\"" <> escape' c <> "\")"
141 char c <?> comment
142
143-- | Let you try various parsers, not consuming until success.
144tries :: [Parser a] -> Parser a
145tries = choice . map try
146
147-- | Like 'fail', this operator let you annotate a parser if it were to fail.
148-- The difference with the standard "Text.Parsec" operator is that it appends
149-- the current position if any.
150failWithPosition :: String -> Parser a
151failWithPosition = withPosition fail
152
153-- | Like '<?>', this operator let you annotate a parser if it were to fail.
154-- The difference with the standard "Text.Parsec" operator is that it appends
155-- the current position if any.
156(<??>) :: Parser a -> String -> Parser a
157(<??>) = withPosition . (<?>)
158
159-- Same priority as <?> from Text.Parsec
160infix 0 <??>
161
162withPosition :: (String -> Parser a) -> String -> Parser a
163withPosition continuation msg =
164 let mkMessage Undefined = msg
165 mkMessage (Defined d) = msg <> " at position " <> show d
166 getMessage = mkMessage <$> getCurrentPosition
167 in getMessage >>= continuation
168
169-- | Alias to 'failWithPosition' to convey semantics of work-in-progress when
170-- writing a parser. This might be useful if you want to partially support a
171-- message.
172notYetImplemented :: String -> Parser a
173notYetImplemented = failWithPosition
diff --git a/core/src/Text/Edifact/Parsing/Primitives.hs b/core/src/Text/Edifact/Parsing/Primitives.hs
new file mode 100644
index 0000000..61659c8
--- /dev/null
+++ b/core/src/Text/Edifact/Parsing/Primitives.hs
@@ -0,0 +1,127 @@
1{-|
2Module : Text.Edifact.Parsing.Primitives
3Description : Low level combinators
4
5This module let you build parsers for primitive values, ie. values contained
6in a simple element, either text or number.
7
8= Examples
9
10To parse a text of 3 characters (@an3@ in standard Edifact representation):
11
12> an3 :: Parser Primitive
13> an3 = alphaNumeric `exactly` 3
14
15To parse a text of up to 10 characters (@an..10@ in standard Edifact representation):
16
17> an_10 :: Parser Primitive
18> an_10 = alphaNumeric `upTo` 10
19
20= Known limitations
21
22Numeric representation is not strictly compatible to the specification.
23The specification tells that negative sign (@-@) and decimal sign (@.@) are not
24to be counted in the length of the field.
25
26Therefore the following parser will fail even it's legal according to the
27specification:
28
29> n_3 :: Parser Primitive
30> n_3 = numeric `upTo` 3
31>
32> parse n_3 "-12.3"
33
34To be fixed, we have to change the way primitives combinators are built so that
35the 'upTo' and 'exactly' combinators are aware of the inner parser.
36 -}
37module Text.Edifact.Parsing.Primitives
38 (
39 -- * Primitives
40 -- ** Simple elements definition
41 alphaNumeric
42 , alpha
43 , numeric
44
45 -- ** Cardinality
46 , exactly
47 , upTo
48 , many
49
50 ) where
51
52import Text.Edifact.Parsing.Commons
53import Text.Edifact.Types
54
55import Data.String (fromString)
56import qualified Data.Text as T (length)
57import Text.Parsec (count, lookAhead, many1, noneOf,
58 oneOf)
59import qualified Text.Parsec as P (many)
60
61-- | Parser associated with the @an@ notation.
62alphaNumeric :: Parser Char
63alphaNumeric = do
64 separators <- sequence [ getSegmentSeparator
65 , getElementSeparator
66 , getCompositeSeparator
67 ]
68 tries [ parseEscape *> parseSegmentSeparator
69 , parseEscape *> parseElementSeparator
70 , parseEscape *> parseCompositeSeparator
71 , parseEscape *> parseEscape
72 , noneOf separators
73 ]
74
75-- | Parser associated with the @a@ notation.
76--
77-- So far it's simply an alias to 'alphaNumeric'.
78alpha :: Parser Char
79alpha = alphaNumeric
80
81-- | Parser associated with the @n@ notation.
82numeric :: Parser Char
83numeric = do
84 punctuationSign <- getDecimalSign
85 oneOf (punctuationSign : "0123456789-")
86
87-- | Combinator to build a parser of primitive which length is unspecified.
88--
89-- Correspondance with the Edifact notation:
90--
91-- > many alpha # same as a
92-- > many numeric # same as n
93-- > many alphaNumeric # same as an
94many :: Parser Char -> Parser Primitive
95many = fmap fromString . many1
96
97-- | Combinator to build a parser of primitive which length is capped.
98--
99-- Correspondance with the Edifact notation:
100--
101-- > alpha `upTo` 3 # same as a..3
102-- > numeric `upTo` 3 # same as n..3
103-- > alphaNumeric `upTo` 3 # same as an..3
104upTo :: Parser Char -> Int -> Parser Primitive
105upTo p c =
106 let check t =
107 let c' = T.length t
108 in if c' > c
109 then failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show c')
110 else pure (String t)
111 maybeEmpty = (<$) mempty . lookAhead
112 in check =<<
113 tries [ maybeEmpty parseSegmentSeparator
114 , maybeEmpty parseElementSeparator
115 , maybeEmpty parseCompositeSeparator
116 , fromString <$> P.many p
117 ]
118
119-- | Combinator to build a parser of primitive which length is fixed.
120--
121-- Correspondance with the Edifact notation:
122--
123-- > alpha `exactly` 3 # same as a3
124-- > numeric `exactly` 3 # same as n3
125-- > alphaNumeric `exactly` 3 # same as an3
126exactly :: Parser Char -> Int -> Parser Primitive
127exactly p c = fromString <$> count c p
diff --git a/core/src/Text/Edifact/Types.hs b/core/src/Text/Edifact/Types.hs
new file mode 100644
index 0000000..d0bbe0d
--- /dev/null
+++ b/core/src/Text/Edifact/Types.hs
@@ -0,0 +1,124 @@
1{-# LANGUAGE DerivingStrategies #-}
2{-# LANGUAGE GeneralizedNewtypeDeriving #-}
3
4{-|
5 Data types to represent Edifact values.
6
7 See "Text.Edifact.Parsing" to learn how to build parsers and use such parsers.
8 -}
9module Text.Edifact.Types
10 ( -- * Values
11 Value(..)
12 , Primitive(..)
13 , Position(..)
14 -- ** Element codes
15 , MessageCode(..)
16 , GroupCode(..)
17 , SegmentCode(..)
18 , CompositeCode(..)
19 , SimpleCode(..)
20 -- * Syntax
21 , Syntax(..)
22 , defaultSyntax
23 ) where
24
25import Data.Scientific (Scientific)
26import Data.String (IsString (..))
27import Data.Text (Text)
28
29-- | Code for a message.
30--
31-- Content is expected to match this regexp: @[A-Z]{6}@.
32newtype MessageCode = MessageCode { getMessageCode :: String } deriving newtype (Eq, Show, IsString)
33
34-- | Code for a segment group.
35--
36-- It's a code local to the message definition.
37newtype GroupCode = GroupCode { getGroupCode :: String } deriving newtype (Eq, Show, IsString)
38
39-- | Code for a segment.
40--
41-- Content is expected to match this regexp: @[A-Z]{3}@.
42--
43-- Standard segment codes are expected to match this regexp: @UN[A-Z]@.
44newtype SegmentCode = SegmentCode { getSegmentCode :: String } deriving newtype (Eq, Show, IsString)
45
46-- | Code for a composite element.
47--
48-- Content is expected to match this regexp: @C[0-9]{3}@.
49--
50-- It can also be used for standalone composites, frequently in standard segment
51-- definitions. In this case the codes are expected to match this regexp: @S[0-9]{3}@.
52newtype CompositeCode = CompositeCode String deriving newtype (Eq, Show, IsString)
53
54-- | Code for a simple element.
55--
56-- Content is expected to match this regexp: @[0-9]{4}@.
57newtype SimpleCode = SimpleCode String deriving newtype (Eq, Show, IsString)
58
59-- | Annotation of the position of the value relative to the parent value.
60--
61-- Content is expected to match this regexp: @[0-9]{3,4}@.
62--
63-- Example values:
64--
65-- > "010" :: Position
66-- > "0210" :: Position
67--
68-- See 'Text.Edifact.Parsing.position' for how to parse one.
69newtype Position = Position { getPosition :: String } deriving newtype (Eq, Show, IsString)
70
71-- | Representation of a simple component.
72--
73-- When defined by the 'Text.Edifact.Parsing.numeric' combinator, the simple
74-- component will produce a 'Number'.
75--
76-- When parsed by the 'Text.Edifact.Parsing.alphaNumeric' or
77-- 'Text.Edifact.Parsing.alpha' combinators, the simple component will produce a
78-- 'Text.Edifact.Types.String' from the raw textual representation.
79data Primitive = String Text -- ^ Default representation of a simple component.
80 | Number Scientific -- ^ Representation of a numerical simple component.
81 deriving stock (Eq, Show)
82
83-- | String like primitive values can be constructed via overloaded strings.
84-- This is convenient, but might be removed.
85instance IsString Primitive where
86 fromString = String . fromString
87
88-- | Recursive data structure to represent parsed Edifact values.
89data Value = Message MessageCode [(Position, [Value])]
90 | Group GroupCode [(Position, [Value])]
91 | Segment SegmentCode [(Position, Maybe Value)]
92 | Composite CompositeCode [(Position, Maybe Value)]
93 | Simple SimpleCode Primitive
94 deriving stock (Show, Eq)
95
96-- | Defines the special charactors the parser should respect.
97--
98-- This is defined in every payload via the @UNA@ segment (first segment expected).
99data Syntax = Syntax { compositeSeparator :: Char
100 , elementSeparator :: Char
101 , decimalSign :: Char
102 , escape :: Char
103 , segmentSeparator :: Char
104 }
105
106-- | Default value to initialize the parser.
107--
108-- > Syntax { compositeSeparator = ':'
109-- > , elementSeparator = '+'
110-- > , decimalSign = '.'
111-- > , escape = '?'
112-- > , segmentSeparator = '\''
113-- > }
114--
115-- Those default charactors should be considered as recommended values rather
116-- than official default values.
117defaultSyntax :: Syntax
118defaultSyntax =
119 Syntax { compositeSeparator = ':'
120 , elementSeparator = '+'
121 , decimalSign = '.'
122 , escape = '?'
123 , segmentSeparator = '\''
124 }