]> git.immae.eu Git - github/fretlink/edi-parser.git/blob - core/src/Text/Edifact/Parsing/Primitives.hs
Release code as open source
[github/fretlink/edi-parser.git] / core / src / Text / Edifact / Parsing / Primitives.hs
1 {-|
2 Module : Text.Edifact.Parsing.Primitives
3 Description : Low level combinators
4
5 This module let you build parsers for primitive values, ie. values contained
6 in a simple element, either text or number.
7
8 = Examples
9
10 To parse a text of 3 characters (@an3@ in standard Edifact representation):
11
12 > an3 :: Parser Primitive
13 > an3 = alphaNumeric `exactly` 3
14
15 To parse a text of up to 10 characters (@an..10@ in standard Edifact representation):
16
17 > an_10 :: Parser Primitive
18 > an_10 = alphaNumeric `upTo` 10
19
20 = Known limitations
21
22 Numeric representation is not strictly compatible to the specification.
23 The specification tells that negative sign (@-@) and decimal sign (@.@) are not
24 to be counted in the length of the field.
25
26 Therefore the following parser will fail even it's legal according to the
27 specification:
28
29 > n_3 :: Parser Primitive
30 > n_3 = numeric `upTo` 3
31 >
32 > parse n_3 "-12.3"
33
34 To be fixed, we have to change the way primitives combinators are built so that
35 the 'upTo' and 'exactly' combinators are aware of the inner parser.
36 -}
37 module Text.Edifact.Parsing.Primitives
38 (
39 -- * Primitives
40 -- ** Simple elements definition
41 alphaNumeric
42 , alpha
43 , numeric
44
45 -- ** Cardinality
46 , exactly
47 , upTo
48 , many
49
50 ) where
51
52 import Text.Edifact.Parsing.Commons
53 import Text.Edifact.Types
54
55 import Data.String (fromString)
56 import qualified Data.Text as T (length)
57 import Text.Parsec (count, lookAhead, many1, noneOf,
58 oneOf)
59 import qualified Text.Parsec as P (many)
60
61 -- | Parser associated with the @an@ notation.
62 alphaNumeric :: Parser Char
63 alphaNumeric = do
64 separators <- sequence [ getSegmentSeparator
65 , getElementSeparator
66 , getCompositeSeparator
67 ]
68 tries [ parseEscape *> parseSegmentSeparator
69 , parseEscape *> parseElementSeparator
70 , parseEscape *> parseCompositeSeparator
71 , parseEscape *> parseEscape
72 , noneOf separators
73 ]
74
75 -- | Parser associated with the @a@ notation.
76 --
77 -- So far it's simply an alias to 'alphaNumeric'.
78 alpha :: Parser Char
79 alpha = alphaNumeric
80
81 -- | Parser associated with the @n@ notation.
82 numeric :: Parser Char
83 numeric = do
84 punctuationSign <- getDecimalSign
85 oneOf (punctuationSign : "0123456789-")
86
87 -- | Combinator to build a parser of primitive which length is unspecified.
88 --
89 -- Correspondance with the Edifact notation:
90 --
91 -- > many alpha # same as a
92 -- > many numeric # same as n
93 -- > many alphaNumeric # same as an
94 many :: Parser Char -> Parser Primitive
95 many = fmap fromString . many1
96
97 -- | Combinator to build a parser of primitive which length is capped.
98 --
99 -- Correspondance with the Edifact notation:
100 --
101 -- > alpha `upTo` 3 # same as a..3
102 -- > numeric `upTo` 3 # same as n..3
103 -- > alphaNumeric `upTo` 3 # same as an..3
104 upTo :: Parser Char -> Int -> Parser Primitive
105 upTo p c =
106 let check t =
107 let c' = T.length t
108 in if c' > c
109 then failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show c')
110 else pure (String t)
111 maybeEmpty = (<$) mempty . lookAhead
112 in check =<<
113 tries [ maybeEmpty parseSegmentSeparator
114 , maybeEmpty parseElementSeparator
115 , maybeEmpty parseCompositeSeparator
116 , fromString <$> P.many p
117 ]
118
119 -- | Combinator to build a parser of primitive which length is fixed.
120 --
121 -- Correspondance with the Edifact notation:
122 --
123 -- > alpha `exactly` 3 # same as a3
124 -- > numeric `exactly` 3 # same as n3
125 -- > alphaNumeric `exactly` 3 # same as an3
126 exactly :: Parser Char -> Int -> Parser Primitive
127 exactly p c = fromString <$> count c p