{-|
Module      : Text.Edifact.Parsing.Primitives
Description : Low level combinators

This module lets you build parsers for primitive values, i.e. values contained
in a simple element, either text or number.

= Examples

To parse a text of 3 characters (@an3@ in standard Edifact representation):

> an3 :: Parser Primitive
> an3 = alphaNumeric `exactly` 3

To parse a text of up to 10 characters (@an..10@ in standard Edifact
representation):

> an_10 :: Parser Primitive
> an_10 = alphaNumeric `upTo` 10

= Known limitations

Numeric representation is not strictly compatible with the specification.

The specification states that the negative sign (@-@) and the decimal sign
(@.@) are not to be counted in the length of the field. Therefore the
following parser will fail even though it is legal according to the
specification:

> n_3 :: Parser Primitive
> n_3 = numeric `upTo` 3
>
> parse n_3 "-12.3"

To fix this, the way primitive combinators are built has to change so that the
'upTo' and 'exactly' combinators are aware of the inner parser.
-}
module Text.Edifact.Parsing.Primitives
  ( -- * Primitives
    -- ** Simple elements definition
    alphaNumeric
  , alpha
  , numeric
    -- ** Cardinality
  , exactly
  , upTo
  , many
  ) where

import           Text.Edifact.Parsing.Commons
import           Text.Edifact.Types

import           Data.String                  (fromString)
import qualified Data.Text                    as T (length)
import           Text.Parsec                  (count, lookAhead, many1, noneOf,
                                               oneOf)
import qualified Text.Parsec                  as P (many)

-- | Parser associated with the @an@ notation.
--
-- The alternatives handed to 'tries' are, in order: an escape followed by the
-- segment separator, an escape followed by the element separator, an escape
-- followed by the composite separator, an escape followed by another escape,
-- and finally any character that is not one of the three separators currently
-- configured. For the escaped forms, the result is the one produced by the
-- parser that follows the escape.
alphaNumeric :: Parser Char
alphaNumeric = do
  segment   <- getSegmentSeparator
  element   <- getElementSeparator
  composite <- getCompositeSeparator
  -- An escaped token is the escape parser followed by the released token.
  let escaped target = parseEscape *> target
  tries $
    map escaped
      [ parseSegmentSeparator
      , parseElementSeparator
      , parseCompositeSeparator
      , parseEscape
      ]
      ++ [noneOf [segment, element, composite]]

-- | Parser associated with the @a@ notation.
--
-- For now this is nothing more than an alias of 'alphaNumeric'.
alpha :: Parser Char
alpha = alphaNumeric

-- | Parser associated with the @n@ notation.
--
-- Accepts a single character among the decimal digits, the minus sign (@-@)
-- and the decimal sign obtained from 'getDecimalSign'.
numeric :: Parser Char
numeric =
  getDecimalSign >>= \decimalSign ->
    oneOf (decimalSign : ['0' .. '9'] ++ ['-'])

-- | Combinator to build a parser of primitive whose length is unspecified.
--
-- At least one character is required, since the inner parser is run through
-- 'many1'.
--
-- Correspondence with the Edifact notation:
--
-- > many alpha         # same as a
-- > many numeric       # same as n
-- > many alphaNumeric  # same as an
many :: Parser Char -> Parser Primitive
many p = fromString <$> many1 p

-- | Combinator to build a parser of primitive whose length is capped.
--
-- The alternatives handed to 'tries' are: an empty text when a segment,
-- element or composite separator is next (checked with 'lookAhead', so the
-- separator is left in the input), or otherwise zero or more repetitions of
-- the inner parser. The length of the text obtained is checked afterwards
-- and the parser fails through 'failWithPosition' when it exceeds the cap.
--
-- Correspondence with the Edifact notation:
--
-- > alpha `upTo` 3         # same as a..3
-- > numeric `upTo` 3       # same as n..3
-- > alphaNumeric `upTo` 3  # same as an..3
upTo :: Parser Char -> Int -> Parser Primitive
upTo p c = candidate >>= capped
  where
    -- Text to be validated: empty in front of a separator, else whatever
    -- the inner parser accepts repeatedly.
    candidate =
      tries $
        map (\separator -> mempty <$ lookAhead separator)
          [ parseSegmentSeparator
          , parseElementSeparator
          , parseCompositeSeparator
          ]
          ++ [fromString <$> P.many p]

    -- Reject texts longer than the cap, wrap the others as a primitive.
    capped content
      | actual > c =
          failWithPosition ("expected up to " <> show c <> " characters, but encountered " <> show actual)
      | otherwise = pure (String content)
      where
        actual = T.length content

-- | Combinator to build a parser of primitive whose length is fixed.
--
-- The inner parser is run exactly the requested number of times via 'count'.
--
-- Correspondence with the Edifact notation:
--
-- > alpha `exactly` 3         # same as a3
-- > numeric `exactly` 3       # same as n3
-- > alphaNumeric `exactly` 3  # same as an3
exactly :: Parser Char -> Int -> Parser Primitive
exactly p c = fmap fromString (count c p)