-- | -- References: -- - -- -- @ -- URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -- -- hier-part = "//" authority path-abempty -- / path-absolute -- / path-rootless -- / path-empty -- -- URI-reference = URI / relative-ref -- -- absolute-URI = scheme ":" hier-part [ "?" query ] -- -- relative-ref = relative-part [ "?" query ] [ "#" fragment ] -- -- relative-part = "//" authority path-abempty -- / path-absolute -- / path-noscheme -- / path-empty -- -- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) -- -- authority = [ userinfo "@" ] host [ ":" port ] -- userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) -- host = IP-literal / IPv4address / reg-name -- port = *DIGIT -- -- IP-literal = "[" ( IPv6address / IPvFuture ) "]" -- -- IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) -- -- IPv6address = 6( h16 ":" ) ls32 -- / "::" 5( h16 ":" ) ls32 -- / [ h16 ] "::" 4( h16 ":" ) ls32 -- / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 -- / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 -- / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 -- / [ *4( h16 ":" ) h16 ] "::" ls32 -- / [ *5( h16 ":" ) h16 ] "::" h16 -- / [ *6( h16 ":" ) h16 ] "::" -- -- h16 = 1*4HEXDIG -- ls32 = ( h16 ":" h16 ) / IPv4address -- IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet -- dec-octet = DIGIT ; 0-9 -- / %x31-39 DIGIT ; 10-99 -- / "1" 2DIGIT ; 100-199 -- / "2" %x30-34 DIGIT ; 200-249 -- / "25" %x30-35 ; 250-255 -- -- reg-name = *( unreserved / pct-encoded / sub-delims ) -- -- path = path-abempty ; begins with "/" or is empty -- / path-absolute ; begins with "/" but not "//" -- / path-noscheme ; begins with a non-colon segment -- / path-rootless ; begins with a segment -- / path-empty ; zero characters -- -- path-abempty = *( "/" segment ) -- path-absolute = "/" [ segment-nz *( "/" segment ) ] -- path-noscheme = segment-nz-nc *( "/" segment ) -- path-rootless = segment-nz *( "/" segment ) -- path-empty = 0 -- -- segment = *pchar -- segment-nz = 1*pchar -- segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) -- ; non-zero-length segment without any colon ":" -- -- pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -- -- query = *( pchar / "/" / "?" ) -- -- fragment = *( pchar / "/" / "?" ) -- -- pct-encoded = "%" HEXDIG HEXDIG -- -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -- reserved = gen-delims / sub-delims -- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -- sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -- / "*" / "+" / "," / ";" / "=" -- @ module Iri.CodePointPredicates.Rfc3986 where import Iri.CodePointPredicates.Core import Iri.Prelude hiding (Predicate, inRange, (&&&), (|||)) -- | -- Reference: -- -- @ -- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -- @ unreserved :: Predicate unreserved = asciiAlphanumeric ||| oneOfChars "-._~" subDelims :: Predicate subDelims = oneOfChars "!$&'()*+,;=" {-# NOINLINE scheme #-} scheme :: Predicate scheme = cached $ asciiAlphanumeric ||| oneOfChars "+.-" {-# NOINLINE domainLabel #-} domainLabel :: Predicate domainLabel = cached $ asciiAlphanumeric ||| oneOfChars "-_~" {- pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -} {-# NOINLINE unencodedPathSegment #-} unencodedPathSegment :: Predicate unencodedPathSegment = cached $ unreserved ||| subDelims ||| oneOfChars ":@" -- | -- Reference: -- -- @ -- query = *( pchar / "/" / "?" ) -- @ -- -- Notice that we've added the "|" char, because some real life URIs seem to contain it. -- Also we've excluded the '+' char, because it gets decoded as a space char. {-# NOINLINE unencodedQuery #-} unencodedQuery :: Predicate unencodedQuery = cached $ (unencodedPathSegment ||| oneOfChars "/?|[]\"'") &&& (/= 43) unencodedFragment :: Predicate unencodedFragment = unencodedQuery {-# NOINLINE unencodedQueryComponent #-} unencodedQueryComponent :: Predicate unencodedQueryComponent = cached $ unencodedQuery &&& not . oneOfChars "=&;" {-# NOINLINE unencodedUserInfoComponent #-} unencodedUserInfoComponent :: Predicate unencodedUserInfoComponent = cached $ unreserved ||| subDelims