Package char-utf8: Theory of Unicode characters

Information

namechar-utf8
version1.0
descriptionTheory of Unicode characters
authorJoe Hurd <joe@gilith.com>
licenseMIT
show Data.Bool
Data.Byte as Byte
Data.Char
Data.List
Data.Pair
Data.Word16 as Word16
Number.Numeral
Parser

Files

Defined Constants

Theorems

isParser UTF8.decoder.parse

UTF8.parseContinuationByte = parseSome UTF8.isContinuationByte

UTF8.decoder = mkParser UTF8.decoder.parse

UTF8.decodeStream = parseStream UTF8.decoder

destParser UTF8.decoder = UTF8.decoder.parse

UTF8.parseTwoContinuationBytes =
  parsePair UTF8.parseContinuationByte UTF8.parseContinuationByte

UTF8.parseThreeContinuationBytes =
  parsePair UTF8.parseContinuationByte UTF8.parseTwoContinuationBytes

parse UTF8.decoder =
  Stream.case Data.Option.none Data.Option.none UTF8.decoder.parse

bs.
    UTF8.decode bs = Stream.toList (UTF8.decodeStream (Stream.fromList bs))

chs. UTF8.encode chs = concat (map UTF8.encoder chs)

b. UTF8.isContinuationByte b Byte.bit b 7 ¬Byte.bit b 6

p0 p1.
    UTF8.encoder.encode1 p0 p1 =
    let
      (λb00.
         let
           (λb01.
              let
                (λb0.
                   let
                     (λb10.
                        let (λb1. b0 :: b1 :: [])
                          (Byte.or (Byte.fromNatural 128) b10))
                     (Byte.and p1 (Byte.fromNatural 63)))
                (Byte.or (Byte.fromNatural 192) (Byte.or b00 b01)))
           (Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192)) 6))
      (Byte.shiftLeft p0 2)

ch.
    UTF8.encoder ch =
    let
      (λ(pl, pos).
         let
           (λp.
              let
                (λ(p0, p1).
                   if p = Byte.fromNatural 0
                   then (if p0 = Byte.fromNatural 0 ¬Byte.bit p1 7
                    then p1 :: []
                    else if Byte.and (Byte.fromNatural 248) p0 =
                    Byte.fromNatural 0 then UTF8.encoder.encode1 p0 p1
                    else UTF8.encoder.encode2 p0 p1)
                   else UTF8.encoder.encode3 p p0 p1)
                (Word16.toBytes (destPosition pos))) (destPlane pl))
      (destChar ch)

b0 s.
    UTF8.decoder.parse b0 s =
    (if Byte.bit b0 7
     then (if Byte.bit b0 6
      then (if Byte.bit b0 5
       then (if Byte.bit b0 4
        then (if Byte.bit b0 3 then Data.Option.none
         else parse
           (partialMap (UTF8.decoder.decode3 b0)
              UTF8.parseThreeContinuationBytes) s)
        else parse
          (partialMap (UTF8.decoder.decode2 b0)
             UTF8.parseTwoContinuationBytes) s)
       else parse
         (partialMap (UTF8.decoder.decode1 b0) UTF8.parseContinuationByte)
         s) else Data.Option.none)
     else let
       (λpl.
          let
            (λpos. let (λch. Data.Option.some (ch, s)) (mkChar (pl, pos)))
            (mkPosition (Word16.fromBytes (Byte.fromNatural 0) b0)))
       (mkPlane (Byte.fromNatural 0)))

b0 b1.
    UTF8.decoder.decode1 b0 b1 =
    let
      (λpl.
         let
           (λp0.
              let
                (λy1.
                   let
                     (λx1.
                        let
                          (λp1.
                             if p0 = Byte.fromNatural 0
                             Byte.< p1 (Byte.fromNatural 128)
                             then Data.Option.none
                             else let
                               (λpos.
                                  let (λch. Data.Option.some ch)
                                    (mkChar (pl, pos)))
                               (mkPosition (Word16.fromBytes p0 p1)))
                          (Byte.or y1 x1))
                     (Byte.and b1 (Byte.fromNatural 63)))
                (Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 3)) 6))
           (Byte.shiftRight (Byte.and b0 (Byte.fromNatural 28)) 2))
      (mkPlane (Byte.fromNatural 0))

p0 p1.
    UTF8.encoder.encode2 p0 p1 =
    let
      (λb00.
         let
           (λb0.
              let
                (λb10.
                   let
                     (λb11.
                        let
                          (λb1.
                             let
                               (λb20.
                                  let (λb2. b0 :: b1 :: b2 :: [])
                                    (Byte.or (Byte.fromNatural 128) b20))
                               (Byte.and p1 (Byte.fromNatural 63)))
                          (Byte.or (Byte.fromNatural 128)
                             (Byte.or b10 b11)))
                     (Byte.shiftRight (Byte.and p1 (Byte.fromNatural 192))
                        6))
                (Byte.shiftLeft (Byte.and p0 (Byte.fromNatural 15)) 2))
           (Byte.or (Byte.fromNatural 224) b00))
      (Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240)) 4)

b0 b1 b2 b3.
    UTF8.decoder.decode3 b0 (b1, b2, b3) =
    let
      (λw.
         let
           (λz.
              let
                (λp.
                   if p = Byte.fromNatural 0
                   Byte.< (Byte.fromNatural 16) p then Data.Option.none
                   else let
                     (λpl.
                        let
                          (λz0.
                             let
                               (λy0.
                                  let
                                    (λp0.
                                       let
                                         (λy1.
                                            let
                                              (λx1.
                                                 let
                                                   (λp1.
                                                      let
                                                        (λpos.
                                                           let
                                                             (λch.
                                                                Data.Option.some
                                                                  ch)
                                                             (mkChar
                                                                (pl, pos)))
                                                        (mkPosition
                                                           (Word16.fromBytes
                                                              p0 p1)))
                                                   (Byte.or y1 x1))
                                              (Byte.and b3
                                                 (Byte.fromNatural 63)))
                                         (Byte.shiftLeft
                                            (Byte.and b2
                                               (Byte.fromNatural 3)) 6))
                                    (Byte.or z0 y0))
                               (Byte.shiftRight
                                  (Byte.and b2 (Byte.fromNatural 60)) 2))
                          (Byte.shiftLeft
                             (Byte.and b1 (Byte.fromNatural 15)) 4))
                     (mkPlane p)) (Byte.or w z))
           (Byte.shiftRight (Byte.and b1 (Byte.fromNatural 48)) 4))
      (Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 7)) 2)

b0 b1 b2.
    UTF8.decoder.decode2 b0 (b1, b2) =
    let
      (λz0.
         let
           (λy0.
              let
                (λp0.
                   if Byte.< p0 (Byte.fromNatural 8)
                   Byte.≤ (Byte.fromNatural 216) p0
                   Byte.≤ p0 (Byte.fromNatural 223) then Data.Option.none
                   else let
                     (λy1.
                        let
                          (λx1.
                             let
                               (λp1.
                                  if p0 = Byte.fromNatural 255
                                  Byte.≤ (Byte.fromNatural 254) p1
                                  then Data.Option.none
                                  else let
                                    (λpl.
                                       let
                                         (λpos.
                                            let (λch. Data.Option.some ch)
                                              (mkChar (pl, pos)))
                                         (mkPosition
                                            (Word16.fromBytes p0 p1)))
                                    (mkPlane (Byte.fromNatural 0)))
                               (Byte.or y1 x1))
                          (Byte.and b2 (Byte.fromNatural 63)))
                     (Byte.shiftLeft (Byte.and b1 (Byte.fromNatural 3)) 6))
                (Byte.or z0 y0))
           (Byte.shiftRight (Byte.and b1 (Byte.fromNatural 60)) 2))
      (Byte.shiftLeft (Byte.and b0 (Byte.fromNatural 15)) 4)

p p0 p1.
    UTF8.encoder.encode3 p p0 p1 =
    let
      (λb00.
         let
           (λb0.
              let
                (λb10.
                   let
                     (λb11.
                        let
                          (λb1.
                             let
                               (λb20.
                                  let
                                    (λb21.
                                       let
                                         (λb2.
                                            let
                                              (λb30.
                                                 let
                                                   (λb3.
                                                      b0 :: b1 :: b2 ::
                                                      b3 :: [])
                                                   (Byte.or
                                                      (Byte.fromNatural
                                                         128) b30))
                                              (Byte.and p1
                                                 (Byte.fromNatural 63)))
                                         (Byte.or (Byte.fromNatural 128)
                                            (Byte.or b20 b21)))
                                    (Byte.shiftRight
                                       (Byte.and p1 (Byte.fromNatural 192))
                                       6))
                               (Byte.shiftLeft
                                  (Byte.and p0 (Byte.fromNatural 15)) 2))
                          (Byte.or (Byte.fromNatural 128)
                             (Byte.or b10 b11)))
                     (Byte.shiftRight (Byte.and p0 (Byte.fromNatural 240))
                        4))
                (Byte.shiftLeft (Byte.and p (Byte.fromNatural 3)) 4))
           (Byte.or (Byte.fromNatural 240) b00))
      (Byte.shiftRight (Byte.and p (Byte.fromNatural 28)) 2)

Input Type Operators

Input Constants

Assumptions

T

x. Stream.isSuffix x x

let = λf x. f x

t. (λx. t x) = t

() = λP. P = λx. T

x. x = x T

() = λp q. p q p

t. (t T) (t F)

x y. fst (x, y) = x

x y. snd (x, y) = y

P x. P x P ((select) P)

f y. (λx. f x) y = f y

x y. Stream.isProperSuffix x y Stream.isSuffix x y

() = λp q. (λf. f p q) = λf. f T T

() = λP. q. (x. P x q) q

f g. f = g x. f x = g x

() = λp q. r. (p r) (q r) r

PAIR'. fn. a0 a1. fn (a0, a1) = PAIR' a0 a1

t1 t2. (if T then t1 else t2) = t1 (if F then t1 else t2) = t2

(a. mkParser (destParser a) = a)
  r. isParser r destParser (mkParser r) = r

x. x = Stream.error x = Stream.eof a0 a1. x = Stream.stream a0 a1

(b f. Data.Option.case b f Data.Option.none = b)
  b f a. Data.Option.case b f (Data.Option.some a) = f a

p s.
    parse p s = Data.Option.none
    b s'.
      parse p s = Data.Option.some (b, s') Stream.isProperSuffix s' s

p.
    isParser p
    x xs. Data.Option.case T (λ(y, xs'). Stream.isSuffix xs' xs) (p x xs)

(p. parse p Stream.error = Data.Option.none)
  (p. parse p Stream.eof = Data.Option.none)
  p a s. parse p (Stream.stream a s) = destParser p a s

t. (T t t) (t T T) (F t T) (t t T) (t F ¬t)

(e b f. Stream.case e b f Stream.error = e)
  (e b f. Stream.case e b f Stream.eof = b)
  e b f a s. Stream.case e b f (Stream.stream a s) = f a s