Formalization of Floating-Point Arithmetic

Full float theory (with infinities and NaN).

A note on intended semantics: we use the same idea as the SMTLIB floating point theory, that defers any inconsistencies to the "parent" document. Hence, in doubt, the correct axiomatisation is one that implements [BTRW14] "An Automatable Formal Semantics for IEEE-754 Floating-Point Arithmetic", which in turn defers any inconsistencies to IEEE-754.

This theory is split into two parts: the first part talks about IEEE operations and this is what you should use as a user, the second part is internal only and is an axiomatisation for the provers that do not natively support floating point. You should not use any symbols you find there in your verification conditions as solvers with native floating point support will leave them uninterpreted.

Rounding Modes

module RoundingMode
  type mode = RNE | RNA | RTP | RTN | RTZ

RNE : Round Nearest ties to Even
RNA : Round Nearest ties to Away
RTP : Round Towards Positive
RTN : Round Towards Negative
RTZ : Round Towards Zero

  predicate to_nearest (m:mode) = m = RNE \/ m = RNA
end

module GenericFloat

  use int.Int
  use bv.Pow2int
  use real.Abs as Abs
  use real.FromInt as FromInt
  use real.Truncate as Truncate
  use real.RealInfix
  use export RoundingMode

Part I - Public Interface

  constant eb : int

the number of bits in the exponent.

  constant sb : int

the number of bits in the significand, including the hidden bit.

  axiom eb_gt_1: 1 < eb
  axiom sb_gt_1: 1 < sb

Sorts

  type t

abstract type to denote floating-point numbers, including the special values for infinities and NaNs

Constructors and Constants

  val constant zeroF     : t

+0.0

  (* exp_bias = 2^(eb - 1) - 1 *)
  (* max_finite_exp = 2^sb - 2 - exp_bias = exp_bias *)
  (* max_significand = (2^eb + 2^eb - 1) * 2^(1-eb) *)
  (* max_value = (2^eb + 2^eb - 1) * 2^(1-eb) * 2 ^ max_finite_exp *)
  (* [m;x] = ( 1 + m * 2^(1-eb) ) * 2^( x - exp_bias ) *)
  (* max_value = [2^(eb-1); 2^sb - 2] *)

Operators

  val function add mode t t : t
  val function sub mode t t : t
  val function mul mode t t : t
  val function div mode t t : t

The four basic operations, rounded in the given mode

  val function abs         t   : t

Absolute value

  val function neg         t   : t

Opposite

  val function fma  mode t t t : t

Fused multiply-add: x * y + z

  val function sqrt mode   t   : t

Square root

  let function (.-_) (x:t)   : t = neg x
  let function (.+)  (x y:t) : t = add RNE x y
  let function (.-)  (x y:t) : t = sub RNE x y
  let function (.*)  (x y:t) : t = mul RNE x y
  let function (./)  (x y:t) : t = div RNE x y

Notations for operations in the default mode RNE

  val function roundToIntegral mode t : t

Rounding to an integer

  function min       t t : t
  function max       t t : t

Minimum and Maximum

Note that we have to follow IEEE-754 and SMTLIB here. Two things to note in particular:

1) min(-0, 0) is either 0 or -0, there is a choice

2) if either argument is NaN then the other argument is returned

Due to the unclear status of min and max in IEEE norm, we intentionally not provide these function as "val"s to be used in programs

Comparisons

  val predicate le t t
  val predicate lt t t
  let predicate ge (x:t) (y:t) = le y x
  let predicate gt (x:t) (y:t) = lt y x
  val predicate eq t t

equality on floats, different from = since not (eq NaN NaN)

  let predicate (.<=) (x:t) (y:t) = le x y
  let predicate (.<)  (x:t) (y:t) = lt x y
  let predicate (.>=) (x:t) (y:t) = ge x y
  let predicate (.>)  (x:t) (y:t) = gt x y
  let predicate (.=)  (x:t) (y:t) = eq x y

Notations

Classification of numbers

  predicate is_normal    t
  predicate is_subnormal t
  val predicate is_zero      t
  val predicate is_infinite  t
  val predicate is_nan       t
  val predicate is_positive  t
  val predicate is_negative  t

  predicate is_finite    t

helper predicate for zeros, normals and subnormals. not defined so that the axiomatisation below can use it without talking about subnormals

  predicate is_plus_infinity  (x:t) = is_infinite x /\ is_positive x
  predicate is_minus_infinity (x:t) = is_infinite x /\ is_negative x
  predicate is_plus_zero      (x:t) = is_zero x /\ is_positive x
  predicate is_minus_zero     (x:t) = is_zero x /\ is_negative x
  predicate is_not_nan        (x:t) = is_finite x \/ is_infinite x

  axiom is_not_nan: forall x:t. is_not_nan x <-> not (is_nan x)

  axiom is_not_finite: forall x:t.
    not (is_finite x) <-> (is_infinite x \/ is_nan x)

Conversions from other sorts

  (* from bitvec binary interchange                           *)
  (*   partly done with from_binary (for literals only)       *)
  (* from another fp                 - see FloatConverter     *)
  (* from real                                                *)
  (*   partly done with (!) (for literals only)               *)
  (* from unsigned integer bitvector - see Float_BV_Converter *)
  (* from signed integer bitvector                            *)

Conversions to other sorts

  (* to unsigned integer bitvector   - see Float_BV_Converter *)
  (* to signed integer bitvector                              *)
  (* to real *)
  function to_real   t    : real

Part II - Private Axiomatisation

Constructors and Constants

  axiom zeroF_is_positive : is_positive zeroF
  axiom zeroF_is_zero     : is_zero zeroF

  axiom zero_to_real : forall x [is_zero x].
    is_zero x <-> is_finite x /\ to_real x = 0.0

Conversions from other sorts

  (* with mathematical int *)
  (* note that these conversions do not feature in SMTLIB *)

  (* intended semantics for of_int are the same as (_ to_fp eb sb) with a   *)
  (* suitably sized bitvector, large enough to hold x                       *)
  (* note values >= than the below should result in infinities              *)
  (*    float32 : 0x1ffffff * 2^103                                         *)
  (*    float64 : 0x3fffffffffffff * 2^970                                  *)
  (* also note that this function never yields a subnormal, or a NaN, or -0 *)
  function of_int (m:mode) (x:int) : t

Conversions to other sorts

  (* Intended semantics for to_int are the same as (_ fp.to_sbv) with a    *)
  (* suitably sized bitvector. Safe minimum sizes are given below:         *)
  (*    float32 : 129                                                      *)
  (*    float64 : 1025                                                     *)
  (* In particular this function should be uninterpreted for infinities    *)
  (* and NaN. Take care that no conclusion can be made on the result based *)
  (* on the size of the bitvector chosen in those cases, i.e. this should  *)
  (* not hold:                                                             *)
  (*    to_int +INF < 2 ** 2048     // nope                                *)
  (*    to_int +INF > 0             // nope                                *)
  function to_int (m:mode) (x:t) : int

  axiom zero_of_int : forall m. zeroF = of_int m 0

Arithmetic

  (* The intended meaning for round is the rounding for floating point as  *)
  (* described on p17 of IEEE-754. For results where the corresponding     *)
  (* floating point result would be infinity or NaN this function should   *)
  (* be uninterpreted.                                                     *)
  (*                                                                       *)
  (* Note that this means round (+INF) > 0 is not true.                    *)
  (* Note also this means round (2*INF) > round (INF) is not true either.  *)
  function round mode real : real

  constant max_real : real (* defined when cloning *)
  constant max_int  : int  (* defined when cloning *)

  constant emax : int = pow2 (eb - 1)

  axiom max_int_spec : max_int = pow2 emax - pow2 (emax - sb)
  axiom max_real_int: max_real = FromInt.from_int max_int

  predicate in_range (x:real) = -. max_real <=. x <=. max_real

  predicate in_int_range (i:int) = - max_int <= i <= max_int

  axiom is_finite: forall x:t. is_finite x -> in_range (to_real x)

  (* used as a condition to propagate is_finite *)
  predicate no_overflow (m:mode) (x:real) = in_range (round m x)

  (* Axioms on round *)

  axiom Bounded_real_no_overflow [@W:non_conservative_extension:N] :
    forall m:mode, x:real. in_range x -> no_overflow m x

  axiom Round_monotonic :
    forall m:mode, x y:real. x <=. y -> round m x <=. round m y

  axiom Round_idempotent :
    forall m1 m2:mode, x:real. round m1 (round m2 x) = round m2 x

  axiom Round_to_real :
    forall m:mode, x:t. is_finite x -> round m (to_real x) = to_real x

  axiom Round_down_le:
    forall x:real. round RTN x <=. x
  axiom Round_up_ge:
    forall x:real. round RTP x >=. x
  axiom Round_down_neg:
    forall x:real. round RTN (-.x) = -. round RTP x
  axiom Round_up_neg:
    forall x:real. round RTP (-.x) = -. round RTN x

rounding up and down

  (* The biggest representable integer whose predecessor (i.e. -1) is  representable *)
  constant pow2sb : int (* defined when cloning *)
  axiom pow2sb: pow2sb = pow2 sb

  (* range in which every integer is representable *)
  predicate in_safe_int_range (i: int) = - pow2sb <= i <= pow2sb

  (* round and integers *)

  axiom Exact_rounding_for_integers:
    forall m:mode, i:int.
      in_safe_int_range i ->
        round m (FromInt.from_int i) = FromInt.from_int i

Comparisons

Comparison predicates

  predicate same_sign (x y : t) =
    (is_positive x /\ is_positive y) \/ (is_negative x /\ is_negative y)
  predicate diff_sign (x y : t) =
    (is_positive x /\ is_negative y) \/ (is_negative x /\ is_positive y)

  axiom feq_eq: forall x y.
    is_finite x -> is_finite y -> not (is_zero x) -> x .= y -> x = y

  axiom eq_feq: forall x y.
    is_finite x -> is_finite y -> x = y -> x .= y

  axiom eq_refl: forall x. is_finite x -> x .= x

  axiom eq_sym :
    forall x y. x .= y -> y .= x

  axiom eq_trans :
    forall x y z. x .= y -> y .= z -> x .= z

  axiom eq_zero: zeroF .= (.- zeroF)

  axiom eq_to_real_finite: forall x y.
    is_finite x /\ is_finite y -> (x .= y <-> to_real x = to_real y)

  axiom eq_special: forall x y. x .= y ->
       (is_not_nan x  /\ is_not_nan y
    /\ ((is_finite x /\ is_finite y)
        \/ (is_infinite x /\ is_infinite y /\ same_sign x y)))

  axiom lt_finite: forall x y [lt x y].
    is_finite x /\ is_finite y -> (lt x y <-> to_real x <. to_real y)

  axiom le_finite: forall x y [le x y].
    is_finite x /\ is_finite y -> (le x y <-> to_real x <=. to_real y)

  lemma le_lt_trans:
    forall x y z:t. x .<= y /\ y .< z -> x .< z

  lemma lt_le_trans:
    forall x y z:t. x .< y /\ y .<= z -> x .< z

  lemma le_ge_asym:
    forall x y:t. x .<= y /\ x .>= y -> x .= y

  lemma not_lt_ge: forall x y:t.
    not (x .< y) /\ is_not_nan x /\ is_not_nan y -> x .>= y

  lemma not_gt_le: forall x y:t.
    not (x .> y) /\ is_not_nan x /\ is_not_nan y -> x .<= y

  axiom le_special: forall x y [le x y]. le x y ->
      ((is_finite x /\ is_finite y)
   \/ ((is_minus_infinity x /\ is_not_nan y)
   \/  (is_not_nan x /\ is_plus_infinity y)))

  axiom lt_special: forall x y [lt x y]. lt x y ->
      ((is_finite x /\ is_finite y)
   \/ ((is_minus_infinity x /\ is_not_nan y /\ not (is_minus_infinity y))
   \/  (is_not_nan x /\ not (is_plus_infinity x) /\ is_plus_infinity y)))

  axiom lt_lt_finite: forall x y z. lt x y -> lt y z -> is_finite y

  (*  lemmas on sign *)
  axiom positive_to_real: forall x[is_positive x|to_real x >=. 0.0].
    is_finite x -> is_positive x -> to_real x >=. 0.0
  axiom to_real_positive: forall x[is_positive x].
    is_finite x -> to_real x >. 0.0 -> is_positive x

  axiom negative_to_real: forall x [is_negative x|to_real x <=. 0.0].
    is_finite x -> is_negative x -> to_real x <=. 0.0
  axiom to_real_negative: forall x [is_negative x].
    is_finite x -> to_real x <. 0.0 -> is_negative x

  axiom negative_xor_positive: forall x.
    not (is_positive x /\ is_negative x)
  axiom negative_or_positive: forall x.
    is_not_nan x -> is_positive x \/ is_negative x

  lemma diff_sign_trans:
    forall x y z:t. (diff_sign x y /\ diff_sign y z) -> same_sign x z

  lemma diff_sign_product:
    forall x y:t.
      (is_finite x /\ is_finite y /\ to_real x *. to_real y <. 0.0) ->
        diff_sign x y

  lemma same_sign_product:
    forall x y:t.
      (is_finite x /\ is_finite y /\ same_sign x y) ->
        to_real x *. to_real y >=. 0.0

  predicate product_sign (z x y: t) =
    (same_sign x y -> is_positive z) /\ (diff_sign x y -> is_negative z)

Overflow

  (* This predicate is used to tell what is the result of a rounding
     in case of overflow in the axioms specifying add/sub/mul and fma
     *)
  predicate overflow_value (m:mode) (x:t) =
    match m with
    | RTN -> if is_positive x then is_finite x /\ to_real x = max_real
                              else is_infinite x
    | RTP -> if is_positive x then is_infinite x
                              else is_finite x /\ to_real x = -. max_real
    | RTZ -> if is_positive x then is_finite x /\ to_real x = max_real
                              else is_finite x /\ to_real x = -. max_real
    | (RNA | RNE) -> is_infinite x
    end

  (* This predicate is used to tell what is the sign of zero in the
     axioms specifying add and sub *)
  predicate sign_zero_result (m:mode) (x:t) =
    is_zero x ->
    match m with
    | RTN -> is_negative x
    | _   -> is_positive x
    end

binary operations

  axiom add_finite: forall m:mode, x y:t [add m x y].
    is_finite x -> is_finite y -> no_overflow m (to_real x +. to_real y) ->
    is_finite (add m x y) /\
    to_real (add m x y) = round m (to_real x +. to_real y)

  lemma add_finite_rev: forall m:mode, x y:t [add m x y].
    is_finite (add m x y) ->
    is_finite x /\ is_finite y

  lemma add_finite_rev_n: forall m:mode, x y:t [add m x y].
    to_nearest m ->
    is_finite (add m x y) ->
    no_overflow m (to_real x +. to_real y) /\
    to_real (add m x y) = round m (to_real x +. to_real y)

  axiom sub_finite: forall m:mode, x y:t [sub m x y].
    is_finite x -> is_finite y -> no_overflow m (to_real x -. to_real y) ->
    is_finite (sub m x y) /\
    to_real (sub m x y) = round m (to_real x -. to_real y)

  lemma sub_finite_rev: forall m:mode, x y:t [sub m x y].
    is_finite (sub m x y) ->
    is_finite x /\ is_finite y

  lemma sub_finite_rev_n: forall m:mode, x y:t [sub m x y].
    to_nearest m ->
    is_finite (sub m x y) ->
    no_overflow m (to_real x -. to_real y) /\
    to_real (sub m x y) = round m (to_real x -. to_real y)

  axiom mul_finite: forall m:mode, x y:t [mul m x y].
    is_finite x -> is_finite y -> no_overflow m (to_real x *. to_real y) ->
    is_finite (mul m x y) /\
    to_real (mul m x y) = round m (to_real x *. to_real y)

  lemma mul_finite_rev: forall m:mode, x y:t [mul m x y].
    is_finite (mul m x y) ->
    is_finite x /\ is_finite y

  lemma mul_finite_rev_n: forall m:mode, x y:t [mul m x y].
    to_nearest m ->
    is_finite (mul m x y) ->
    no_overflow m (to_real x *. to_real y) /\
    to_real (mul m x y) = round m (to_real x *. to_real y)

  axiom div_finite: forall m:mode, x y:t [div m x y].
    is_finite x -> is_finite y ->
    not is_zero y -> no_overflow m (to_real x /. to_real y) ->
    is_finite (div m x y) /\
    to_real (div m x y) = round m (to_real x /. to_real y)

  lemma div_finite_rev: forall m:mode, x y:t [div m x y].
    is_finite (div m x y) ->
    (is_finite x /\ is_finite y /\ not is_zero y) \/
    (is_finite x /\ is_infinite y /\ to_real (div m x y) = 0.)

  lemma div_finite_rev_n: forall m:mode, x y:t [div m x y].
    to_nearest m ->
    is_finite (div m x y) -> is_finite y ->
    no_overflow m (to_real x /. to_real y) /\
    to_real (div m x y) = round m (to_real x /. to_real y)

  axiom neg_finite: forall x:t [neg x].
    is_finite x ->
    is_finite (neg x) /\
    to_real (neg x) = -. to_real x

  lemma neg_finite_rev: forall x:t [neg x].
    is_finite (neg x) ->
    is_finite x /\
    to_real (neg x) = -. to_real x

  axiom abs_finite: forall x:t [abs x].
    is_finite x ->
    is_finite (abs x) /\
    to_real (abs x) = Abs.abs (to_real x) /\
    is_positive (abs x)

  lemma abs_finite_rev: forall x:t [abs x].
    is_finite (abs x) ->
    is_finite x /\
    to_real (abs x) = Abs.abs (to_real x)

  axiom abs_universal : forall x:t [abs x]. not (is_negative (abs x))

  axiom fma_finite: forall m:mode, x y z:t [fma m x y z].
    is_finite x -> is_finite y -> is_finite z ->
    no_overflow m (to_real x *. to_real y +. to_real z) ->
    is_finite (fma m x y z) /\
    to_real (fma m x y z) = round m (to_real x *. to_real y +. to_real z)

  lemma fma_finite_rev: forall m:mode, x y z:t [fma m x y z].
    is_finite (fma m x y z) ->
    is_finite x /\ is_finite y /\ is_finite z

  lemma fma_finite_rev_n: forall m:mode, x y z:t [fma m x y z].
    to_nearest m ->
    is_finite (fma m x y z) ->
    no_overflow m (to_real x *. to_real y +. to_real z) /\
    to_real (fma m x y z) = round m (to_real x *. to_real y +. to_real z)

  use real.Square as S

  axiom sqrt_finite: forall m:mode, x:t [sqrt m x].
    is_finite x -> to_real x >=. 0. ->
    is_finite (sqrt m x) /\
    to_real (sqrt m x) = round m (S.sqrt (to_real x))

  lemma sqrt_finite_rev: forall m:mode, x:t [sqrt m x].
    is_finite (sqrt m x) ->
    is_finite x /\ to_real x >=. 0. /\
    to_real (sqrt m x) = round m (S.sqrt (to_real x))

  predicate same_sign_real (x:t) (r:real) =
    (is_positive x /\ r >. 0.0) \/ (is_negative x /\ r <. 0.0)

  axiom add_special: forall m:mode, x y:t [add m x y].
    let r = add m x y in
    (is_nan x \/ is_nan y -> is_nan r)
    /\
    (is_finite x /\ is_infinite y -> is_infinite r /\ same_sign r y)
    /\
    (is_infinite x /\ is_finite y -> is_infinite r /\ same_sign r x)
    /\
    (is_infinite x /\ is_infinite y /\ same_sign x y
      -> is_infinite r /\ same_sign r x)
    /\
    (is_infinite x /\ is_infinite y /\ diff_sign x y -> is_nan r)
    /\
    (is_finite x /\ is_finite y /\ not no_overflow m (to_real x +. to_real y)
      -> same_sign_real r (to_real x +. to_real y) /\ overflow_value m r)
    /\
    (is_finite x /\ is_finite y
      -> if same_sign x y then same_sign r x else sign_zero_result m r)

  axiom sub_special: forall m:mode, x y:t [sub m x y].
    let r = sub m x y in
    (is_nan x \/ is_nan y -> is_nan r)
    /\
    (is_finite x /\ is_infinite y -> is_infinite r /\ diff_sign r y)
    /\
    (is_infinite x /\ is_finite y -> is_infinite r /\ same_sign r x)
    /\
    (is_infinite x /\ is_infinite y /\ same_sign x y -> is_nan r)
    /\
    (is_infinite x /\ is_infinite y /\ diff_sign x y
      -> is_infinite r /\ same_sign r x)
    /\
    (is_finite x /\ is_finite y /\ not no_overflow m (to_real x -. to_real y)
      -> same_sign_real r (to_real x -. to_real y) /\ overflow_value m r)
    /\
    (is_finite x /\ is_finite y
      -> if diff_sign x y then same_sign r x else sign_zero_result m r)

  axiom mul_special: forall m:mode, x y:t [mul m x y].
    let r = mul m x y in
       (is_nan x \/ is_nan y -> is_nan r)
    /\ (is_zero x /\ is_infinite y -> is_nan r)
    /\ (is_finite x /\ is_infinite y /\ not (is_zero x)
         -> is_infinite r)
    /\ (is_infinite x /\ is_zero y -> is_nan r)
    /\ (is_infinite x /\ is_finite y /\ not (is_zero  y)
         -> is_infinite r)
    /\ (is_infinite x /\ is_infinite y -> is_infinite r)
    /\ (is_finite x /\ is_finite y /\ not no_overflow m (to_real x *. to_real y)
         -> overflow_value m r)
    /\ (not is_nan r -> product_sign r x y)

  axiom div_special: forall m:mode, x y:t [div m x y].
    let r = div m x y in
       (is_nan x \/ is_nan y -> is_nan r)
    /\ (is_finite x /\ is_infinite y -> is_zero r)
    /\ (is_infinite x /\ is_finite y -> is_infinite r)
    /\ (is_infinite x /\ is_infinite y -> is_nan r)
    /\ (is_finite x /\ is_finite y /\ not (is_zero y) /\
        not no_overflow m (to_real x /. to_real y)
         -> overflow_value m r)
    /\ (is_finite x /\ is_zero y /\ not (is_zero x)
         -> is_infinite r)
    /\ (is_zero x /\ is_zero y -> is_nan r)
    /\ (not is_nan r -> product_sign r x y)

  axiom neg_special: forall x:t [neg x].
       (is_nan x -> is_nan (neg x))
    /\ (is_infinite x -> is_infinite (neg x))
    /\ (not is_nan x -> diff_sign x (neg x))

  axiom abs_special: forall x:t [abs x].
       (is_nan x -> is_nan (abs x))
    /\ (is_infinite x -> is_infinite (abs x))
    /\ (not is_nan x -> is_positive (abs x))

  axiom fma_special: forall m:mode, x y z:t [fma m x y z].
    let r = fma m x y z in
       (is_nan x \/ is_nan y \/ is_nan z -> is_nan r)
    /\ (is_zero x /\ is_infinite y -> is_nan r)
    /\ (is_infinite x /\ is_zero y -> is_nan r)
    /\ (is_finite x /\ not (is_zero x) /\ is_infinite y /\ is_finite z
        -> is_infinite r /\ product_sign r x y)
    /\ (is_finite x /\ not (is_zero x) /\ is_infinite y /\ is_infinite z
        -> (if product_sign z x y then is_infinite r /\ same_sign r z
            else is_nan r))
    /\ (is_infinite x /\ is_finite y /\ not (is_zero y) /\ is_finite z
        -> is_infinite r /\ product_sign r x y)
    /\ (is_infinite x /\ is_finite y /\ not (is_zero y) /\ is_infinite z
        -> (if product_sign z x y then is_infinite r /\ same_sign r z
            else is_nan r))
    /\ (is_infinite x /\ is_infinite y /\ is_finite z
        -> is_infinite r /\ product_sign r x y)
    /\ (is_finite x /\ is_finite y /\ is_infinite z
        -> is_infinite r /\ same_sign r z)
    /\ (is_infinite x /\ is_infinite y /\ is_infinite z
        -> (if product_sign z x y then is_infinite r /\ same_sign r z
            else is_nan r))
    /\ (is_finite x /\ is_finite y /\ is_finite z /\
        not no_overflow m (to_real x *. to_real y +. to_real z)
        -> same_sign_real r (to_real x *. to_real y +. to_real z)
        /\ overflow_value m r)
    /\ (is_finite x /\ is_finite y /\ is_finite z
        -> if product_sign z x y then same_sign r z
           else (to_real x *. to_real y +. to_real z = 0.0 ->
                 if m = RTN then is_negative r else is_positive r))

  axiom sqrt_special: forall m:mode, x:t [sqrt m x].
    let r = sqrt m x in
       (is_nan x -> is_nan r)
    /\ (is_plus_infinity x -> is_plus_infinity r)
    /\ (is_minus_infinity x -> is_nan r)
    /\ (is_finite x /\ to_real x <. 0.0 -> is_nan r)
    /\ (is_zero x -> same_sign r x)
    /\ (is_finite x /\ to_real x >. 0.0 -> is_positive r)

  (* exact arithmetic with integers *)

  axiom of_int_add_exact: forall m n, i j.
    in_safe_int_range i -> in_safe_int_range j ->
    in_safe_int_range (i + j) -> eq (of_int m (i + j)) (add n (of_int m i) (of_int m j))

  axiom of_int_sub_exact: forall m n, i j.
    in_safe_int_range i -> in_safe_int_range j ->
    in_safe_int_range (i - j) -> eq (of_int m (i - j)) (sub n (of_int m i) (of_int m j))

  axiom of_int_mul_exact: forall m n, i j.
    in_safe_int_range i -> in_safe_int_range j ->
    in_safe_int_range (i * j) -> eq (of_int m (i * j)) (mul n (of_int m i) (of_int m j))

  (* min and max *)

  lemma Min_r : forall x y:t. y .<= x -> (min x y) .= y
  lemma Min_l : forall x y:t. x .<= y -> (min x y) .= x
  lemma Max_r : forall x y:t. y .<= x -> (max x y) .= x
  lemma Max_l : forall x y:t. x .<= y -> (max x y) .= y

  (* _____________ *)

  use real.Truncate as Truncate

  (* This predicate specify if a float is finite and is an integer *)
  predicate is_int (x:t)

characterizing integers

  (* by construction *)
  axiom zeroF_is_int: is_int zeroF

  axiom of_int_is_int: forall m, x.
    in_int_range x -> is_int (of_int m x)

  axiom big_float_is_int: forall m i.
    is_finite i ->
    i .<= neg (of_int m pow2sb) \/ (of_int m pow2sb) .<= i ->
      is_int i

  axiom roundToIntegral_is_int: forall m:mode, x:t. is_finite x ->
    is_int (roundToIntegral m x)

  (* by propagation *)
  axiom eq_is_int: forall x y. eq x y -> is_int x -> is_int y

  axiom add_int: forall x y m. is_int x -> is_int y ->
    is_finite (add m x y) -> is_int (add m x y)

  axiom sub_int: forall x y m. is_int x -> is_int y ->
    is_finite (sub m x y) -> is_int (sub m x y)

  axiom mul_int: forall x y m. is_int x -> is_int y ->
    is_finite (mul m x y) -> is_int (mul m x y)

  axiom fma_int: forall x y z m. is_int x -> is_int y -> is_int z ->
    is_finite (fma m x y z) -> is_int (fma m x y z)

  axiom neg_int: forall x. is_int x -> is_int (neg x)

  axiom abs_int: forall x. is_int x -> is_int (abs x)

basic properties of float integers

  axiom is_int_of_int: forall x m m'.
    is_int x -> eq x (of_int m' (to_int m x))

  axiom is_int_to_int: forall m x.
    is_int x -> in_int_range (to_int m x)

  axiom is_int_is_finite: forall x.
    is_int x -> is_finite x

  axiom int_to_real: forall m x.
    is_int x -> to_real x = FromInt.from_int (to_int m x)

(*  axiom int_mode: forall m1 m2 x.
    is_int x -> to_int m1 x = to_int m2 x  etc ...*)

rounding ints

  axiom truncate_int: forall m:mode, i:t. is_int i ->
    roundToIntegral m i .= i

truncate

  axiom truncate_neg: forall x:t.
    is_finite x -> is_negative x -> roundToIntegral RTZ x = roundToIntegral RTP x

  axiom truncate_pos: forall x:t.
    is_finite x -> is_positive x -> roundToIntegral RTZ x = roundToIntegral RTN x

ceil

  axiom ceil_le: forall x:t. is_finite x -> x .<= (roundToIntegral RTP x)

  axiom ceil_lest: forall x y:t. x .<= y /\ is_int y -> (roundToIntegral RTP x) .<= y

  axiom ceil_to_real: forall x:t.
    is_finite x ->
      to_real (roundToIntegral RTP x) = FromInt.from_int (Truncate.ceil (to_real x))

  axiom ceil_to_int: forall m:mode, x:t.
    is_finite x ->
      to_int m (roundToIntegral RTP x) = Truncate.ceil (to_real x)

floor

  axiom floor_le: forall x:t. is_finite x -> (roundToIntegral RTN x) .<= x

  axiom floor_lest: forall x y:t. y .<= x /\ is_int y -> y .<= (roundToIntegral RTN x)

  axiom floor_to_real: forall x:t.
    is_finite x ->
      to_real (roundToIntegral RTN x) = FromInt.from_int (Truncate.floor (to_real x))

  axiom floor_to_int: forall m:mode, x:t.
    is_finite x ->
      to_int m (roundToIntegral RTN x) = Truncate.floor (to_real x)

  (* Rna *)

  axiom RNA_down:
    forall x:t. (x .- (roundToIntegral RTN x)) .< ((roundToIntegral RTP x) .- x) ->
      roundToIntegral RNA x = roundToIntegral RTN x

  axiom RNA_up:
    forall x:t. (x .- (roundToIntegral RTN x)) .> ((roundToIntegral RTP x) .- x) ->
      roundToIntegral RNA x = roundToIntegral RTP x

  axiom RNA_down_tie:
    forall x:t. (x .- (roundToIntegral RTN x)) .= ((roundToIntegral RTP x) .- x) ->
      is_negative x -> roundToIntegral RNA x = roundToIntegral RTN x

  axiom RNA_up_tie:
    forall x:t. ((roundToIntegral RTP x) .- x) .= (x .- (roundToIntegral RTN x)) ->
    is_positive x -> roundToIntegral RNA x = roundToIntegral RTP x

  (* to_int *)
  axiom to_int_roundToIntegral: forall m:mode, x:t.
    to_int m x = to_int m (roundToIntegral m x)

  axiom to_int_monotonic: forall m:mode, x y:t.
    is_finite x -> is_finite y -> le x y -> to_int m x <= to_int m y

  axiom to_int_of_int: forall m:mode, i:int.
    in_safe_int_range i ->
      to_int m (of_int m i) = i

  axiom eq_to_int: forall m, x y. is_finite x -> x .= y ->
    to_int m x = to_int m y

  axiom neg_to_int: forall m x.
    is_int x -> to_int m (neg x) = - (to_int m x)

  axiom roundToIntegral_is_finite  : forall m:mode, x:t. is_finite x ->
    is_finite (roundToIntegral m x)
end

Conversions to/from bitvectors

module Float_BV_Converter
  use bv.BV8 as BV8
  use bv.BV16 as BV16
  use bv.BV32 as BV32
  use bv.BV64 as BV64
  use RoundingMode

  type t (* the underlying float type, to be cloned *)
  predicate is_finite t
  predicate le t t
  function to_real         t : real
  function round   mode real : real

  (* convert from signed bitvector *)
  val function of_sbv8  mode BV8.t  : t
  val function of_sbv16 mode BV16.t : t
  val function of_sbv32 mode BV32.t : t
  val function of_sbv64 mode BV64.t : t

  (* convert to signed bitvector *)
  val function to_sbv8  mode BV8.t  : t
  val function to_sbv16 mode BV16.t : t
  val function to_sbv32 mode BV32.t : t
  val function to_sbv64 mode BV64.t : t

  (* convert from unsigned bitvector *)
  val function of_ubv8  mode BV8.t  : t
  val function of_ubv16 mode BV16.t : t
  val function of_ubv32 mode BV32.t : t
  val function of_ubv64 mode BV64.t : t

  (* convert to unsigned bitvector *)
  val function to_ubv8  mode t : BV8.t
  val function to_ubv16 mode t : BV16.t
  val function to_ubv32 mode t : BV32.t
  val function to_ubv64 mode t : BV64.t

  use real.RealInfix
  use real.FromInt as FromInt

  (* only true for big enough floats...  *)

of unsigned bv axioms

  axiom of_ubv8_is_finite : forall m, x. is_finite (of_ubv8  m x)
  axiom of_ubv16_is_finite: forall m, x. is_finite (of_ubv16 m x)
  axiom of_ubv32_is_finite: forall m, x. is_finite (of_ubv32 m x)
  axiom of_ubv64_is_finite: forall m, x. is_finite (of_ubv64 m x)

  axiom of_ubv8_monotonic :
    forall m, x y. BV8.ule  x y -> le (of_ubv8  m x) (of_ubv8  m y)
  axiom of_ubv16_monotonic:
    forall m, x y. BV16.ule x y -> le (of_ubv16 m x) (of_ubv16 m y)
  axiom of_ubv32_monotonic:
    forall m, x y. BV32.ule x y -> le (of_ubv32 m x) (of_ubv32 m y)
  axiom of_ubv64_monotonic:
    forall m, x y. BV64.ule x y -> le (of_ubv64 m x) (of_ubv64 m y)

  axiom of_ubv8_to_real : forall m, x.
    to_real (of_ubv8  m x) = FromInt.from_int (BV8.t'int x)
  axiom of_ubv16_to_real: forall m, x.
    to_real (of_ubv16 m x) = FromInt.from_int (BV16.t'int x)
  (* of_ubv32_to_real is defined at cloning *)
  axiom of_ubv64_to_real: forall m, x.
    to_real (of_ubv64 m x) = round m (FromInt.from_int (BV64.t'int x))
end

Standard simple precision floats (32 bits)

module Float32
  use int.Int as Int
  use real.Real

  type t = < float 8 24 >

  constant pow2sb : int = 16777216
  constant max_int : int = 0xFFFF_FF00_0000_0000_0000_0000_0000_0000
  constant max_real : real = 0x1.FFFFFEp127

  clone export GenericFloat with
    type t = t,
    constant eb = t'eb,
    constant sb = t'sb,
    constant pow2sb = pow2sb,
    constant max_int = max_int,
    constant max_real = max_real,
    function to_real = t'real,
    predicate is_finite = t'isFinite,
    (* the following are lemmas and not goals, because we want to
       prove them in the realizations. See also
       [https://gitlab.inria.fr/why3/why3/-/issues/664] *)
    lemma eb_gt_1,
    lemma sb_gt_1,
    lemma max_int_spec,
    lemma max_real_int,
    lemma pow2sb,
    axiom . (* TODO: "lemma"? "goal"? *)

  lemma round_bound_ne :
    forall x:real [round RNE x].
      no_overflow RNE x ->
        x - 0x1p-24 * Abs.abs(x) - 0x1p-150 <= round RNE x <= x + 0x1p-24 * Abs.abs(x) + 0x1p-150

  lemma round_bound :
    forall m:mode, x:real [round m x].
      no_overflow m x ->
      x - 0x1p-23 * Abs.abs(x) - 0x1p-149 <= round m x <= x + 0x1p-23 * Abs.abs(x) + 0x1p-149
end

Standard double precision floats (64 bits)

module Float64
  use int.Int as Int
  use real.Real

  type t = < float 11 53 >

  constant pow2sb : int = 9007199254740992  (* 242 *)
  constant max_int : int = 0xFFFF_FFFF_FFFF_F800_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000
  constant max_real : real = 0x1.FFFFFFFFFFFFFp1023

  clone export GenericFloat with
    type t = t,
    constant eb = t'eb,
    constant sb = t'sb,
    constant pow2sb = pow2sb,
    constant max_int = max_int,
    constant max_real = max_real,
    function to_real = t'real,
    predicate is_finite = t'isFinite,
    (* the following are lemmas and not goals, because we want to
       prove them in the realizations. See also
       [https://gitlab.inria.fr/why3/why3/-/issues/664] *)
    lemma eb_gt_1,
    lemma sb_gt_1,
    lemma max_int_spec,
    lemma max_real_int,
    lemma pow2sb,
    axiom . (* TODO: "lemma"? "goal"? *)

  lemma round_bound_ne :
    forall x:real [round RNE x].
      no_overflow RNE x ->
        x - 0x1p-53 * Abs.abs(x) - 0x1p-1075 <= round RNE x <= x + 0x1p-53 * Abs.abs(x) + 0x1p-1075

  lemma round_bound :
    forall m:mode, x:real [round m x].
      no_overflow m x ->
      x - 0x1p-52 * Abs.abs(x) - 0x1p-1074 <= round m x <= x + 0x1p-52 * Abs.abs(x) + 0x1p-1074
end

Conversions between float formats

module FloatConverter

  use Float64 as Float64
  use Float32 as Float32

  use export RoundingMode

  function to_float64 mode Float32.t : Float64.t
  function to_float32 mode Float64.t : Float32.t

  lemma round_double_single :
  forall m1 m2:mode, x:real.
    Float64.round m1 (Float32.round m2 x) = Float32.round m2 x

  lemma to_float64_exact:
    forall m:mode, x:Float32.t. Float32.t'isFinite x ->
      Float64.t'isFinite (to_float64 m x)
   /\ Float64.t'real (to_float64 m x) = Float32.t'real x

  lemma to_float32_conv:
    forall m:mode, x:Float64.t. Float64.t'isFinite x ->
      Float32.no_overflow m (Float64.t'real x) ->
        Float32.t'isFinite (to_float32 m x)
     /\ Float32.t'real (to_float32 m x) = Float32.round m (Float64.t'real x)

end

module Float32_BV_Converter
  use Float32

  clone export Float_BV_Converter with
    type t = t,
    predicate is_finite = t'isFinite,
    predicate le = (.<=),
    function to_real = t'real,
    function round = round,
    axiom . (* TODO: "lemma"? "goal"? *)

  axiom of_ubv32_to_real : forall m, x.
    t'real (of_ubv32 m x) = round m (FromInt.from_int (BV32.t'int x))
end

module Float64_BV_Converter
  use Float64

  clone export Float_BV_Converter with
    type t = t,
    predicate is_finite = t'isFinite,
    predicate le = (.<=),
    function to_real = t'real,
    function round = round,
    axiom . (* TODO: "lemma"? "goal"? *)

  axiom of_ubv32_to_real : forall m, x.
    t'real (of_ubv32 m x) = FromInt.from_int (BV32.t'int x)
end