(*
 * Copyright (c) 2018-2019
 *	The President and Fellows of Harvard College.
 *
 * Written by David A. Holland.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *)

open Symbolic
open Mipsstate


(*
 * Elements that appear in instructions.
 *
 * Any of these can be symbolic.
 *
 * Local code labels (branch/call targets) are program points and
 * if symbolic must resolve to program points within the code block.
 *
 * Local data labels represent stack frame entries. They ultimately
 * resolve to integer offsets, but not without machinations.
 *)

type imm =  int symbolic    (* immediate constant *)

type localcodelabel = Ppoint.t symbolic
type localdatalabel = int symbolic (* stack offset *)

(*
 * External code and data labels are fixed strings representing
 * external assembler/linker symbols. These appear in the assembler
 * namespace, not the logic namespace.
 *
 * XXX this form doesn't distinguish code and data labels any more;
 * should probably have another type layer inside the symbolic instead
 * of just using string.
 *)

type extcodelabel = string symbolic
type extdatalabel = string symbolic

(*
 * A memory reference in an instruction is a register and an offset,
 * with the proviso that the register contains a reference to an
 * external data label and the offset is within its bounds/agrees
 * with its typing. XXX: how that needs to work is not yet clear.
 *
 * (local data, being stack elements, uses specializations of the
 * instructions instead.)
 *)

type memory = MEMORY of sireg * imm

(*
 * MIPS instruction set
 *
 * This doesn't include everything, but it includes everything we're
 * remotely likely to need to synthesize. Note that it explicitly 
 * does *not* include sync, ll/sc, syscall, wait, and other special-
 * purpose ops with special-purpose semantics, which we currently
 * estimate will be configured explicitly in the machine description.
 *)

(* operand order is the same as assembly language: output is on the left *)

type concreteinsn =
  (* arithmetic *)
| ADDU of sireg * sireg * sireg
| ADDIU of sireg * sireg * imm
| DIV of sireg * sireg           (* raw version *)
| DIVU of sireg * sireg          (* raw version *)
| LUI of sireg * imm
| MULT of sireg * sireg          (* raw version *)
| MULTU of sireg * sireg         (* raw version *)
| SUBU of sireg * sireg * sireg
| LUI_extcode_hi of sireg * extcodelabel
| ADDIU_extcode_lo of sireg * sireg * extcodelabel
| LUI_extdata_hi of sireg * extdatalabel
| ADDIU_extdata_lo of sireg * sireg * extdatalabel
| LUI_localcode_hi of sireg * localcodelabel
| ADDIU_localcode_lo of sireg * sireg * localcodelabel
  (* comparisons *)
| SLT of sireg * sireg * sireg
| SLTI of sireg * sireg * imm
| SLTU of sireg * sireg * sireg
| SLTIU of sireg * sireg * imm
  (* bitwise logic *)
| AND of sireg * sireg * sireg
| ANDI of sireg * sireg * imm
| NOR of sireg * sireg * sireg
| OR of sireg * sireg * sireg
| ORI of sireg * sireg * imm
| XOR of sireg * sireg * sireg
| XORI of sireg * sireg * imm
  (* shifts *)
| SLL of sireg * sireg * imm
| SLLV of sireg * sireg * sireg
| SRA of sireg * sireg * imm
| SRAV of sireg * sireg * sireg
| SRL of sireg * sireg * imm
| SRLV of sireg * sireg * sireg
  (* transfers *)
| B of localcodelabel
| BEQ of sireg * sireg * localcodelabel
| BNE of sireg * sireg * localcodelabel
| J of extcodelabel
| JAL of extcodelabel
| JALR of sireg                (* link register always implicitly $31 *)
| JR of sireg
  (* moves *)
| CFC1 of sireg * sifcreg
| CTC1 of sireg * sifcreg
| MFC0 of sireg * sicreg
| MFC1 of sireg * sifreg
| MFHI of sireg
| MFLO of sireg
| MTC0 of sireg * sicreg
| MTC1 of sireg * sifreg
| MTHI of sireg
| MTLO of sireg
| MOVN of sireg * sireg * sireg
| MOVZ of sireg * sireg * sireg
  (* loads *)
| LB of sireg * memory
| LBU of sireg * memory
| LDC1 of sifreg * memory
| LH of sireg * memory
| LHU of sireg * memory
| LW of sireg * memory
| LWC1 of sifreg * memory
  (* stores *)
| SB of sireg * memory
| SDC1 of sifreg * memory
| SH of sireg * memory
| SW of sireg * memory
| SWC1 of sifreg * memory
  (*
   * cache
   *
   * The final symi names a golem to use for addressing; this
   * determines ... I'm still not sure exactly what. XXX
   *)
| CACHE_INDEX_WBINV of cache symbolic * memory * symi
| CACHE_VADDR_WB of cache symbolic * memory * symi
| CACHE_VADDR_WBINV of cache symbolic * memory * symi
| CACHE_VADDR_INV of cache symbolic * memory * symi
  (* mmu *)
| TLBP
| TLBR
| TLBWI
| TLBWR

type insn = concreteinsn symbolic

(*
 * A code block is an alternating sequence of program points and
 * instructions. Every instruction is potentially symbolic because
 * that's the domain we're working in.
 *
 * The last (rather than first) program point is stored separately
 * because that makes it easier to extract the first and last
 * program points.
 *)

type code = (Ppoint.t * insn) list * Ppoint.t

(*
 * Types for the logical expressions.
 *)

type typename =
   | BOOL
   | CACHESTATE
   | UINT5
   | SINT8
   | UINT8
   | SINT16
   | UINT16
   | SINT32
   | UINT32
   | REGISTER
   | CREGISTER
   | FREGISTER
   | FCREGISTER
   | HIREGISTER
   | LOREGISTER
   | GOLEM
   | WHICHCACHE
   | WHICHCACHEENTRY
   | WHICHCACHEENTRYID
   | PROGRAMPOINT
   | INSN
   | MAP of typename * typename

(*
 * Semantics for accessing $0. $0 always reads as 0, and writes to it
 * are discarded. Since we compile everything to constraints on state,
 * some references to $0 (those that arise from reading) should yield
 * 0, and others (those that arise from writing) should yield "any
 * value is ok here".
 *)
type zerosem = Z_ZERO | Z_ANY

(*
 * Logical expressions.
 *
 * Note that the logic is timeless, so any reference to anything mutable
 * (e.g. registers or other processor state) must include a program point
 * to define when the reference is taken.
 *
 * Variable names may be:
 *    - plain variables: unqualified, universal, e.g. bound by the spec
 *    - control variables: contain the instruction number and typically
 *      instantiated for each instruction, but universal over state
 *    - state variables: contain the instruction number and also will
 *      be qualified with the start state when lowered to SMT; this means
 *      they can depend on machine state.
 *
 * At this level machine state is accessed with READSTATE and a program
 * point and not with variables.
 *
 * Scratch variables that don't depend on machine state (e.g. things
 * derived only from instruction fields) should be control variables.
 *
 * STATEVAR replaces the prior method of using a program point; that
 * causes confusion/complications when the variable is really per
 * instruction
 *)

type varname =
   | PLAINVAR of string
   | CONTROLVAR of symi
   | STATEVAR of symi

module VarMapKey = struct
   type t = varname
   let compare = compare
end
module VarMap = Map.Make(VarMapKey)

let string_of_varname x =
   match x with
   | PLAINVAR x' -> x'
   | CONTROLVAR (k, x') -> "insn" ^ string_of_int k ^ "_" ^ x'
   | STATEVAR (k, x') -> "s_insn" ^ string_of_int k ^ "_" ^ x'

type expr =
     (* constants *)
   | TRUE
   | FALSE
     (* constructors *)
   | CACHESTATEVALUE of cachestate
   | INTVALUE of int * typename
   | REGVALUE of regs
   | CREGVALUE of cregs
   | FREGVALUE of fregs
   | FCREGVALUE of fcregs
   | HIREGVALUE
   | LOREGVALUE
   | GOLEMVALUE of golems
   | WHICHCACHEVALUE of cache
   | WHICHCACHEENTRYIDVALUE of cacheentryid
   | WHICHCACHEENTRYVALUE of cache * cacheentryid
   | PPVALUE of Ppoint.t
   | INSNVALUE of concreteinsn
     (* access to processor state, as of a particular program point *)
   | READSTATE of expr * Ppoint.t * zerosem
   | READGOLEM of expr
(*
   | READALLSTATE of typename * Ppoint.t  (*all state of given register type*)
   | UPDATEMAP of expr * expr * expr
*)
     (* access to logic variables *)
   | READVAR of varname * typename
     (* access to logic functions *)
   | GOLEMIZE of expr(*golem value*) * expr(*argument*)
     (* logical operators *)
   | FORALL of varname * typename * expr
   | EXISTS of varname * typename * expr
   | LET of varname * typename * expr * expr
   | IF of expr * expr * expr
   | IMPLIES of expr * expr
   | LOGNOT of expr
   | LOGAND of expr * expr
   | LOGOR of expr * expr
   | LOGXOR of expr * expr
   | LOGEQ of expr * expr
     (* other operators *)
   | MINTUOP of Mintops.uop * expr
   | MINTBOP of Mintops.bop * expr * expr

(*
 * A specification is a precondition and a postcondition.
 *
 * NOTE: to avoid numbering complications, input preconditions and
 * postconditions are written against program points 0 (pre-state) and
 * 1 (post-state). The postcondition gets renumbered after we generate
 * a code block skeleton.
 *
 * XXX: do we need a separate list of binders first?
 *)

type spec = expr * expr

(*
 * Sketch that (maybe) goes with a specification
 *)

type sketchoperand = string * expr list		(* field and possible values *)
type sketchinsn = int * sketchoperand list	(* opcode and operands *)

type sketchentry =
   | ANYTHING					(* arbitrary sequence *)
   | CHOICES of sketchinsn list			(* choices for one insn *)

type sketch = sketchentry list



let eq_logic a b = LOGEQ (a, b)
let eq_mint a b = MINTBOP (Mintops.EQ, a, b)

let neq_logic a b = LOGNOT (LOGEQ (a, b))
let neq_mint a b = LOGNOT (MINTBOP (Mintops.EQ, a, b))

(*
 * and/or over lists
 * XXX: these reverse the list, which is confusing
 * (when fixing, also fix the ones in smt.ml)
 *)
let and_list props =
   List.fold_right (fun prop z : expr -> LOGAND (prop, z)) props TRUE
let or_list props =
   List.fold_right (fun prop z : expr -> LOGOR (prop, z)) props FALSE
