/*
 * IR - Lightweight JIT Compilation Framework
 * (Public API)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

#ifndef IR_H
#define IR_H

#ifdef __cplusplus
extern "C" {
#endif

#include <inttypes.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define IR_VERSION "0.0.1"

#ifdef _WIN32
/* TODO Handle ARM, too. */
# if defined(_M_X64) || defined(_M_ARM64)
#  define __SIZEOF_SIZE_T__ 8
# elif defined(_M_IX86)
#  define __SIZEOF_SIZE_T__ 4
# endif
/* Only supported is little endian for any arch on Windows,
   so just fake the same for all. */
# ifndef __ORDER_LITTLE_ENDIAN__
#  define __ORDER_LITTLE_ENDIAN__ 1
# endif
# define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
# ifndef __has_builtin
#  define __has_builtin(arg) (0)
# endif
#endif

/* target auto detection */
#if !defined(IR_TARGET_X86) && !defined(IR_TARGET_X64) && !defined(IR_TARGET_AARCH64)
# if defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#  define IR_TARGET_X64
# elif defined(i386) || defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define IR_TARGET_X86
# elif defined(__aarch64__) || defined(_M_ARM64)
#  define IR_TARGET_AARCH64
# elif defined (_WIN64)
#  define IR_TARGET_X64
# elif defined (_WIN32)
#  define IR_TARGET_X86
# endif
#endif

#if defined(IR_TARGET_X86)
# define IR_TARGET "x86"
#elif defined(IR_TARGET_X64)
# ifdef _WIN64
#  define IR_TARGET "Windows-x86_64" /* 64-bit Windows use different ABI and calling convention */
# else
#  define IR_TARGET "x86_64"
# endif
#elif defined(IR_TARGET_AARCH64)
# define IR_TARGET "aarch64"
#else
# error "Unknown IR target"
#endif

#if defined(__SIZEOF_SIZE_T__)
# if __SIZEOF_SIZE_T__ == 8
#  define IR_64 1
# elif __SIZEOF_SIZE_T__ != 4
#  error "Unknown addr size"
# endif
#else
# error "Unknown addr size"
#endif

#if defined(__BYTE_ORDER__)
# if defined(__ORDER_LITTLE_ENDIAN__)
#  if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#   define IR_STRUCT_LOHI(lo, hi) struct {lo; hi;}
#  endif
# endif
# if defined(__ORDER_BIG_ENDIAN__)
#  if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#   define IR_STRUCT_LOHI(lo, hi) struct {hi; lo;}
#  endif
# endif
#endif
#ifndef IR_STRUCT_LOHI
# error "Unknown byte order"
#endif

#ifdef __has_attribute
# if __has_attribute(always_inline)
#  define IR_ALWAYS_INLINE static inline __attribute__((always_inline))
# endif
# if __has_attribute(noinline)
#  define IR_NEVER_INLINE __attribute__((noinline))
# endif
#else
# define __has_attribute(x) 0
#endif

#ifndef IR_ALWAYS_INLINE
# define IR_ALWAYS_INLINE static inline
#endif
#ifndef IR_NEVER_INLINE
# define IR_NEVER_INLINE
#endif

#ifdef IR_PHP
# include "ir_php.h"
#endif

/* IR Type flags (low 4 bits are used for type size) */
#define IR_TYPE_SIGNED     (1<<4)
#define IR_TYPE_UNSIGNED   (1<<5)
#define IR_TYPE_FP         (1<<6)
#define IR_TYPE_SPECIAL    (1<<7)
#define IR_TYPE_BOOL       (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
#define IR_TYPE_ADDR       (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
#define IR_TYPE_CHAR       (IR_TYPE_SPECIAL|IR_TYPE_SIGNED)

/* List of IR types */
#define IR_TYPES(_) \
	_(BOOL,   bool,      b,    IR_TYPE_BOOL)     \
	_(U8,     uint8_t,   u8,   IR_TYPE_UNSIGNED) \
	_(U16,    uint16_t,  u16,  IR_TYPE_UNSIGNED) \
	_(U32,    uint32_t,  u32,  IR_TYPE_UNSIGNED) \
	_(U64,    uint64_t,  u64,  IR_TYPE_UNSIGNED) \
	_(ADDR,   uintptr_t, addr, IR_TYPE_ADDR)     \
	_(CHAR,   char,      c,    IR_TYPE_CHAR)     \
	_(I8,     int8_t,    i8,   IR_TYPE_SIGNED)   \
	_(I16,    int16_t,   i16,  IR_TYPE_SIGNED)   \
	_(I32,    int32_t,   i32,  IR_TYPE_SIGNED)   \
	_(I64,    int64_t,   i64,  IR_TYPE_SIGNED)   \
	_(DOUBLE, double,    d,    IR_TYPE_FP)       \
	_(FLOAT,  float,     f,    IR_TYPE_FP)       \

#define IR_IS_TYPE_UNSIGNED(t) ((t) < IR_CHAR)
#define IR_IS_TYPE_SIGNED(t)   ((t) >= IR_CHAR && (t) < IR_DOUBLE)
#define IR_IS_TYPE_INT(t)      ((t) < IR_DOUBLE)
#define IR_IS_TYPE_FP(t)       ((t) >= IR_DOUBLE)

#define IR_TYPE_ENUM(name, type, field, flags) IR_ ## name,

typedef enum _ir_type {
	IR_VOID,
	IR_TYPES(IR_TYPE_ENUM)
	IR_LAST_TYPE
} ir_type;

#ifdef IR_64
# define IR_SIZE_T          IR_U64
# define IR_SSIZE_T         IR_I64
# define IR_UINTPTR_T       IR_U64
# define IR_INTPTR_T        IR_I64
# define IR_C_UINTPTR       IR_U64
# define IR_C_INTPTR        IR_I64
# define ir_const_size_t    ir_const_u64
# define ir_const_ssize_t   ir_const_i64
# define ir_const_uintptr_t ir_const_u64
# define ir_const_intptr_t  ir_const_i64
#else
# define IR_SIZE_T          IR_U32
# define IR_SSIZE_T         IR_I32
# define IR_UINTPTR_T       IR_U32
# define IR_INTPTR_T        IR_I32
# define IR_C_UINTPTR       IR_U32
# define IR_C_INTPTR        IR_I32
# define ir_const_size_t    ir_const_u32
# define ir_const_ssize_t   ir_const_i32
# define ir_const_uintptr_t ir_const_u32
# define ir_const_intptr_t  ir_const_i32
#endif

/* List of IR opcodes
 * ==================
 *
 * Each instruction is described by a type (opcode, flags, op1_type, op2_type, op3_type)
 *
 * flags
 * -----
 * v     - void
 * d     - data      IR_OP_FLAG_DATA
 * r     - ref       IR_OP_FLAG_DATA alias
 * p     - pinned    IR_OP_FLAG_DATA + IR_OP_FLAG_PINNED
 * c     - control   IR_OP_FLAG_CONTROL
 * S     - control   IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_START
 * E     - control   IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END
 * T     - control   IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + IR_OP_FLAG_TERMINATOR
 * l     - load      IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_LOAD
 * s     - store     IR_OP_FLAG_MEM + IR_OP_FLAG_STORE
 * x     - call      IR_OP_FLAG_MEM + IR_OP_FLAG_CALL
 * a     - alloc     IR_OP_FLAG_MEM + IR_OP_FLAG_ALLOC
 * 0-3   - number of input edges
 * N     - number of arguments is defined in the insn->inputs_count (MERGE, PHI, CALL)
 * X1-X3 - number of extra data ops
 * C     - commutative operation ("d2C" => IR_OP_FLAG_DATA + IR_OP_FLAG_COMMUTATIVE)
 *
 * operand types
 * -------------
 * ___ - unused
 * def - reference to a definition op (data-flow use-def dependency edge)
 * ref - memory reference (data-flow use-def dependency edge)
 * var - variable reference (data-flow use-def dependency edge)
 * arg - argument reference CALL/TAILCALL/CARG->CARG
 * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
 * reg - data-control dependency on region (PHI, VAR, PARAM)
 * ret - reference to a previous RETURN instruction (RETURN)
 * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
 * num - number: argument number (PARAM)
 * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
 * opt - optional number
 * pro - function prototype
 *
 * The order of IR opcodes is carefully selected for efficient folding.
 * - foldable instruction go first
 * - NOP is never used (code 0 is used as ANY pattern)
 * - CONST is the most often used instruction (encode with 1 bit)
 * - equality inversion:  EQ <-> NE                         => op =^ 1
 * - comparison inversion: [U]LT <-> [U]GT, [U]LE <-> [U]GE  => op =^ 3
 */

#define IR_OPS(_) \
	/* special op (must be the first !!!)                               */ \
	_(NOP,          v,    ___, ___, ___) /* empty instruction           */ \
	\
	/* constants reference                                              */ \
	_(C_BOOL,       r0,   ___, ___, ___) /* constant                    */ \
	_(C_U8,         r0,   ___, ___, ___) /* constant                    */ \
	_(C_U16,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_U32,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_U64,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_ADDR,       r0,   ___, ___, ___) /* constant                    */ \
	_(C_CHAR,       r0,   ___, ___, ___) /* constant                    */ \
	_(C_I8,         r0,   ___, ___, ___) /* constant                    */ \
	_(C_I16,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_I32,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_I64,        r0,   ___, ___, ___) /* constant                    */ \
	_(C_DOUBLE,     r0,   ___, ___, ___) /* constant                    */ \
	_(C_FLOAT,      r0,   ___, ___, ___) /* constant                    */ \
	\
	/* equality ops  */                                                    \
	_(EQ,           d2C,  def, def, ___) /* equal                       */ \
	_(NE,           d2C,  def, def, ___) /* not equal                   */ \
	\
	/* comparison ops (order matters, LT must be a modulo of 4 !!!)     */ \
	_(LT,           d2,   def, def, ___) /* less                        */ \
	_(GE,           d2,   def, def, ___) /* greater or equal            */ \
	_(LE,           d2,   def, def, ___) /* less or equal               */ \
	_(GT,           d2,   def, def, ___) /* greater                     */ \
	_(ULT,          d2,   def, def, ___) /* unsigned less               */ \
	_(UGE,          d2,   def, def, ___) /* unsigned greater or equal   */ \
	_(ULE,          d2,   def, def, ___) /* unsigned less or equal      */ \
	_(UGT,          d2,   def, def, ___) /* unsigned greater            */ \
	_(ORDERED,      d2,   def, def, ___) /* both operands are not NAN   */ \
	_(UNORDERED,    d2,   def, def, ___) /* one of operands is NAN      */ \
	\
	/* arithmetic ops                                                   */ \
	_(ADD,          d2C,  def, def, ___) /* addition                    */ \
	_(SUB,          d2,   def, def, ___) /* subtraction (must be ADD+1) */ \
	_(MUL,          d2C,  def, def, ___) /* multiplication              */ \
	_(DIV,          d2,   def, def, ___) /* division                    */ \
	_(MOD,          d2,   def, def, ___) /* modulo                      */ \
	_(NEG,          d1,   def, ___, ___) /* change sign                 */ \
	_(ABS,          d1,   def, ___, ___) /* absolute value              */ \
	/* (LDEXP, MIN, MAX, FPMATH)                                        */ \
	\
	/* type conversion ops                                              */ \
	_(SEXT,         d1,   def, ___, ___) /* sign extension              */ \
	_(ZEXT,         d1,   def, ___, ___) /* zero extension              */ \
	_(TRUNC,        d1,   def, ___, ___) /* truncates to int type       */ \
	_(BITCAST,      d1,   def, ___, ___) /* binary representation       */ \
	_(INT2FP,       d1,   def, ___, ___) /* int to float conversion     */ \
	_(FP2INT,       d1,   def, ___, ___) /* float to int conversion     */ \
	_(FP2FP,        d1,   def, ___, ___) /* float to float conversion   */ \
	_(PROTO,        d1X1, def, pro, ___) /* apply function prototype    */ \
	\
	/* overflow-check                                                   */ \
	_(ADD_OV,       d2C,  def, def, ___) /* addition                    */ \
	_(SUB_OV,       d2,   def, def, ___) /* subtraction                 */ \
	_(MUL_OV,       d2C,  def, def, ___) /* multiplication              */ \
	_(OVERFLOW,     d1,   def, ___, ___) /* overflow check add/sub/mul  */ \
	\
	/* bitwise and shift ops                                            */ \
	_(NOT,          d1,   def, ___, ___) /* bitwise NOT                 */ \
	_(OR,           d2C,  def, def, ___) /* bitwise OR                  */ \
	_(AND,          d2C,  def, def, ___) /* bitwise AND                 */ \
	_(XOR,          d2C,  def, def, ___) /* bitwise XOR                 */ \
	_(SHL,	        d2,   def, def, ___) /* logic shift left            */ \
	_(SHR,	        d2,   def, def, ___) /* logic shift right           */ \
	_(SAR,	        d2,   def, def, ___) /* arithmetic shift right      */ \
	_(ROL,	        d2,   def, def, ___) /* rotate left                 */ \
	_(ROR,	        d2,   def, def, ___) /* rotate right                */ \
	_(BSWAP,        d1,   def, ___, ___) /* byte swap                   */ \
	_(CTPOP,        d1,   def, ___, ___) /* count population            */ \
	_(CTLZ,         d1,   def, ___, ___) /* count leading zeros         */ \
	_(CTTZ,         d1,   def, ___, ___) /* count trailing zeros        */ \
	\
	/* branch-less conditional ops                                      */ \
	_(MIN,	        d2C,  def, def, ___) /* min(op1, op2)               */ \
	_(MAX,	        d2C,  def, def, ___) /* max(op1, op2)               */ \
	_(COND,	        d3,   def, def, def) /* op1 ? op2 : op3             */ \
	\
	/* data-flow and miscellaneous ops                                  */ \
	_(VADDR,        d1,   var, ___, ___) /* load address of local var   */ \
	_(FRAME_ADDR,   d0,   ___, ___, ___) /* function frame address      */ \
	_(PHI,          pN,   reg, def, def) /* SSA Phi function            */ \
	_(COPY,         d1X1, def, opt, ___) /* COPY (last foldable op)     */ \
	_(PI,           p2,   reg, def, ___) /* e-SSA Pi constraint ???     */ \
	_(ARGVAL,       d1X2, def, num, num) /* pass struct arg by value    */ \
	                                     /* (op2 - size, op3 - align)   */ \
	/* (USE, RENAME)                                                    */ \
	\
	/* data ops                                                         */ \
	_(PARAM,        p1X2, reg, str, num) /* incoming parameter proj.    */ \
	_(VAR,	        p1X1, reg, str, ___) /* local variable              */ \
	_(FUNC_ADDR,    r0,   ___, ___, ___) /* constant func ref           */ \
	_(FUNC,         r0,   ___, ___, ___) /* constant func ref           */ \
	_(SYM,          r0,   ___, ___, ___) /* constant symbol ref         */ \
	_(STR,          r0,   ___, ___, ___) /* constant str ref            */ \
	\
	/* call ops                                                         */ \
	_(CALL,         xN,   src, def, def) /* CALL(src, func, args...)    */ \
	_(TAILCALL,     xN,   src, def, def) /* CALL+RETURN                 */ \
	\
	/* memory reference and load/store ops                              */ \
	_(ALLOCA,       a2,   src, def, ___) /* alloca(def)                 */ \
	_(AFREE,        a2,   src, def, ___) /* revert alloca(def)          */ \
	_(BLOCK_BEGIN,  a1,   src, ___, ___) /* stacksave                   */ \
	_(BLOCK_END,    a2,   src, def, ___) /* stackrestore                */ \
	_(VLOAD,        l2,   src, var, ___) /* load value of local var     */ \
	_(VSTORE,       s3,   src, var, def) /* store value to local var    */ \
	_(RLOAD,        l1X2, src, num, opt) /* load value from register    */ \
	_(RSTORE,       s2X1, src, def, num) /* store value into register   */ \
	_(LOAD,         l2,   src, ref, ___) /* load from memory            */ \
	_(STORE,        s3,   src, ref, def) /* store to memory             */ \
	_(TLS,          l1X2, src, num, num) /* thread local variable       */ \
	_(TRAP,         x1,   src, ___, ___) /* DebugBreak                  */ \
	/* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ???       */ \
	\
	/* va_args                                                          */ \
	_(VA_START,     x2,   src, def, ___) /* va_start(va_list)           */ \
	_(VA_END,       x2,   src, def, ___) /* va_end(va_list)             */ \
	_(VA_COPY,      x3,   src, def, def) /* va_copy(dst, stc)           */ \
	_(VA_ARG,       x2X1, src, def, opt) /* va_arg(va_list)             */ \
	                                     /* op3 - (size<<3)+log2(align) */ \
	\
	/* guards                                                           */ \
	_(GUARD,        c3,   src, def, def) /* IF without second successor */ \
	_(GUARD_NOT  ,  c3,   src, def, def) /* IF without second successor */ \
	\
	/* deoptimization                                                   */ \
	_(SNAPSHOT,     xN,   src, def, def) /* SNAPSHOT(src, args...)      */ \
	\
	/* control-flow nodes                                               */ \
	_(START,        S0X1, ret, ___, ___) /* function start              */ \
	_(ENTRY,        S1X1, src, num, ___) /* entry with a fake src edge  */ \
	_(BEGIN,        S1,   src, ___, ___) /* block start                 */ \
	_(IF_TRUE,      S1X1, src, prb, ___) /* IF TRUE proj.               */ \
	_(IF_FALSE,     S1X1, src, prb, ___) /* IF FALSE proj.              */ \
	_(CASE_VAL,     S2X1, src, def, prb) /* switch proj.                */ \
	_(CASE_RANGE,   S3,   src, def, def) /* switch proj.                */ \
	_(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj.                */ \
	_(MERGE,        SN,   src, src, src) /* control merge               */ \
	_(LOOP_BEGIN,   SN,   src, src, src) /* loop start                  */ \
	_(END,          E1,   src, ___, ___) /* block end                   */ \
	_(LOOP_END,     E1,   src, ___, ___) /* loop end                    */ \
	_(IF,           E2,   src, def, ___) /* conditional control split   */ \
	_(SWITCH,       E2,   src, def, ___) /* multi-way control split     */ \
	_(RETURN,       T2X1, src, def, ret) /* function return             */ \
	_(IJMP,         T2X1, src, def, ret) /* computed goto               */ \
	_(UNREACHABLE,  T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
	\
	/* deoptimization helper                                            */ \
	_(EXITCALL,     x2,   src, def, ___) /* save CPU regs and call op2  */ \


#define IR_OP_ENUM(name, flags, op1, op2, op3) IR_ ## name,

typedef enum _ir_op {
	IR_OPS(IR_OP_ENUM)
#ifdef IR_PHP
	IR_PHP_OPS(IR_OP_ENUM)
#endif
	IR_LAST_OP
} ir_op;

/* IR Opcode and Type Union */
#define IR_OPT_OP_MASK       0x00ff
#define IR_OPT_TYPE_MASK     0xff00
#define IR_OPT_TYPE_SHIFT    8
#define IR_OPT_INPUTS_SHIFT  16

#define IR_OPT(op, type)     ((uint16_t)(op) | ((uint16_t)(type) << IR_OPT_TYPE_SHIFT))
#define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT))
#define IR_OPT_TYPE(opt)     (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT)

/* IR References */
typedef int32_t ir_ref;

#define IR_IS_CONST_REF(ref) ((ref) < 0)

/* IR Constant Value */
#define IR_UNUSED            0
#define IR_NULL              (-1)
#define IR_FALSE             (-2)
#define IR_TRUE              (-3)
#define IR_LAST_FOLDABLE_OP  IR_COPY

#define IR_CONSTS_LIMIT_MIN (-(IR_TRUE - 1))
#define IR_INSNS_LIMIT_MIN (IR_UNUSED + 1)

/* ADDR_MEMBER is neccessary to workaround MSVC C preprocessor bug */
#ifndef IR_64
# define ADDR_MEMBER            uintptr_t                  addr; \
								void                      *ptr;
#else
# define ADDR_MEMBER
#endif
typedef union _ir_val {
	double                             d;
	uint64_t                           u64;
	int64_t                            i64;
#ifdef IR_64
	uintptr_t                          addr;
	void                              *ptr;
#endif
	IR_STRUCT_LOHI(
		union {
			uint32_t                   u32;
			int32_t                    i32;
			float                      f;
			ADDR_MEMBER
			ir_ref                     name;
			ir_ref                     str;
			IR_STRUCT_LOHI(
				union {
					uint16_t           u16;
					int16_t            i16;
					IR_STRUCT_LOHI(
						union {
							uint8_t    u8;
							int8_t     i8;
							bool       b;
							char       c;
						},
						uint8_t        u8_hi
					);
				},
				uint16_t               u16_hi
			);
		},
		uint32_t                       u32_hi
	);
} ir_val;
#undef ADDR_MEMBER

/* IR Instruction */
typedef struct _ir_insn {
	IR_STRUCT_LOHI(
		union {
			IR_STRUCT_LOHI(
				union {
					IR_STRUCT_LOHI(
						uint8_t        op,
						uint8_t        type
					);
					uint16_t           opt;
				},
				union {
					uint16_t           inputs_count;       /* number of input control edges for MERGE, PHI, CALL, TAILCALL */
					uint16_t           prev_insn_offset;   /* 16-bit backward offset from current instruction for CSE */
					uint16_t           proto;
				}
			);
			uint32_t                   optx;
			ir_ref                     ops[1];
		},
		union {
			ir_ref                     op1;
			ir_ref                     ref;
			ir_ref                     prev_const;
		}
	);
	union {
		IR_STRUCT_LOHI(
			ir_ref                     op2,
			ir_ref                     op3
		);
		ir_val                         val;
	};
} ir_insn;

/* IR Hash Tables API (private) */
typedef struct _ir_hashtab ir_hashtab;

/* IR String Tables API (implementation in ir_strtab.c) */
typedef struct _ir_strtab {
	void       *data;
	uint32_t    mask;
	uint32_t    size;
	uint32_t    count;
	uint32_t    pos;
	char       *buf;
	uint32_t    buf_size;
	uint32_t    buf_top;
} ir_strtab;

#define ir_strtab_count(strtab) (strtab)->count

typedef void (*ir_strtab_apply_t)(const char *str, uint32_t len, ir_ref val);

void ir_strtab_init(ir_strtab *strtab, uint32_t count, uint32_t buf_size);
ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len);
ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx);
const char *ir_strtab_strl(const ir_strtab *strtab, ir_ref idx, size_t *len);
void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func);
void ir_strtab_free(ir_strtab *strtab);

/* IR Context Flags */
#define IR_FUNCTION            (1<<0) /* Generate a function. */
#define IR_FASTCALL_FUNC       (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */
#define IR_VARARG_FUNC         (1<<2)
#define IR_BUILTIN_FUNC        (1<<3)
#define IR_STATIC              (1<<4)
#define IR_EXTERN              (1<<5)
#define IR_CONST               (1<<6)

#define IR_INITIALIZED         (1<<7) /* sym data flag: constant or an initialized variable */
#define IR_CONST_STRING        (1<<8) /* sym data flag: constant string */

#define IR_SKIP_PROLOGUE       (1<<8) /* Don't generate function prologue. */
#define IR_USE_FRAME_POINTER   (1<<9)
#define IR_PREALLOCATED_STACK  (1<<10)
#define IR_NO_STACK_COMBINE    (1<<11)
#define IR_START_BR_TARGET     (1<<12)
#define IR_ENTRY_BR_TARGET     (1<<13)
#define IR_GEN_ENDBR           (1<<14)
#define IR_MERGE_EMPTY_ENTRIES (1<<15)

#define IR_OPT_INLINE          (1<<16)
#define IR_OPT_FOLDING         (1<<17)
#define IR_OPT_CFG             (1<<18) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
#define IR_OPT_MEM2SSA         (1<<19)
#define IR_OPT_CODEGEN         (1<<20)
#define IR_GEN_NATIVE          (1<<21)
#define IR_GEN_CODE            (1<<22) /* C or LLVM */

#define IR_GEN_CACHE_DEMOTE    (1<<23) /* Demote the generated code from closest CPU caches */

/* debug related */
#ifdef IR_DEBUG
# define IR_DEBUG_SCCP         (1<<26)
# define IR_DEBUG_GCM          (1<<27)
# define IR_DEBUG_GCM_SPLIT    (1<<28)
# define IR_DEBUG_SCHEDULE     (1<<29)
# define IR_DEBUG_RA           (1<<30)
# define IR_DEBUG_BB_SCHEDULE  (1U<<31)
#endif

typedef struct _ir_ctx           ir_ctx;
typedef struct _ir_use_list      ir_use_list;
typedef struct _ir_block         ir_block;
typedef struct _ir_arena         ir_arena;
typedef struct _ir_live_interval ir_live_interval;
typedef struct _ir_live_range    ir_live_range;
typedef struct _ir_loader        ir_loader;
typedef int8_t ir_regs[4];

typedef void (*ir_snapshot_create_t)(ir_ctx *ctx, ir_ref addr);

#if defined(IR_TARGET_AARCH64)
typedef const void *(*ir_get_exit_addr_t)(uint32_t exit_num);
typedef const void *(*ir_get_veneer_t)(ir_ctx *ctx, const void *addr);
typedef bool (*ir_set_veneer_t)(ir_ctx *ctx, const void *addr, const void *veneer);
#endif

typedef struct _ir_code_buffer {
	void *start;
	void *end;
	void *pos;
} ir_code_buffer;

typedef struct {
	int   size;
	int   align;
	int   offset;
} ir_value_param;

#define IR_CONST_HASH_SIZE 64

struct _ir_ctx {
	ir_insn           *ir_base;                 /* two directional array - instructions grow down, constants grow up */
	ir_ref             insns_count;             /* number of instructions stored in instructions buffer */
	ir_ref             insns_limit;             /* size of allocated instructions buffer (it's extended when overflow) */
	ir_ref             consts_count;            /* number of constants stored in constants buffer */
	ir_ref             consts_limit;            /* size of allocated constants buffer (it's extended when overflow) */
	uintptr_t          const_hash_mask;
	ir_ref            *const_hash;
	uint32_t           flags;                   /* IR context flags (see IR_* defines above) */
	uint32_t           flags2;                  /* IR context private flags (see IR_* defines in ir_private.h) */
	ir_type            ret_type;                /* Function return type */
	uint32_t           mflags;                  /* CPU specific flags (see IR_X86_... macros below) */
	int32_t            status;                  /* non-zero error code (see IR_ERROR_... macros), app may use negative codes */
	ir_ref             fold_cse_limit;          /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */
	ir_insn            fold_insn;               /* temporary storage for folding engine */
	ir_value_param    *value_params;            /* information about "by-val" struct parameters */
	ir_hashtab        *binding;
	ir_use_list       *use_lists;               /* def->use lists for each instruction */
	ir_ref            *use_edges;               /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */
	ir_ref             use_edges_count;         /* number of elements in use_edges[] array */
	uint32_t           cfg_blocks_count;        /* number of elements in cfg_blocks[] array */
	uint32_t           cfg_edges_count;         /* number of elements in cfg_edges[] array */
	ir_block          *cfg_blocks;              /* list of basic blocks (starts from 1) */
	uint32_t          *cfg_edges;               /* the actual basic blocks predecessors and successors edges */
	uint32_t          *cfg_map;                 /* map of instructions to basic block number */
	uint32_t          *cfg_schedule;            /* BB order for code generation */
	uint32_t          *rules;                   /* array of target specific code-generation rules (for each instruction) */
	uint32_t          *vregs;
	ir_ref             vregs_count;
	int32_t            spill_base;              /* base register for special spill area (e.g. PHP VM frame pointer) */
	uint64_t           fixed_regset;            /* fixed registers, excluded for regular register allocation */
	int32_t            fixed_stack_red_zone;    /* reusable stack allocated by caller (default 0) */
	int32_t            fixed_stack_frame_size;  /* fixed stack allocated by generated code for spills and registers save/restore */
	int32_t            fixed_call_stack_size;   /* fixed preallocated stack for parameter passing (default 0) */
	uint64_t           fixed_save_regset;       /* registers that always saved/restored in prologue/epilogue */
	uint32_t           locals_area_size;
	uint32_t           gp_reg_params;
	uint32_t           fp_reg_params;
	int32_t            param_stack_size;
	ir_live_interval **live_intervals;
	ir_arena          *arena;
	ir_live_range     *unused_ranges;
	ir_regs           *regs;
	ir_strtab         *fused_regs;
	ir_ref            *prev_ref;
	union {
		void          *data;
		ir_ref         control;                 /* used by IR construction API (see ir_builder.h) */
		ir_ref         bb_start;                /* used by target CPU instruction matcher */
		ir_ref         vars;                    /* list of VARs (used by register allocator) */
	};
	ir_snapshot_create_t   snapshot_create;
	int32_t            stack_frame_alignment;
	int32_t            stack_frame_size;        /* spill stack frame size (used by register allocator and code generator) */
	int32_t            call_stack_size;         /* stack for parameter passing (used by register allocator and code generator) */
	uint64_t           used_preserved_regs;
#ifdef IR_TARGET_X86
	int32_t            ret_slot;
#endif
	uint32_t           rodata_offset;
	uint32_t           jmp_table_offset;
	uint32_t           entries_count;
	uint32_t          *entries;                /* array of ENTRY blocks */
	void              *osr_entry_loads;
	ir_code_buffer    *code_buffer;
#if defined(IR_TARGET_AARCH64)
	int32_t            deoptimization_exits;
	const void        *deoptimization_exits_base;
	ir_get_exit_addr_t get_exit_addr;
	ir_get_veneer_t    get_veneer;
	ir_set_veneer_t    set_veneer;
#endif
	ir_loader         *loader;
	ir_strtab          strtab;
	ir_ref             prev_insn_chain[IR_LAST_FOLDABLE_OP + 1];
	ir_ref             _const_hash[IR_CONST_HASH_SIZE];
};

/* Basic IR Construction API (implementation in ir.c) */
void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit);
void ir_free(ir_ctx *ctx);
void ir_truncate(ir_ctx *ctx);

ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type);
ir_ref ir_const_i8(ir_ctx *ctx, int8_t c);
ir_ref ir_const_i16(ir_ctx *ctx, int16_t c);
ir_ref ir_const_i32(ir_ctx *ctx, int32_t c);
ir_ref ir_const_i64(ir_ctx *ctx, int64_t c);
ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c);
ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c);
ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c);
ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c);
ir_ref ir_const_bool(ir_ctx *ctx, bool c);
ir_ref ir_const_char(ir_ctx *ctx, char c);
ir_ref ir_const_float(ir_ctx *ctx, float c);
ir_ref ir_const_double(ir_ctx *ctx, double c);
ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c);

ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, ir_ref proto);
ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, ir_ref proto);
ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str);
ir_ref ir_const_str(ir_ctx *ctx, ir_ref str);

ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c);

void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted);

ir_ref ir_str(ir_ctx *ctx, const char *s);
ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len);
const char *ir_get_str(const ir_ctx *ctx, ir_ref idx);
const char *ir_get_strl(const ir_ctx *ctx, ir_ref idx, size_t *len);

#define IR_MAX_PROTO_PARAMS 255

typedef struct _ir_proto_t {
	uint8_t flags;
	uint8_t ret_type;
	uint8_t params_count;
	uint8_t param_types[5];
} ir_proto_t;

ir_ref ir_proto_0(ir_ctx *ctx, uint8_t flags, ir_type ret_type);
ir_ref ir_proto_1(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1);
ir_ref ir_proto_2(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2);
ir_ref ir_proto_3(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3);
ir_ref ir_proto_4(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3,
                                                                ir_type t4);
ir_ref ir_proto_5(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3,
                                                                ir_type t4, ir_type t5);
ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_counts, uint8_t *param_types);

ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);

ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt);
ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);

ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
void   ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
ir_ref ir_get_op(ir_ctx *ctx, ir_ref ref, int32_t n);

IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op1 = val;
}

IR_ALWAYS_INLINE void ir_set_op2(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op2 = val;
}

IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val)
{
	ctx->ir_base[ref].op3 = val;
}

IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n)
{
	const ir_ref *p = insn->ops + n;
	return *p;
}

IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val)
{
	ir_ref *p = insn->ops + n;
	*p = val;
}

IR_ALWAYS_INLINE uint32_t ir_insn_find_op(const ir_insn *insn, ir_ref val)
{
	int i, n = insn->inputs_count;

	for (i = 1; i <= n; i++) {
		if (ir_insn_op(insn, i) == val) {
			return i;
		}
	}
	return 0;
}

ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);

ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt);
ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);

ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos);
ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name);

/* IR Binding */
ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def);
ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref);

/* Def -> Use lists */
void ir_build_def_use_lists(ir_ctx *ctx);

/* SSA Construction */
int ir_mem2ssa(ir_ctx *ctx);

/* CFG - Control Flow Graph (implementation in ir_cfg.c) */
int ir_build_cfg(ir_ctx *ctx);
int ir_build_dominators_tree(ir_ctx *ctx);
int ir_find_loops(ir_ctx *ctx);
int ir_schedule_blocks(ir_ctx *ctx);
void ir_reset_cfg(ir_ctx *ctx);

/* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
int ir_sccp(ir_ctx *ctx);

/* GCM - Global Code Motion and scheduling (implementation in ir_gcm.c) */
int ir_gcm(ir_ctx *ctx);
int ir_schedule(ir_ctx *ctx);

/* Liveness & Register Allocation (implementation in ir_ra.c) */
#define IR_REG_NONE          -1
#define IR_REG_SPILL_LOAD    (1<<6)
#define IR_REG_SPILL_STORE   (1<<6)
#define IR_REG_SPILL_SPECIAL (1<<7)
#define IR_REG_SPILLED(r) \
	((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))
#define IR_REG_NUM(r) \
	((int8_t)((r) == IR_REG_NONE ? IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))))

int ir_assign_virtual_registers(ir_ctx *ctx);
int ir_compute_live_ranges(ir_ctx *ctx);
int ir_coalesce(ir_ctx *ctx);
int ir_compute_dessa_moves(ir_ctx *ctx);
int ir_reg_alloc(ir_ctx *ctx);

int ir_regs_number(void);
bool ir_reg_is_int(int32_t reg);
const char *ir_reg_name(int8_t reg, ir_type type);
int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);

/* Target CPU instruction selection and code generation (see ir_x86.c) */
int ir_match(ir_ctx *ctx);
void *ir_emit_code(ir_ctx *ctx, size_t *size);

bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
void ir_fix_thunk(void *thunk_entry, void *addr);

/* Target address resolution (implementation in ir_emit.c) */
void *ir_resolve_sym_name(const char *name);

/* Target CPU disassembler (implementation in ir_disasm.c) */
int  ir_disasm_init(void);
void ir_disasm_free(void);
void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size);
const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset);
int  ir_disasm(const char *name,
               const void *start,
               size_t      size,
               bool        asm_addr,
               ir_ctx     *ctx,
               FILE       *f);

/* Linux perf interface (implementation in ir_perf.c) */
int ir_perf_jitdump_open(void);
int ir_perf_jitdump_close(void);
int ir_perf_jitdump_register(const char *name, const void *start, size_t size);
void ir_perf_map_register(const char *name, const void *start, size_t size);

/* GDB JIT interface (implementation in ir_gdb.c) */
int ir_gdb_register(const char    *name,
                    const void    *start,
                    size_t         size,
                    uint32_t       sp_offset,
                    uint32_t       sp_adjustment);
void ir_gdb_unregister_all(void);
bool ir_gdb_present(void);

/* IR load API (implementation in ir_load.c) */
#define IR_RESOLVE_SYM_ADD_THUNK (1<<0)
#define IR_RESOLVE_SYM_SILENT    (1<<1)

struct _ir_loader {
	uint32_t default_func_flags;
	bool (*init_module)       (ir_loader *loader, const char *name, const char *filename, const char *target);
	bool (*external_sym_dcl)  (ir_loader *loader, const char *name, uint32_t flags);
	bool (*external_func_dcl) (ir_loader *loader, const char *name,
                               uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types);
	bool (*forward_func_dcl)  (ir_loader *loader, const char *name,
                               uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types);
	bool (*sym_dcl)           (ir_loader *loader, const char *name, uint32_t flags, size_t size);
	bool (*sym_data)          (ir_loader *loader, ir_type type, uint32_t count, const void *data);
	bool (*sym_data_str)      (ir_loader *loader, const char *str, size_t len);
	bool (*sym_data_pad)      (ir_loader *loader, size_t offset);
	bool (*sym_data_ref)      (ir_loader *loader, ir_op op, const char *ref, uintptr_t offset);
	bool (*sym_data_end)      (ir_loader *loader, uint32_t flags);
	bool (*func_init)         (ir_loader *loader, ir_ctx *ctx, const char *name);
	bool (*func_process)      (ir_loader *loader, ir_ctx *ctx, const char *name);
	void*(*resolve_sym_name)  (ir_loader *loader, const char *name, uint32_t flags);
	bool (*has_sym)           (ir_loader *loader, const char *name);
	bool (*add_sym)           (ir_loader *loader, const char *name, void *addr);
};

void ir_loader_init(void);
void ir_loader_free(void);
int ir_load(ir_loader *loader, FILE *f);

/* IR LLVM load API (implementation in ir_load_llvm.c) */
int ir_load_llvm_bitcode(ir_loader *loader, const char *filename);
int ir_load_llvm_asm(ir_loader *loader, const char *filename);

/* IR save API (implementation in ir_save.c) */
#define IR_SAVE_CFG        (1<<0) /* add info about CFG */
#define IR_SAVE_CFG_MAP    (1<<1) /* add info about CFG block assignment */
#define IR_SAVE_USE_LISTS  (1<<2) /* add info about def->use lists */
#define IR_SAVE_RULES      (1<<3) /* add info about selected code-generation rules */
#define IR_SAVE_REGS       (1<<4) /* add info about selected registers */
#define IR_SAVE_SAFE_NAMES (1<<5) /* add '@' prefix to symbol names */

void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_save(const ir_ctx *ctx, uint32_t save_flags, FILE *f);

/* IR debug dump API (implementation in ir_dump.c) */
void ir_dump(const ir_ctx *ctx, FILE *f);
void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f);
void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
void ir_dump_cfg(ir_ctx *ctx, FILE *f);
void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);
void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f);
void ir_dump_codegen(const ir_ctx *ctx, FILE *f);

/* IR to C conversion (implementation in ir_emit_c.c) */
int ir_emit_c(ir_ctx *ctx, const char *name, FILE *f);
void ir_emit_c_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_emit_c_sym_decl(const char *name, uint32_t flags, FILE *f);

/* IR to LLVM conversion (implementation in ir_emit_llvm.c) */
int ir_emit_llvm(ir_ctx *ctx, const char *name, FILE *f);
void ir_emit_llvm_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
void ir_emit_llvm_sym_decl(const char *name, uint32_t flags, FILE *f);

/* IR verification API (implementation in ir_check.c) */
bool ir_check(const ir_ctx *ctx);
void ir_consistency_check(void);

/* Code patching (implementation in ir_patch.c) */
int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr);

/* CPU information (implementation in ir_cpuinfo.c) */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# define IR_X86_SSE2     (1<<0)
# define IR_X86_SSE3     (1<<1)
# define IR_X86_SSSE3    (1<<2)
# define IR_X86_SSE41    (1<<3)
# define IR_X86_SSE42    (1<<4)
# define IR_X86_AVX      (1<<5)
# define IR_X86_AVX2     (1<<6)
# define IR_X86_BMI1     (1<<7)
# define IR_X86_CLDEMOTE (1<<8)
#endif

uint32_t ir_cpuinfo(void);

/* Deoptimization helpers */
const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr);

/* A reference IR JIT compiler */
IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
{
	if (opt_level == 0) {
		if (ctx->flags & IR_OPT_FOLDING) {
			// IR_ASSERT(0 && "IR_OPT_FOLDING is incompatible with -O0");
			return NULL;
		}
		ctx->flags &= ~(IR_OPT_CFG | IR_OPT_CODEGEN);

		ir_build_def_use_lists(ctx);

		if (!ir_build_cfg(ctx)
		 || !ir_match(ctx)
		 || !ir_assign_virtual_registers(ctx)
		 || !ir_compute_dessa_moves(ctx)) {
			return NULL;
		}

		return ir_emit_code(ctx, size);
	} else if (opt_level > 0) {
		if (!(ctx->flags & IR_OPT_FOLDING)) {
			// IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2");
			return NULL;
		}
		ctx->flags |= IR_OPT_CFG | IR_OPT_CODEGEN;

		ir_build_def_use_lists(ctx);

		if (ctx->flags & IR_OPT_MEM2SSA) {
			if (!ir_build_cfg(ctx)
			 || !ir_build_dominators_tree(ctx)
			 || !ir_mem2ssa(ctx)) {
				return NULL;
			}
			ir_reset_cfg(ctx);
		}

		if (opt_level > 1) {
			if (!ir_sccp(ctx)) {
				return NULL;
			}
		}

		if (!ctx->cfg_blocks) {
			if (!ir_build_cfg(ctx)
			 || !ir_build_dominators_tree(ctx)) {
				return NULL;
			}
		}

		if (!ir_find_loops(ctx)
		 || !ir_gcm(ctx)
		 || !ir_schedule(ctx)
		 || !ir_match(ctx)
		 || !ir_assign_virtual_registers(ctx)
		 || !ir_compute_live_ranges(ctx)
		 || !ir_coalesce(ctx)
		 || !ir_reg_alloc(ctx)
		 || !ir_schedule_blocks(ctx)) {
			return NULL;
		}

		return ir_emit_code(ctx, size);
	} else {
		// IR_ASSERT(0 && "wrong optimization level");
		return NULL;
	}
}

#define IR_ERROR_CODE_MEM_OVERFLOW               1
#define IR_ERROR_FIXED_STACK_FRAME_OVERFLOW      2
#define IR_ERROR_UNSUPPORTED_CODE_RULE           3
#define IR_ERROR_LINK                            4
#define IR_ERROR_ENCODE                          5
#define IR_ERROR_TOO_LARGE                       6

/* IR Memmory Allocation */
#ifndef ir_mem_malloc
# define ir_mem_malloc   malloc
#endif
#ifndef ir_mem_calloc
# define ir_mem_calloc   calloc
#endif
#ifndef ir_mem_realloc
# define ir_mem_realloc  realloc
#endif
#ifndef ir_mem_free
# define ir_mem_free     free
#endif

#ifndef ir_mem_pmalloc
# define ir_mem_pmalloc  malloc
#endif
#ifndef ir_mem_pcalloc
# define ir_mem_pcalloc  calloc
#endif
#ifndef ir_mem_prealloc
# define ir_mem_prealloc realloc
#endif
#ifndef ir_mem_pfree
# define ir_mem_pfree    free
#endif

void *ir_mem_mmap(size_t size);
int ir_mem_unmap(void *ptr, size_t size);
int ir_mem_protect(void *ptr, size_t size);
int ir_mem_unprotect(void *ptr, size_t size);
int ir_mem_flush(void *ptr, size_t size);

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* IR_H */
