// my_predictor.h

/*--------------------------------------------------------------------------------
Title: PMPM: Prediction by combining Multiple Partial Matches (Realistic Track)

Authors: Hongliang Gao and Huiyang Zhou, Oct. 2006

For: CBP2

Code is derived from the simulator of TAGE predictors from Andre Seznec and Pierre Michaud
The code has been compiled and tested under gcc 2.96
--------------------------------------------------------------------------------*/

//Three switches to get results presented in the paper:

//the base configuraitons (in Section 4.6) and policies will be used if the following line is commented out
#define USE_CBP2

//#define GHR_ONLY  //don't use local history

#define AP_INDEX 1 //# of blocks ahead


//If you use a newer gcc version and get compilation error,
//please try to comment out the following line
#include <algorithm>

#include <bitset>
#include <inttypes.h>
#include <math.h>

//some pre-defined parameters
//-----------------------------------------------------------
#ifndef USE_CBP2
#define BASE_CONFIG            //use the base configurations and policies
#endif

#if AP_INDEX != 1 
#define AP_TAG (AP_INDEX - 1)  //# of blocks ahead for tag
#define AP_READ (AP_INDEX - 1) //# of blocks ahead for table reading
#else
#define AP_TAG 1
#define AP_READ 1
#endif

#define AP_MAX  5             //max # of blocks ahead
#define AP_SIZE 32            //max # of potential preds

#define MAXHIST 204

#ifdef USE_CBP2
#define THRESFIT 64   //for threshold fitting
#else
#define THRESFIT 32
#endif
#define TC_MAX 31     //maximum threshold

#ifdef USE_CBP2
#define TC_INIT 9     //initial threshold
#else
#define TC_INIT 8
#endif

//hard benchmark and high aliasing benchmark detection phase
//detect on every 128k branches
#define TICK_PHASE 17 
//---------------------------------------------------------

using namespace std;

typedef uint32_t address_t;
typedef bitset < MAXHIST > his_t;

//Comments from André Seznec and Pierre Michaud:
// this is the cyclic shift register for folding 
// a long global history into a smaller number of bits

class folded_history_chunk
{
public:
  unsigned comp;
  int in, out;
  int CLENGTH;
  int OLENGTH;
  int OUTPOINT;

    folded_history_chunk ()
  {
  }

  void init (int original_length, int compressed_length, int i, int o)
  {
    comp = 0;
    OLENGTH = original_length;
    CLENGTH = compressed_length;
    OUTPOINT = OLENGTH % CLENGTH;
    in = i;
    out = o;
    assert (OLENGTH < MAXHIST && in < MAXHIST && out < MAXHIST);
  }

  void update (his_t h)
  {
    assert ((comp >> CLENGTH) == 0);
    comp = (comp << 1) | h[in];
    comp ^= h[out] << OUTPOINT;
    comp ^= (comp >> CLENGTH);
    comp &= (1 << CLENGTH) - 1;
  }
};

//Comments from André Seznec and Pierre Michaud:
// bimodal table entry
class bentry_c
{
 public:
  int8_t hyst;
  int8_t pred;
  void reset()
    {
      pred = 0;
      hyst = 1;
    };
  bentry_c ()
    {
      pred = 0;
      hyst = 1;
    }
};

// a prediction table entry
class TableEntry_c
{
 public:
  int8_t ctr;
  uint16_t tag;
  int8_t ubit;

  void reset()
    {
      ctr = 0;
      tag = 0;
      ubit = 0;
    };

  TableEntry_c ()
    {
      ctr = 0;
      tag = 0;
      ubit = 0;
    }
  };

class CPredTable //a taged prediction table
{
 public:
  int size; 
  int ct_size;   //size of a prediction counter
  int i_len;     //global history length used to index this table
  folded_history_chunk ch_i;     //cyclic shift register for index
  folded_history_chunk ch_t[2];  //cyclic shift register for tag
  TableEntry_c *entry;           //array of prediction entries
  unsigned int tagw;             //tag width
};

//To model ahead pipelining, we use this structure 
//to record information from previours branches.
//Description of the meanings of those variables is in the definition of class CPMPM.
struct pred_info
{
  //indexes and tags
  int LocalPredIndex[AP_SIZE], LocalHisIndex[AP_SIZE];
  unsigned int LocalTag[AP_SIZE];
  int GlobalIndex[7][AP_SIZE];
  int BimIndex[AP_SIZE], HysIndex[AP_SIZE];
  int gtags[7][AP_SIZE];

  //entry values and pointers
  TableEntry_c *gentry[7][AP_SIZE];
  TableEntry_c *lentry[AP_SIZE];
  TableEntry_c vgentry[7][AP_SIZE];
  TableEntry_c vlentry[AP_SIZE];
  int BimCounter[AP_SIZE];
  bool bmpred[AP_SIZE];

  int icheck;    //for debug
  bool validp;   //at the begining of the simulation, no "previous" information is avalable
};

//the main class of the PMPM predictor
class CPMPM
{
public:
  int LHRT_SIZE;            //local history table size
  int LHRP_SIZE;            //size of the tagged local history prediction table
  int LHR_LEN;              //local history length
  int LHR_TAG;              //tag width
  int LHR_CT_SIZE;          //prediction counter size

  int TC;                   //a counter for threshold fitting
  int thresupdate;          //training threshold

  //storage
  int NUM_GTABLE;           //number of gtables
  int BM_SIZE;              //size of the bimodal table
  int GTABLE_SIZE;          //size of a gtable
  int CT_SIZE, TAG_SIZE;    //sizes of a prediction counter and a tag
  int TICK;                 //17 bits, for phase-based optimizations
  uint32_t phist;           //16 bits, path history
  his_t ghist;              //global history
  uint32_t inter_info;      //intermediate information to select out a prediction
  uint32_t num_miss, num_hitc0, num_hitc0_miss; //statistic counters

  bentry_c *btable;         //the bimodal table
  CPredTable *gtable;       //global prediction tables
  TableEntry_c *ltable;     //local prediction table
  uint32_t *lhr_table;
  unsigned int phase;       //2 bits, phase counter

  //values related to a prediction, they will be reused in the update stage
  int LocalPredIndex;       //index of the local prediction table
  int LocalHisIndex;        //index of the local history table
  unsigned int LocalTag;    //tag of the ltable      
  int GlobalIndex[7];       //index of gtables
  int BimIndex, HysIndex;   //indexes of the bimodal table

  bool hit[7];              //tag comparision results of gtables
  bool used[7];             //whether a counter is used in the summation
  int gtags[7];             //tags of gtables
  int hitc;                 //number of hit gtables
  int minhit;               //indicate which gtable is the longest match
  int y;                    //sum of counters
  int BimCounter;           //value of a bimodal counter

  bool my_pred;             //final prediction
  bool lused;               //whether the local prediction counter is included in the summation
  bool lhit;                //tag comparision result of the ltable

  TableEntry_c *gentry[7];  //pointers of the current gtable entries
  TableEntry_c vgentry[7];  //value of the current gtable entries
  TableEntry_c *lentry;     //pointer of the current ltable entry
  TableEntry_c vlentry;     //value of the current ltable entry
  bool gpred[7], lpred, bmpred; //predictions of current counters
  bool steal[7];            //whether to steal (decrease the ubit) an gtable entry 
  bool victim[7];           //which table will be assigned the new entry


  bool hard_bm;             //traces with a large number of hard-to-predict branches

  //some static masks and constants
  unsigned int mask_bm, mask_lhr, mask_lhrp, mask_ltag, mask_llen;
  unsigned int mask_gsize[7], mask_gi[7], mask_gtag[7];
  unsigned int mask_tick;
  int ctr_max[7], ctr_min[7];

  pred_info pinfo[AP_MAX];  //record some information to model ahead pipelining
  uint32_t inter_index;     //current intermediate infor to select out a prediction
  
  bool taken;               //for update
  
  ~CPMPM()
    {
      for (int i = 0; i < NUM_GTABLE; i++)
	delete [] gtable[i].entry;
      delete [] gtable;
      delete [] btable;
      delete [] ltable;
      delete [] lhr_table;
    }

  CPMPM ()
  {
    LHRT_SIZE = 0;
    LHRP_SIZE = 0;
    LHR_LEN = 0;
    LHR_TAG = 0;
    LHR_CT_SIZE = 0;

#ifdef USE_CBP2
//-------------------------------CBP2 Configuration---------------------------
    NUM_GTABLE = 7;
    BM_SIZE = 14;
    GTABLE_SIZE = 11;
    CT_SIZE = 5;

    //this tag width is not for all tables, will calculate the tag width for each table later
#ifdef GHR_ONLY
    TAG_SIZE = 12;
#else
    TAG_SIZE = 10;
#endif
    gtable = new CPredTable[NUM_GTABLE];

    //get the history lengths
    int s, d[7];
    s = 4;

    d[1] = 3;
    d[2] = 8;
    d[3] = 10;
    d[4] = 23;
    d[5] = 30;
    d[6] = 125;
    for (int i = NUM_GTABLE - 1; i >= 0; i--)
      {
        if (i == NUM_GTABLE - 1)
          gtable[NUM_GTABLE - 1].i_len = s;
        else
          gtable[i].i_len = gtable[i+1].i_len + d[6 - i];
      }

    for (int i = NUM_GTABLE - 1; i >= 0; i--)
      {
	//tag width
	gtable[i].tagw = TAG_SIZE - ((i + (NUM_GTABLE & 1)) / 2);
	
	gtable[i].size = GTABLE_SIZE;
	gtable[i].ct_size = CT_SIZE;
      }

    //fix some tag widths
    gtable[6].tagw --;
    gtable[4].tagw --;
    gtable[1].tagw ++;
#ifdef GHR_ONLY
    gtable[6].tagw --;
    gtable[5].tagw --;
    gtable[4].tagw --;
#endif

#ifndef GHR_ONLY 
    //configrations for local history related tables
    LHRT_SIZE = 10;
    LHRP_SIZE = 10;
    LHR_LEN = 11;
    LHR_CT_SIZE = 5;
    LHR_TAG = 5;
#endif

#else
//-------------------------------Base Configurations---------------------------
    NUM_GTABLE = 7;
    gtable = new CPredTable[NUM_GTABLE];

#ifdef GHR_ONLY
    BM_SIZE = 14;
    GTABLE_SIZE = 11;
    CT_SIZE = 5;
    TAG_SIZE = 9;
#else
    LHRT_SIZE = 10;
    LHRP_SIZE = 10;
    LHR_LEN = 10;
    LHR_CT_SIZE = 5;
    LHR_TAG = 5;

    BM_SIZE = 13;

    GTABLE_SIZE = 11;
    CT_SIZE = 5;
    TAG_SIZE = 9;
#endif //GHR_ONLY

    int maxh = 131, minh = 5; //reference history lengths
    gtable[0].i_len = maxh;
    gtable[NUM_GTABLE - 1].i_len = minh;
    //get geometrical history lengths (from the TAGE simulator)
    for (int i = 1; i < NUM_GTABLE - 1; i++)
      {
        gtable[NUM_GTABLE - 1 - i].i_len =
          (int) (((double) minh *
                  pow ((double) (maxh - 1) / (double) minh,
                       (double) (i) / (double) ((NUM_GTABLE - 1)))) + 0.5);
      }
    
    for (int i = NUM_GTABLE - 1; i >= 0; i--)
      {
	gtable[i].tagw = TAG_SIZE;
	gtable[i].size = GTABLE_SIZE;
	gtable[i].ct_size = CT_SIZE;
      }

#ifndef GHR_ONLY    //save some cost for local history related tables
    gtable[NUM_GTABLE - 1].tagw --;
    gtable[NUM_GTABLE - 2].tagw --;
    gtable[NUM_GTABLE - 3].tagw --;
#endif

#endif //pre-defined configurations

    //allocate prediction tables
    btable = new bentry_c[1 << BM_SIZE];
    for (int i = 0; i < NUM_GTABLE; i++)
      gtable[i].entry = new TableEntry_c[1 << gtable[i].size];

    lhr_table = new uint32_t[1 << LHRT_SIZE];
    ltable = new TableEntry_c[1 << LHRP_SIZE];

    //pre-compute some masks
    mask_bm = (1 << BM_SIZE) - 1;
    mask_lhr = (1 << LHRT_SIZE) - 1;
    mask_lhrp = (1 << LHRP_SIZE) - 1;
    mask_ltag = (1 << LHR_TAG) - 1;
    mask_llen = (1 << LHR_LEN) - 1;
    for (int i = 0; i < NUM_GTABLE; i++)
      {
	mask_gsize[i] = (1 << gtable[i].size) - 1;
	mask_gi[i] = (1 << gtable[i].i_len) - 1;
	mask_gtag[i] = (1 << gtable[i].tagw) - 1;

	ctr_max[i] = ((1 << (gtable[i].ct_size - 1)) - 1);
	ctr_min[i] = - (1 << (gtable[i].ct_size - 1));
      }

    mask_tick = ((1 << TICK_PHASE) - 1);

    reset();
  }

  //reset the predictor
  void reset()
    {
      memset(pinfo, 0, sizeof(pred_info)*AP_MAX);

      TC = 0;
      thresupdate = TC_INIT;

      ghist = 0;
      TICK = 0;
      phist = 0;
      inter_info = 0;

      phase = 0;
      hard_bm = false;

      num_hitc0 = 0;
      num_miss = 0;
      num_hitc0_miss = 0;

      //initialize cyclic registers
      for (int i = NUM_GTABLE - 1; i >= 0; i--)
	gtable[i].ch_i.init (gtable[i].i_len, gtable[i].size, 0, gtable[i].i_len);
      for (int i = 0; i < NUM_GTABLE; i++)
	{
	  gtable[i].ch_t[0].init (gtable[i].ch_i.OLENGTH, gtable[i].tagw, 0, gtable[i].i_len);
	  gtable[i].ch_t[1].init (gtable[i].ch_i.OLENGTH, gtable[i].tagw - 1, 0, gtable[i].i_len);
	}

      for (int i = 0; i < (1 << BM_SIZE); i++)
	btable[i].reset();
      for (int i = 0; i < NUM_GTABLE; i++)
	for (int j = 0; j < (1 << gtable[i].size); j++)
	  gtable[i].entry[j].reset();

      for (int j = 0; j < (1 << LHRP_SIZE); j++)
	ltable[j].reset();
      for (int j = 0; j < (1 << LHRT_SIZE); j++)
	lhr_table[j] = 0;

    };

//Comments from André Seznec and Pierre Michaud:
// index function for the bimodal table
inline  int bindex (address_t pc)
  {
    return ((pc ^ inter_index) & mask_bm);
  }

//Comments from André Seznec and Pierre Michaud:
// index function for the global tables: 
// includes path history as in the OGEHL predictor
//F serves to mix path history
inline  int F (int A, int size, int bank, int start)
  {
    int A1, A2;

    A >>= start;
    A = A & ((1 << size) - 1);
    A1 = (A & mask_gsize[bank]);
    A2 = (A >> gtable[bank].size);
    A2 = ((A2 << bank) & mask_gsize[bank]) + (A2 >> (gtable[bank].size - bank));
    A = A1 ^ A2;
    A = ((A << bank) & mask_gsize[bank]) + (A >> (gtable[bank].size - bank));
    return (A);
  }

inline  int gindex (address_t pc, int bank)
  {
    int index;

    int plen = 16;
    if (gtable[bank].i_len >= plen)
      index =
	pc ^ (pc >> ((gtable[bank].size - (NUM_GTABLE - bank - 1)))) ^  gtable[bank].ch_i.
	comp ^ F (phist, plen, bank, 0);
    else
      index =
	pc ^ (pc >> (gtable[bank].size - NUM_GTABLE + bank + 1)) ^ 
	gtable[bank].ch_i.comp ^ F (phist, gtable[bank].i_len, bank, 0);

    index = index ^ inter_index;

    return (index & mask_gsize[bank]);
  }

//Comments from André Seznec and Pierre Michaud:
//  tag computation
inline  uint16_t gtag (address_t pc, int bank)
  {
    int tag;
    tag = pc ^ gtable[bank].ch_t[0].comp ^ (gtable[bank].ch_t[1].comp << 1);
    return (tag & mask_gtag[bank]);
  }

//Comments from André Seznec and Pierre Michaud:
// up-down saturating counter
inline  void ctrupdate (int8_t & ctr, bool taken, int nbits)
  {
    if (taken)
      {
	if (ctr < ((1 << (nbits - 1)) - 1))
	  ctr++;
      }
    else
      {
	if (ctr > -(1 << (nbits - 1)))
	  ctr--;
      }
  }

// up-down saturating counter for gtables
inline  void gctrupdate (int8_t & ctr, bool taken, int bank)
  {
    if (taken)
      {
	if (ctr < ctr_max[bank])
	  ctr++;
      }
    else
      {
	if (ctr > ctr_min[bank])
	  ctr--;
      }
  }

//get the prediction from the bimodal table
inline  bool getbim ()
  {
    return (btable[pinfo[AP_INDEX - AP_READ].BimIndex[inter_index]].pred > 0);
  }

//Comments from André Seznec and Pierre Michaud:
// update  the bimodal predictor
inline  void baseupdate (unsigned int BimIndex, unsigned int HysIndex, bool p, bool Taken)
  {
//Comments from André Seznec and Pierre Michaud:
//just a normal 2-bit counter apart that hysteresis is shared
    if (Taken == p)
      {

	if (Taken)
	  {
	    if (btable[BimIndex].pred)
	      btable[HysIndex].hyst = 1;
	  }
	else
	  {
	    if (!btable[BimIndex].pred)
	      btable[HysIndex].hyst = 0;
	  }
      }
    else
      {
	int BimCounter = (btable[BimIndex].pred << 1) + btable[HysIndex].hyst;
	if (Taken)
	  {
	    if (BimCounter < 3)
	      BimCounter += 1;
	  }
	else
	  {
	    if (BimCounter > 0)
	      BimCounter--;
	  }

	btable[BimIndex].pred = BimCounter >> 1;
	btable[HysIndex].hyst = (BimCounter & 1);

      }

  }

  // PREDICTION
inline  bool get_prediction (branch_info & bi)
  {
    bool pred_taken = true;

    address_t pc = bi.address;

    //shift the information queue
    if (AP_INDEX > 1)
      for (int i = AP_INDEX - 1; i > 0; i --)
	pinfo[i] = pinfo[i - 1];
    
    //current information will be filled in entry 0
    pred_info *pi = &pinfo[0];

    //indicate this entry is valid
    pi -> validp = true;

    //get indexes
    for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++)
      {
	inter_index = i;

	pi->BimIndex[i] = bindex(pc);
#ifdef BASE_CONFIG
	pi->HysIndex[i] = pi->BimIndex[i];
#else
	pi->HysIndex[i] = pi->BimIndex[i] >> 2;
#endif

	for (int j = 0; j < NUM_GTABLE; j++)
	  {
	    pi->GlobalIndex[j][i] = gindex (pc, j);
	    pi->gentry[j][i] = &gtable[j].entry[pi->GlobalIndex[j][i]];
	  }

	uint32_t li = (pc ^ (pc >> LHRT_SIZE))& mask_lhr;
	pi->LocalHisIndex[i] = li;
#ifdef USE_CBP2
        uint32_t L = lhr_table[li];
        L ^= (L >> LHRP_SIZE) << (LHR_LEN - (LHR_LEN - LHRP_SIZE));
	pi->LocalPredIndex[i] = (((pc ^ L) & mask_lhrp) ^ inter_index) & mask_lhrp;
#else
	pi->LocalPredIndex[i] = (pc ^ ((lhr_table[li]<<(LHRP_SIZE - LHR_LEN))& mask_lhrp) ^ inter_index) & mask_lhrp;
#endif
	pi->lentry[i] = &ltable[pi->LocalPredIndex[i]];
      }

    //read prediction tables
     for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++)
       {
	 pi->icheck = pinfo[AP_INDEX - AP_READ].GlobalIndex[0][0];
	 inter_index = i;
	 for (int j = 0; j < NUM_GTABLE; j++)
	   pi->vgentry[j][i] = gtable[j].entry[pinfo[AP_INDEX - AP_READ].GlobalIndex[j][i]];
	 
	 pi->vlentry[i] = ltable[pinfo[AP_INDEX - AP_READ].LocalPredIndex[i]];
	 pi->BimCounter[i] = (btable[pinfo[AP_INDEX - AP_READ].BimIndex[i]].pred << 1) + btable[pinfo[AP_INDEX - AP_READ].BimIndex[i] >> 2].hyst;
	 pi->BimCounter[i] -= 2;
	 pi->bmpred[i] = getbim();
       }

     //get tags
     for (int i = 0; i < (1 << (AP_INDEX - 1)); i ++)
       {
	 inter_index = i;
	 for (int j = 0; j < NUM_GTABLE; j++)
	   pi->gtags[j][i] = gtag (pc, j);

	 pi->LocalTag[i] = (pc ^ (pc >> LHR_TAG) ^ inter_index) & mask_ltag;
       }

    if (bi.br_flags & BR_CONDITIONAL)
      {
	//get the current inter_info to select out a prediction
	inter_index = inter_info & ((1 << (AP_INDEX - 1)) - 1);

	if (AP_INDEX == 1)
	  assert(inter_index == 0);

	//assume we model the n-block ahead pipelining
	//copy ONE set of values based on the inter_index from 2^(n-1) sets of pre-computed indexes, tags etc. 
	//those values will be used to calculate the final prediction
        
	copyinfo();

	pred_taken = read_prediction ();
      }
    return pred_taken;
  }

 void copyinfo()
   {
     int ip = inter_index;
     int pindex = AP_INDEX - 1;  //use pindex-block ahead indexs
     int ptag = AP_TAG - 1;      //use ptag-block ahead tags
     int pvalue = AP_READ - 1;   //use values read pvalue-block ahead
     
     assert(pinfo[pvalue].icheck == pinfo[pindex].GlobalIndex[0][0]);
     
     //index
     LocalPredIndex = pinfo[pindex].LocalPredIndex[ip];
     LocalHisIndex = pinfo[pindex].LocalHisIndex[ip];
     for (int j = 0; j < NUM_GTABLE; j++)
       {
	 GlobalIndex[j] = pinfo[pindex].GlobalIndex[j][ip];
	 gentry[j] = pinfo[pindex].gentry[j][ip];
       }
     lentry = pinfo[pindex].lentry[ip];
     BimIndex = pinfo[pindex].BimIndex[ip];
     HysIndex = pinfo[pindex].HysIndex[ip];
     
     //value
     for (int j = 0; j < NUM_GTABLE; j++)
       vgentry[j] = pinfo[pvalue].vgentry[j][ip];
     vlentry = pinfo[pvalue].vlentry[ip];
     bmpred = pinfo[pvalue].bmpred[ip];
     BimCounter = pinfo[pvalue].BimCounter[ip];

     //tages
     LocalTag = pinfo[ptag].LocalTag[ip];
     for (int j = 0; j < NUM_GTABLE; j++)
       gtags[j] = pinfo[ptag].gtags[j][ip];
   } 

inline  bool read_prediction ()
  {
    y = 0;
    hitc = 0;
    minhit = NUM_GTABLE; //the longest match
    
    //tag comparision
    for (int i = 0; i < NUM_GTABLE; i++)
      {
	if (vgentry[i].tag == gtags[i])
	  {
	    hitc++;
	    hit[i] = true;
	    if (i < minhit)
	      minhit = i;
	  }
	else
	  {
	    hit[i] = false;
	    used[i] = false;
	  }
      }

    lhit = lused = false;

    lhit = (vlentry.tag == LocalTag);
    if (lhit)
      lpred = (vlentry.ctr >= 0);

    //prediction AND update policies
    //decisions related to both the prediction and update will be made here for convenience
    //in the update function, these decisions will be reused.

    bool cadidate[NUM_GTABLE];
    bool has_vic = false;

    for (int i = NUM_GTABLE - 1; i >= 0; i --)
      {
	cadidate[i] = false;
	used[i] = false;
	victim[i] = false;

	//select one counter from each group of gtables
#ifdef USE_CBP2
	if (i == (NUM_GTABLE - 1))
	  {
	    if (hard_bm || !(hit[0] || hit[1] || hit[2]))
	      cadidate[i] = true;
	  }
	else if ((i & 1) == 0)
	  cadidate[i] = true;
	else if (!hit[i-1])
	  cadidate[i] = true;

	if (hit[i] && cadidate[i])
	  used[i] = true;
#else
	if (i == 0)
	  {
	    cadidate[i] = true;
	  }
	else if ((i & 1) == 1)
	  cadidate[i] = true;
	else if (!hit[i-1])
	  cadidate[i] = true;

	if (hit[i] && cadidate[i])
	  used[i] = true;
#endif    
      }

    //find the table to alocate a new entry, which will be used in the update function
    for (int i = NUM_GTABLE - 1; i >= 0; i --)
      {
	steal[i] = false;
	if (!hit[i] && cadidate[i] && vgentry[i].ubit == 0 && !has_vic)
	  {
	    has_vic = true;
	    victim[i] = true;
	  }
#ifdef USE_CBP2
	else if (!hit[i] && cadidate[i])
	  steal[i] = true;
#endif
      }

    //decide whether to steal an entry from a missed gtable, which will be used in the update function
    for (int i = NUM_GTABLE - 1; i >= 0; i --)
      {
#ifdef USE_CBP2
	if (steal[i] && !victim[i] && ((!has_vic && (i < minhit || minhit == 0 || hard_bm)) || (has_vic && i < minhit && !hard_bm)))
#else
	//for hard_bm, don't steal entries when we already have a table to allocate the new entry  
	if (!hit[i] && !victim[i] && i < minhit && (!has_vic || (has_vic && !hard_bm)))
#endif
	  steal[i] = true;
	else
	  steal[i] = false;
      }
    //policy end

    //get the summation
    for (int i = 0; i < NUM_GTABLE; i++)
      if (hit[i] && used[i])
	{
	  gpred[i] = (vgentry[i].ctr >= 0);
	  y += vgentry[i].ctr;
	  used[i] = true;
	}

#ifndef GHR_ONLY	

#ifdef USE_CBP2
    if (((vlentry.ubit > 1 && !hard_bm) || (vlentry.ubit > 2)) && lhit)
#else
    if (vlentry.ubit > 1 && lhit)
#endif
      {
        lused = true;
	y += vlentry.ctr;
      }
#endif

    //the bimodal counter is always used
    y += BimCounter;

    if (y == 0)
      my_pred = bmpred;
    else
      {
	my_pred = (y > 0);
      }

    return my_pred;
  }

  // PREDICTOR UPDATE
inline  void update_predictor (branch_info &bi, bool mytaken, unsigned int target)
  {

    taken = mytaken;
    bool misp = (my_pred != taken);

    if (bi.br_flags & BR_CONDITIONAL && pinfo[AP_INDEX - 1].validp)
      {

	bool has_update = false; //whether have updated some gtable prediction counters

#ifdef USE_CBP2
        int cupdate = 0; //number of updates on a correct prediciton
        int wupdate = 0; //number of updates on a wrong prediction

	//check whether used gtable counters have the same prediciton
	bool used_same = true;
	bool first_used = true, used_pred = false;
	
	for (int i = 0; i <= NUM_GTABLE - 1; i++)
	  if (hit[i] && used[i])
	    {
	      if (first_used)
		{
		  used_pred =  (vgentry[i].ctr >= 0);
		  first_used = false;
		}
	      else if (used_pred != (vgentry[i].ctr >= 0))
		{
		  used_same = false;
		  break;
		}
	    }
#endif

	for (int i = 0; i <= NUM_GTABLE - 1; i++)
	  {

	    if (hit[i] && used[i]) //update used entries
	      {
#ifdef USE_CBP2
                if ((!used_same || hitc == 1 || !hard_bm) && gpred[i] == taken && gentry[i]->ubit < 3)
#else
		if (gpred[i] == taken && gentry[i]->ubit < 3)
#endif
		  {
		    gentry[i]->ubit++;
		  }
		
#ifdef USE_CBP2
                else if (hard_bm && (!used_same || hitc == 1) && i == minhit && gpred[i] != taken && gentry[i]->ubit > 0)

#else
		else if (hard_bm && i == minhit && gpred[i] != taken && gentry[i]->ubit > 0)
#endif
		  {
		    gentry[i]->ubit--;
		  }

		//update prediciton counters
		if (((abs(y) < thresupdate) || misp))
		  {
		    gctrupdate (gentry[i]->ctr, taken, i);
		    has_update = true;
#ifdef USE_CBP2
                    if (misp)
                      wupdate ++;
                    else if (abs(y) < thresupdate)
                      cupdate ++;
#endif
		  }
	      }
	  }//for
	
	//update the bimodal counter
	baseupdate (BimIndex, HysIndex, bmpred, taken);

	//steal entries and/or allocate the new entry
	for (int i = NUM_GTABLE - 1; i >= 0; i--)
	  if (misp && !hit[i])
	    {
	      if (victim[i])
		{
		  gentry[i]->tag = gtags[i];
		  gentry[i]->ctr = (taken) ? 3 : -4; //strong initialization
		  gentry[i]->ubit = 0;
		}
	      else if (steal[i])
		{
		  if (gentry[i]->ubit > 0)
		    gentry[i]->ubit--;
		}
	    }

#ifndef GHR_ONLY

	//update the local prediction table
	if (lhit)
	  {
	    if (lpred == taken && misp)
	      {
		if (lentry->ubit < 3)
		  lentry->ubit++;
	      }
	    else if (lpred != taken)
	      {
		if (lentry->ubit > 0)
		  lentry->ubit--;
	      }
#ifdef USE_CBP2
            if (misp || hitc < 2 || lpred != taken || abs(vlentry.ctr) < 8) //reduce the updates on the ltable
#endif
	     //update the prediction counter
	    ctrupdate(lentry->ctr, taken, LHR_CT_SIZE);
	  }
	else if (vlentry.ubit == 0 && misp)
	  {
	    //allocate a new entry
#ifdef USE_CBP2
            if (hitc > 0)
#endif
	      {
		lentry->tag = LocalTag;
#ifdef USE_CBP2
		lentry->ctr = taken ? 1 : -1;
#else
		lentry->ctr = taken ? 3 : -4;
#endif
		lentry->ubit = 1;
	      }
	  }
	else if (misp && lentry->ubit > 0)
#ifdef USE_CBP2
	  if (hitc > 0)
#endif
	    {
	      //steal an entry
	      lentry->ubit --;
	    }
#endif
	
	//threshold fitting
	if (has_update)
	  {
	    if (misp)
	      {
#ifdef USE_CBP2
		TC += wupdate;
#else
		TC ++;
#endif
		if (TC > THRESFIT - 1)
		  {
		    TC = THRESFIT - 1;
		    if (thresupdate < TC_MAX)
		      {
			TC = 0;
			thresupdate++;
		      }
		  }
	      }
	    else if (abs(y) < thresupdate)
	      {
#ifdef USE_CBP2
		TC -= cupdate;
#else
		TC--;
#endif
		if (TC < -THRESFIT)
		  {
		    TC = -THRESFIT;
		    if (thresupdate > 0)
		      {
			thresupdate--; TC=0;
		      }
		  }
	      }
	  }
	//for detection
	num_hitc0 += (hitc == 0) ? 1:0;
	num_miss += misp ? 1:0;
	num_hitc0_miss += (hitc == 0 && misp) ? 1:0;
      }//conditional br

    if (bi.br_flags & BR_CONDITIONAL)
      {
	TICK++;

	if ((TICK & mask_tick) == 0)
	  {
	    phase++;
#ifdef USE_CBP2	    
	    if (!hard_bm && num_miss > 4500 && num_hitc0 < 15000 && num_hitc0_miss > 20)
#else
	    if (!hard_bm && num_miss > 4500 && num_hitc0 < 15000)
#endif
	      {
		hard_bm = true;
	      }
	    else if (hard_bm && num_miss < 3500)
	      {
		hard_bm = false;
	      }

#ifdef USE_CBP2
	    bool ubit_reset = false;

            if (num_miss > 1500 && num_hitc0 > 40000 && phase > 3)
              //a large number of new branches (hitc0 is large) and they are not simple branches (num_miss is large)
              {
                ubit_reset = true;
              }

            int X = (TICK >> TICK_PHASE) & 1;
            if ((X & 1) == 0)
              X = 2;

            if (ubit_reset)
              {
                for (int i = 0; i < NUM_GTABLE; i++)
                  for (int j = 0; j < (1 << gtable[i].size); j++)
                    {
                      gtable[i].entry[j].ubit = gtable[i].entry[j].ubit & X;
                    }
                for (int i = 0; i < (1 << LHRP_SIZE); i++)
                  {
                    ltable[i].ubit = 0;
                  }
              }
#endif

	    num_hitc0 = 0;
	    num_miss = 0;
	    num_hitc0_miss = 0;
	  }//TICK
      }//conditional br


    //update the intermediate path information
    inter_info = (inter_info << 1);
    if (bi.br_flags & BR_CONDITIONAL)
      {
	if (taken ^ (bi.address & 1))
	  inter_info |= 1;
      }
    else if (bi.address & 1)
      inter_info |= 1;

    //update the global history and the path history
    ghist = (ghist << 1);

#ifdef USE_CBP2
    if ((bi.br_flags & BR_CONDITIONAL) && taken)
#else
    if ((!(bi.br_flags & BR_CONDITIONAL)) | (taken))
#endif
      {
	ghist |= (his_t) 1;
      }

    phist = (phist << 1) + (bi.address & 1);

    for (int i = 0; i < NUM_GTABLE; i++)
      {
	gtable[i].ch_i.update (ghist);
	gtable[i].ch_t[0].update (ghist);
	gtable[i].ch_t[1].update (ghist);
      }

    //update the local history
#ifndef GHR_ONLY
    if (bi.br_flags & BR_CONDITIONAL)
      {
	lhr_table[LocalHisIndex] <<= 1;
	if (taken)
	  lhr_table[LocalHisIndex] |= 1;
	lhr_table[LocalHisIndex] &= ((1 << LHR_LEN) - 1);
      }
#endif
  }
}; //CPMPM

//interface to the CBP2 framework
class my_update : public branch_update {
 public:
  unsigned int index;
};

class my_predictor : public branch_predictor {
 public:
  my_update u;
  branch_info bi;
  CPMPM *p;

  my_predictor (void) {
    p = new CPMPM;
  }

  branch_update *predict (branch_info & b) {
    bi = b;
    u.direction_prediction (p->get_prediction(bi));
    u.target_prediction (0);
    return &u;
  }

  void update (branch_update *u, bool taken, unsigned int target) {
    p->update_predictor(bi, taken, target);
  }
};



