*** dbinc/repmgr.h.orig 2009-05-04 10:33:55.000000000 -0400 --- dbinc/repmgr.h 2009-05-04 10:27:26.000000000 -0400 *************** *** 374,379 **** --- 374,380 ---- #define SITE_FROM_EID(eid) (&db_rep->sites[eid]) #define EID_FROM_SITE(s) ((int)((s) - (&db_rep->sites[0]))) #define IS_VALID_EID(e) ((e) >= 0) + #define IS_KNOWN_REMOTE_SITE(e) ((e) >= 0 && ((u_int)(e)) < db_rep->site_cnt) #define SELF_EID INT_MAX #define IS_PEER_POLICY(p) ((p) == DB_REPMGR_ACKS_ALL_PEERS || \ *** rep/rep_elect.c.orig 2009-05-04 10:35:50.000000000 -0400 --- rep/rep_elect.c 2009-05-04 10:31:24.000000000 -0400 *************** *** 33,39 **** static int __rep_fire_elected __P((ENV *, REP *, u_int32_t)); static void __rep_elect_master __P((ENV *, REP *)); static int __rep_tally __P((ENV *, REP *, int, u_int32_t *, u_int32_t, roff_t)); ! static int __rep_wait __P((ENV *, db_timeout_t *, int *, int, u_int32_t)); /* * __rep_elect -- --- 33,39 ---- static int __rep_fire_elected __P((ENV *, REP *, u_int32_t)); static void __rep_elect_master __P((ENV *, REP *)); static int __rep_tally __P((ENV *, REP *, int, u_int32_t *, u_int32_t, roff_t)); ! static int __rep_wait __P((ENV *, db_timeout_t *, int, u_int32_t)); /* * __rep_elect -- *************** *** 55,61 **** ENV *env; LOG *lp; REP *rep; ! int done, eid, elected, full_elect, locked, in_progress, need_req; int ret, send_vote, t_ret; u_int32_t ack, ctlflags, egen, nsites, orig_tally, priority, realpri; u_int32_t tiebreaker; --- 55,61 ---- ENV *env; LOG *lp; REP *rep; ! int done, elected, full_elect, locked, in_progress, need_req; int ret, send_vote, t_ret; u_int32_t ack, ctlflags, egen, nsites, orig_tally, priority, realpri; u_int32_t tiebreaker; *************** *** 181,188 **** REP_SYSTEM_UNLOCK(env); (void)__rep_send_message(env, DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0); ! ret = __rep_wait(env, &to, &eid, ! 0, REP_F_EPHASE0); REP_SYSTEM_LOCK(env); F_CLR(rep, REP_F_EPHASE0); switch (ret) { --- 181,187 ---- REP_SYSTEM_UNLOCK(env); (void)__rep_send_message(env, DB_EID_BROADCAST, REP_MASTER_REQ, NULL, NULL, 0, 0); ! ret = __rep_wait(env, &to, 0, REP_F_EPHASE0); REP_SYSTEM_LOCK(env); F_CLR(rep, REP_F_EPHASE0); switch (ret) { *************** *** 286,296 **** REP_SYSTEM_LOCK(env); goto vote; } ! ret = __rep_wait(env, &to, &eid, full_elect, REP_F_EPHASE1); switch (ret) { case 0: /* Check if election complete or phase complete. */ ! if (eid != DB_EID_INVALID && !IN_ELECTION(rep)) { RPRINT(env, DB_VERB_REP_ELECT, (env, "Ended election phase 1")); goto edone; --- 285,295 ---- REP_SYSTEM_LOCK(env); goto vote; } ! ret = __rep_wait(env, &to, full_elect, REP_F_EPHASE1); switch (ret) { case 0: /* Check if election complete or phase complete. */ ! if (!IN_ELECTION(rep)) { RPRINT(env, DB_VERB_REP_ELECT, (env, "Ended election phase 1")); goto edone; *************** *** 398,412 **** REP_SYSTEM_LOCK(env); goto i_won; } ! ret = __rep_wait(env, &to, &eid, full_elect, REP_F_EPHASE2); RPRINT(env, DB_VERB_REP_ELECT, (env, "Ended election phase 2 %d", ret)); switch (ret) { case 0: ! if (eid != DB_EID_INVALID) ! goto edone; ! ret = DB_REP_UNAVAIL; ! break; case DB_REP_EGENCHG: if (to > timeout) to = timeout; --- 397,408 ---- REP_SYSTEM_LOCK(env); goto i_won; } ! ret = __rep_wait(env, &to, full_elect, REP_F_EPHASE2); RPRINT(env, DB_VERB_REP_ELECT, (env, "Ended election phase 2 %d", ret)); switch (ret) { case 0: ! goto edone; case DB_REP_EGENCHG: if (to > timeout) to = timeout; *************** *** 1050,1062 **** ENV *env; REP *rep; { - /* - * We often come through here twice, sometimes even more. We mustn't - * let the redundant calls affect stats counting. But rep_elect relies - * on this first part for setting eidp. - */ - rep->master_id = rep->eid; - if (F_ISSET(rep, REP_F_MASTERELECT | REP_F_MASTER)) { /* We've been through here already; avoid double counting. */ return; --- 1046,1051 ---- *************** *** 1093,1102 **** (timeout > 5000000) ? 500000 : ((timeout >= 10) ? timeout / 10 : 1); static int ! __rep_wait(env, timeoutp, eidp, full_elect, flags) ENV *env; db_timeout_t *timeoutp; ! int *eidp, full_elect; u_int32_t flags; { DB_REP *db_rep; --- 1082,1091 ---- (timeout > 5000000) ? 500000 : ((timeout >= 10) ? timeout / 10 : 1); static int ! __rep_wait(env, timeoutp, full_elect, flags) ENV *env; db_timeout_t *timeoutp; ! int full_elect; u_int32_t flags; { DB_REP *db_rep; *************** *** 1174,1180 **** F_CLR(rep, REP_F_EGENUPDATE); ret = DB_REP_EGENCHG; } else if (phase_over) { - *eidp = rep->master_id; done = 1; ret = 0; } --- 1163,1168 ---- *** repmgr/repmgr_net.c.orig 2009-05-04 10:34:46.000000000 -0400 --- repmgr/repmgr_net.c 2009-05-04 10:27:26.000000000 -0400 *************** *** 100,105 **** --- 100,107 ---- control, rec, &nsites_sent, &npeers_sent)) != 0) goto out; } else { + DB_ASSERT(env, IS_KNOWN_REMOTE_SITE(eid)); + /* * If this is a request that can be sent anywhere, then see if * we can send it to our peer (to save load on the master), but