Skip to content

Commit

Permalink
EDAC, i7core: Remove double buffering of error records
Browse files Browse the repository at this point in the history
In the bad old days the functions from x86_mce_decoder_chain could be
called in machine check context. So we used to carefully copy them and
defer processing until later. But in

  f29a7af ("x86/mce: Avoid potential deadlock due to printk() in MCE context")

we switched the logging code to save the record in a genpool, and call
the functions that registered to be notified later from a work queue.

So drop all the double buffering and do all the work we want to do as
soon as i7core_mce_check_error() is called.

Signed-off-by: Tony Luck <[email protected]>
Acked-by: Mauro Carvalho Chehab <[email protected]>
Cc: linux-edac <[email protected]>
Link: http://lkml.kernel.org/r/29ab2c370915c6e132fc5d88e7b72cb834bedbfe.1461855008.git.tony.luck@intel.com
Signed-off-by: Borislav Petkov <[email protected]>
  • Loading branch information
aegl authored and suryasaimadhu committed Apr 29, 2016
1 parent de0336b commit 5359534
Showing 1 changed file with 5 additions and 76 deletions.
81 changes: 5 additions & 76 deletions drivers/edac/i7core_edac.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,16 +271,6 @@ struct i7core_pvt {

bool is_registered, enable_scrub;

/* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN];
struct mce mce_outentry[MCE_LOG_LEN];

/* Fifo in/out counters */
unsigned mce_in, mce_out;

/* Count indicator to show errors not got */
unsigned mce_overrun;

/* DCLK Frequency used for computing scrub rate */
int dclk_freq;

Expand Down Expand Up @@ -1792,69 +1782,24 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
* i7core_check_error Retrieve and process errors reported by the
* hardware. Called by the Core module.
*/
static void i7core_check_error(struct mem_ctl_info *mci)
static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
int i;
unsigned count = 0;
struct mce *m;

/*
* MCE first step: Copy all mce errors into a temporary buffer
* We use a double buffering here, to reduce the risk of
* losing an error.
*/
smp_rmb();
count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
% MCE_LOG_LEN;
if (!count)
goto check_ce_error;

m = pvt->mce_outentry;
if (pvt->mce_in + count > MCE_LOG_LEN) {
unsigned l = MCE_LOG_LEN - pvt->mce_in;

memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
smp_wmb();
pvt->mce_in = 0;
count -= l;
m += l;
}
memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
smp_wmb();
pvt->mce_in += count;

smp_rmb();
if (pvt->mce_overrun) {
i7core_printk(KERN_ERR, "Lost %d memory errors\n",
pvt->mce_overrun);
smp_wmb();
pvt->mce_overrun = 0;
}

/*
* MCE second step: parse errors and display
*/
for (i = 0; i < count; i++)
i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
i7core_mce_output_error(mci, m);

/*
* Now, let's increment CE error counts
*/
check_ce_error:
if (!pvt->is_registered)
i7core_udimm_check_mc_ecc_err(mci);
else
i7core_rdimm_check_mc_ecc_err(mci);
}

/*
* i7core_mce_check_error Replicates mcelog routine to get errors
* This routine simply queues mcelog errors, and
* return. The error itself should be handled later
* by i7core_check_error.
* WARNING: As this routine should be called at NMI time, extra care should
* be taken to avoid deadlocks, and to be as fast as possible.
* Check that logging is enabled and that this is the right type
* of error for us to handle.
*/
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
Expand Down Expand Up @@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
if (mce->bank != 8)
return NOTIFY_DONE;

smp_rmb();
if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
smp_wmb();
pvt->mce_overrun++;
return NOTIFY_DONE;
}

/* Copy memory error at the ringbuffer */
memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
smp_wmb();
pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

/* Handle fatal errors immediately */
if (mce->mcgstatus & 1)
i7core_check_error(mci);
i7core_check_error(mci, mce);

/* Advise mcelog that the errors were handled */
return NOTIFY_STOP;
Expand Down Expand Up @@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
get_dimm_config(mci);
/* record ptr to the generic device */
mci->pdev = &i7core_dev->pdev[0]->dev;
/* Set the function pointer to an actual operation function */
mci->edac_check = i7core_check_error;

/* Enable scrubrate setting */
if (pvt->enable_scrub)
Expand Down

0 comments on commit 5359534

Please sign in to comment.