svn commit: r256465 - projects/zfsd/head/cddl/sbin/zfsd
Alan Somers
asomers at FreeBSD.org
Mon Oct 14 21:59:33 UTC 2013
Author: asomers
Date: Mon Oct 14 21:59:31 2013
New Revision: 256465
URL: http://svnweb.freebsd.org/changeset/base/256465
Log:
zfsd should activate a spare that is newly added to an already-degraded pool

cddl/sbin/zfsd/case_file.cc
cddl/sbin/zfsd/case_file.h
cddl/sbin/zfsd/dev_ctl_event.cc
    When a config_sync event arrives, reevaluate all open
    casefiles on the same pool.

cddl/sbin/zfsd/case_file.cc
    When evaluating a config_sync event, try to activate spares
    if our pool is unhealthy. This is necessary because we
    don't get any more specific event when a spare is added to a
    pool. This required adding the CaseFileReEvaluator functor
    and the CaseFile::ReEvaluateByGuid method.

cddl/sbin/zfsd/case_file.cc
    When processing a statechange event, refresh the vdev state.
    This is necessary because we could be reevaluating an
    already-existing casefile that was opened before the pool
    became degraded. Also, always consume the event, even if we
    couldn't activate the spare. It's no longer necessary to
    queue the statechange event since we can now reevaluate
    CaseFiles on a config_sync event.

cddl/sbin/zfsd/case_file.cc
    In CloseIfSolved(), don't close casefiles for DEGRADED or
    FAULTED vdevs since we now have the ability to fix them.

cddl/sbin/zfsd/dev_ctl_event.cc
cddl/sbin/zfsd/dev_ctl_event.h
cddl/sbin/zfsd/zfsd.cc
    Moved responsibility for queueing events from
    DevCtlEvent (and its child classes) to ZfsDaemon.

cddl/sbin/zfsd/zfsd.cc
    In the SIGINFO handler, print unconsumed events as well as
    saved casefiles.
Submitted by: alans
Approved by: ken (mentor)
Sponsored by: Spectra Logic Corporation
Modified:
projects/zfsd/head/cddl/sbin/zfsd/case_file.cc
projects/zfsd/head/cddl/sbin/zfsd/case_file.h
projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.cc
projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.h
projects/zfsd/head/cddl/sbin/zfsd/zfsd.cc
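
For readers skimming the diff below, the core of the queueing change is that
Process() no longer calls ZfsDaemon::SaveEvent() itself; it returns true when
the event should be kept for later replay, and only the daemon's event loop
does the saving. The following is a minimal, self-contained sketch of that
contract; the Event, UnknownPoolEvent, and Daemon types are hypothetical
stand-ins used for illustration only, not the actual DevCtlEvent, ZfsEvent,
or ZfsDaemon classes.

#include <iostream>
#include <list>

/* Stand-in for DevCtlEvent: Process() reports whether the caller should
 * queue the event for later replay instead of queueing it itself. */
struct Event {
    virtual ~Event() {}
    virtual bool Process() const { return (false); }
};

/* Stand-in for an event that cannot be handled yet, e.g. one that refers
 * to a pool or vdev the daemon does not yet know about. */
struct UnknownPoolEvent : public Event {
    virtual bool Process() const { return (true); }
};

/* Stand-in for the daemon: only its event loop decides what gets saved. */
struct Daemon {
    std::list<Event *> unconsumed;

    void ProcessEvent(Event *ev) {
        if (ev->Process()) {
            /* Queue for replay on the next config_sync. */
            unconsumed.push_back(ev);
            return;
        }
        delete ev;  /* Consumed or uninteresting; drop it. */
    }
};

int main() {
    Daemon d;
    d.ProcessEvent(new Event);
    d.ProcessEvent(new UnknownPoolEvent);
    std::cout << d.unconsumed.size() << " event(s) queued" << std::endl;  /* 1 */
    while (!d.unconsumed.empty()) {
        delete d.unconsumed.front();
        d.unconsumed.pop_front();
    }
    return (0);
}

Keeping the queueing decision in one place means the event subclasses only
report whether they could consume an event, which is what lets the later
config_sync replay in ZfsDaemon::ReplayUnconsumedEvents() work uniformly.
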
Modified: projects/zfsd/head/cddl/sbin/zfsd/case_file.cc
==============================================================================
--- projects/zfsd/head/cddl/sbin/zfsd/case_file.cc Mon Oct 14 21:53:45 2013 (r256464)
+++ projects/zfsd/head/cddl/sbin/zfsd/case_file.cc Mon Oct 14 21:59:31 2013 (r256465)
@@ -41,6 +41,7 @@
#include <dirent.h>
#include <iomanip>
#include <fstream>
+#include <functional>
#include <sstream>
#include <syslog.h>
#include <unistd.h>
@@ -59,6 +60,27 @@ using std::stringstream;
using std::setfill;
using std::setw;
+/*-------------------------- File-scoped classes ----------------------------*/
+/**
+ * \brief Functor that operates on STL collections of CaseFiles
+ * Selectively calls ReEvaluate on the casefile, based on its pool GUID.
+ */
+class CaseFileReEvaluator : public std::unary_function<CaseFile, bool>
+{
+public:
+ CaseFileReEvaluator(Guid guid, const ZfsEvent &event) :
+ m_poolGUID(guid), m_event(event) {};
+ void operator() (CaseFile *casefile) {
+ if (m_poolGUID == casefile->PoolGUID())
+ casefile->ReEvaluate(m_event);
+ }
+private:
+ Guid m_poolGUID;
+ const ZfsEvent &m_event;
+};
+
+
+
/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------
CaseFileList CaseFile::s_activeCases;
@@ -99,6 +121,14 @@ CaseFile::Find(const string &physPath)
return (NULL);
}
+
+void
+CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
+{
+ CaseFileReEvaluator reevaluator(poolGUID, event);
+ std::for_each(s_activeCases.begin(), s_activeCases.end(), reevaluator);
+}
+
CaseFile &
CaseFile::Create(Vdev &vdev)
{
@@ -307,6 +337,11 @@ CaseFile::ReEvaluate(const ZfsEvent &eve
return (/*consumed*/true);
}
+ else if (event.Value("type") == "misc.fs.zfs.config_sync") {
+ if (VdevState() < VDEV_STATE_HEALTHY)
+ consumed = ActivateSpare();
+ }
+
if (event.Value("class") == "resource.fs.zfs.removed") {
bool spare_activated;
@@ -361,15 +396,15 @@ CaseFile::ReEvaluate(const ZfsEvent &eve
*/
consumed = spare_activated;
} else if (event.Value("class") == "resource.fs.zfs.statechange") {
+ RefreshVdevState();
/*
* If this vdev is DEGRADED or FAULTED, try to activate a
* hotspare. Otherwise, ignore the event
*/
if (VdevState() == VDEV_STATE_FAULTED ||
VdevState() == VDEV_STATE_DEGRADED)
- consumed = ActivateSpare();
- else
- consumed = true;
+ (void) ActivateSpare();
+ consumed = true;
}
else if (event.Value("class") == "ereport.fs.zfs.io" ||
event.Value("class") == "ereport.fs.zfs.checksum") {
@@ -513,10 +548,31 @@ CaseFile::CloseIfSolved()
* retain these cases so that any spares added in
* the future can be applied to them.
*/
- if (VdevState() > VDEV_STATE_CANT_OPEN
- && VdevState() <= VDEV_STATE_HEALTHY) {
+ switch (VdevState()) {
+ case VDEV_STATE_HEALTHY:
+ /* No need to keep cases for healthy vdevs */
Close();
return (true);
+ case VDEV_STATE_REMOVED:
+ case VDEV_STATE_CANT_OPEN:
+ /*
+ * Keep open. We may solve it with a newly inserted
+ * device.
+ */
+ case VDEV_STATE_FAULTED:
+ case VDEV_STATE_DEGRADED:
+ /*
+ * Keep open. We may solve it with the future
+ * addition of a spare to the pool
+ */
+ case VDEV_STATE_UNKNOWN:
+ case VDEV_STATE_CLOSED:
+ case VDEV_STATE_OFFLINE:
+ /*
+ * Keep open? This may not be the correct behavior,
+ * but it's what we've always done
+ */
+ ;
}
/*
Modified: projects/zfsd/head/cddl/sbin/zfsd/case_file.h
==============================================================================
--- projects/zfsd/head/cddl/sbin/zfsd/case_file.h Mon Oct 14 21:53:45 2013 (r256464)
+++ projects/zfsd/head/cddl/sbin/zfsd/case_file.h Mon Oct 14 21:59:31 2013 (r256465)
@@ -109,6 +109,14 @@ public:
static CaseFile *Find(const string &physPath);
/**
+ * \brief ReEvaluate all open cases whose pool guid matches the argument
+ *
+ * \param poolGUID Only reevaluate cases for this pool
+ * \param event Try to consume this event with the casefile
+ */
+ static void ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event);
+
+ /**
* \brief Create or return an existing active CaseFile for the
* specified vdev.
*
@@ -163,12 +171,13 @@ public:
/**
* \brief Update this CaseFile in light of the provided ZfsEvent.
+ * Must be virtual so it can be overridden in the unit tests
*
* \param event The ZfsEvent to evaluate.
*
* \return True if this event was consumed by this CaseFile.
*/
- bool ReEvaluate(const ZfsEvent &event);
+ virtual bool ReEvaluate(const ZfsEvent &event);
/**
* \brief Register an itimer callout for the given event, if necessary
Modified: projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.cc
==============================================================================
--- projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.cc Mon Oct 14 21:53:45 2013 (r256464)
+++ projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.cc Mon Oct 14 21:59:31 2013 (r256465)
@@ -248,9 +248,10 @@ DevCtlEvent::~DevCtlEvent()
delete &m_nvPairs;
}
-void
+bool
DevCtlEvent::Process() const
{
+ return (false);
}
timeval
@@ -516,7 +517,7 @@ DevfsEvent::DeepCopy() const
return (new DevfsEvent(*this));
}
-void
+bool
DevfsEvent::Process() const
{
/*
@@ -527,7 +528,7 @@ DevfsEvent::Process() const
if (Value("type") != "CREATE"
|| Value("subsystem") != "CDEV"
|| !IsDiskDev(devName))
- return;
+ return (false);
/* Log the event since it is of interest. */
Log(LOG_INFO);
@@ -535,7 +536,7 @@ DevfsEvent::Process() const
string devPath(_PATH_DEV + devName);
int devFd(open(devPath.c_str(), O_RDONLY));
if (devFd == -1)
- return;
+ return (false);
/* Normalize the device name in case the DEVFS event is for a link. */
devName = fdevname(devFd);
@@ -572,6 +573,7 @@ DevfsEvent::Process() const
}
if (devLabel != NULL)
nvlist_free(devLabel);
+ return (false);
}
//- DevfsEvent Protected Methods -----------------------------------------------
@@ -602,7 +604,7 @@ ZfsEvent::DeepCopy() const
return (new ZfsEvent(*this));
}
-void
+bool
ZfsEvent::Process() const
{
string logstr("");
@@ -610,31 +612,33 @@ ZfsEvent::Process() const
if (!Contains("class") && !Contains("type")) {
syslog(LOG_ERR,
"ZfsEvent::Process: Missing class or type data.");
- return;
+ return (false);
}
/* On config syncs, replay any queued events first. */
- if (Value("type").find("misc.fs.zfs.config_sync") == 0)
+ if (Value("type").find("misc.fs.zfs.config_sync") == 0) {
ZfsDaemon::ReplayUnconsumedEvents();
+ CaseFile::ReEvaluateByGuid(PoolGUID(), *this);
+ }
Log(LOG_INFO);
if (Value("type").find("misc.fs.zfs.") == 0) {
/* Configuration changes, resilver events, etc. */
ProcessPoolEvent();
- return;
+ return (false);
}
if (!Contains("pool_guid") || !Contains("vdev_guid")) {
/* Only currently interested in Vdev related events. */
- return;
+ return (false);
}
CaseFile *caseFile(CaseFile::Find(PoolGUID(), VdevGUID()));
if (caseFile != NULL) {
syslog(LOG_INFO, "Evaluating existing case file\n");
caseFile->ReEvaluate(*this);
- return;
+ return (false);
}
/* Skip events that can't be handled. */
@@ -645,7 +649,7 @@ ZfsEvent::Process() const
msg << "No replicas available for pool " << poolGUID;
msg << ", ignoring";
syslog(LOG_INFO, "%s", msg.str().c_str());
- return;
+ return (false);
}
/*
@@ -655,40 +659,38 @@ ZfsEvent::Process() const
ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
if (zpl.empty()) {
stringstream msg;
- bool queued = ZfsDaemon::SaveEvent(*this);
- int priority = queued ? LOG_INFO : LOG_ERR;
+ int priority = LOG_INFO;
msg << "ZfsEvent::Process: Event for unknown pool ";
msg << poolGUID << " ";
- msg << (queued ? "queued" : "dropped");
+ msg << "queued";
syslog(priority, "%s", msg.str().c_str());
- return;
+ return (true);
}
nvlist_t *vdevConfig = VdevIterator(zpl.front()).Find(VdevGUID());
if (vdevConfig == NULL) {
stringstream msg;
- bool queued = ZfsDaemon::SaveEvent(*this);
- int priority = queued ? LOG_INFO : LOG_ERR;
+ int priority = LOG_INFO;
msg << "ZfsEvent::Process: Event for unknown vdev ";
msg << VdevGUID() << " ";
- msg << (queued ? "queued" : "dropped");
+ msg << "queued";
syslog(priority, "%s", msg.str().c_str());
- return;
+ return (true);
}
Vdev vdev(zpl.front(), vdevConfig);
caseFile = &CaseFile::Create(vdev);
if ( caseFile->ReEvaluate(*this) == false) {
stringstream msg;
- bool queued = ZfsDaemon::SaveEvent(*this);
- int priority = queued ? LOG_INFO : LOG_ERR;
+ int priority = LOG_INFO;
msg << "ZfsEvent::Process: Unconsumed event for vdev(";
msg << zpool_get_name(zpl.front()) << ",";
msg << vdev.GUID() << ") ";
- msg << (queued ? "queued" : "dropped");
+ msg << "queued";
syslog(priority, "%s", msg.str().c_str());
- return;
+ return (true);
}
+ return (false);
}
//- ZfsEvent Protected Methods -------------------------------------------------
Modified: projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.h
==============================================================================
--- projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.h Mon Oct 14 21:53:45 2013 (r256464)
+++ projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.h Mon Oct 14 21:59:31 2013 (r256465)
@@ -283,8 +283,9 @@ public:
/**
* Interpret and perform any actions necessary to
* consume the event.
+ * \return True if this event should be queued for later reevaluation
*/
- virtual void Process() const;
+ virtual bool Process() const;
/**
* Get the time that the event was created
@@ -405,8 +406,9 @@ public:
/**
* Interpret and perform any actions necessary to
* consume the event.
+ * \return True if this event should be queued for later reevaluation
*/
- virtual void Process() const;
+ virtual bool Process() const;
protected:
/**
@@ -479,8 +481,9 @@ public:
/**
* Interpret and perform any actions necessary to
* consume the event.
+ * \return True if this event should be queued for later reevaluation
*/
- virtual void Process() const;
+ virtual bool Process() const;
const string &PoolName() const;
Guid PoolGUID() const;
Modified: projects/zfsd/head/cddl/sbin/zfsd/zfsd.cc
==============================================================================
--- projects/zfsd/head/cddl/sbin/zfsd/zfsd.cc Mon Oct 14 21:53:45 2013 (r256464)
+++ projects/zfsd/head/cddl/sbin/zfsd/zfsd.cc Mon Oct 14 21:59:31 2013 (r256465)
@@ -468,6 +468,12 @@ ZfsDaemon::ReplayUnconsumedEvents()
if (replayed_any)
syslog(LOG_INFO, "Started replaying unconsumed events");
while (event != s_unconsumedEvents.end()) {
+ /*
+ * Even if the event is unconsumed the second time around, drop
+ * it. By now we should've gotten a config_sync so any events
+ * that still can't be consumed are probably referring to vdevs
+ * or pools that no longer exist.
+ */
(*event)->Process();
delete *event;
s_unconsumedEvents.erase(event++);
@@ -494,7 +500,8 @@ ZfsDaemon::ProcessEvents(EventBuffer &ev
while (eventBuffer.ExtractEvent(evString)) {
DevCtlEvent *event(DevCtlEvent::CreateEvent(evString));
if (event != NULL) {
- event->Process();
+ if (event->Process())
+ SaveEvent(*event);
delete event;
}
}
@@ -597,7 +604,8 @@ ZfsDaemon::RescanSystem()
string evString(evStart + pp->lg_name + "\n");
event = DevCtlEvent::CreateEvent(evString);
if (event != NULL)
- event->Process();
+ if (event->Process())
+ SaveEvent(*event);
delete event;
}
}
@@ -639,8 +647,12 @@ ZfsDaemon::EventLoop()
int result;
if (s_logCaseFiles == true) {
+ DevCtlEventList::iterator
+ event(s_unconsumedEvents.begin());
s_logCaseFiles = false;
CaseFile::LogAll();
+ while (event != s_unconsumedEvents.end())
+ (*event++)->Log(LOG_INFO);
}
Callout::ExpireCallouts();
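
As an aside, the CaseFileReEvaluator/ReEvaluateByGuid pair added in
case_file.cc above is a guid-filtered std::for_each over the list of open
cases. A self-contained sketch of the same pattern follows; Guid, Event,
Case, and ReEvaluator here are simplified stand-ins for illustration, not
the zfsd types.

#include <algorithm>
#include <iostream>
#include <list>

typedef unsigned long Guid;

/* Simplified stand-in for a ZfsEvent carrying a pool GUID. */
struct Event {
    Guid pool;
};

/* Simplified stand-in for a CaseFile tracking a vdev in some pool. */
struct Case {
    Guid pool;
    Guid PoolGUID() const { return (pool); }
    void ReEvaluate(const Event &) {
        std::cout << "reevaluating case in pool " << pool << std::endl;
    }
};

/* Functor applied to every open case; it only acts on cases whose pool
 * GUID matches the event's pool, mirroring CaseFileReEvaluator. */
class ReEvaluator {
public:
    ReEvaluator(Guid guid, const Event &event) :
        m_poolGUID(guid), m_event(event) {}
    void operator()(Case *casefile) const {
        if (m_poolGUID == casefile->PoolGUID())
            casefile->ReEvaluate(m_event);
    }
private:
    Guid m_poolGUID;
    const Event &m_event;
};

int main() {
    Case a = { 1 }, b = { 2 };
    std::list<Case *> cases;
    cases.push_back(&a);
    cases.push_back(&b);

    Event ev = { 2 };
    /* Only the case belonging to pool 2 is reevaluated. */
    std::for_each(cases.begin(), cases.end(), ReEvaluator(ev.pool, ev));
    return (0);
}
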