svn commit: r224922 - in projects/zfsd/head: cddl/sbin cddl/sbin/zfsd etc/defaults etc/mtree etc/rc.d

Justin T. Gibbs gibbs at FreeBSD.org
Tue Aug 16 23:47:53 UTC 2011


Author: gibbs
Date: Tue Aug 16 23:47:53 2011
New Revision: 224922
URL: http://svn.freebsd.org/changeset/base/224922

Log:
  Add ZFSD, a ZFS fault management daemon.  This daemon has the following
  features:
  
   o When a vdev for an active pool is inserted into the system, it will
     re-integrate it with the pool.
   o When an unlabeled or inactive disk is inserted into the same physical
     location as a missing member of a pool with the "autoreplace" attribute
     set, the inserted disk will be used to replace the missing disk.
   o When the cumulative soft-error count for a vdev exceeds 50 errors, the
     vdev will be marked degraded, alerting users to a potential problem.
     The error counts are persisted across reboots.
  
  The daemon is written to be easily extended for more advanced fault
  management policies and to handle new features such as spare pool
  management.
  
  cddl/sbin/zfsd/zpool_list.cc:
  cddl/sbin/zfsd/zpool_list.h:
  	ZpoolList is a standard container allowing filtering and
  	iteration of imported ZFS pool information.
  
  cddl/sbin/zfsd/callout.cc:
  cddl/sbin/zfsd/callout.h:
  	Timer services built on top of the POSIX interval timer API.
  
  cddl/sbin/zfsd/vdev.cc:
  cddl/sbin/zfsd/vdev.h:
  	Wrapper class used to provide easy access to Vdev nvlist data.
  
  cddl/sbin/zfsd/zfsd.cc:
  cddl/sbin/zfsd/zfsd.h:
  	Daemon main, devctl socket handling, and global application state
  	exported through the ZfsDaemon singleton.
  
  cddl/sbin/zfsd/case_file.cc:
  cddl/sbin/zfsd/case_file.h:
  	CaseFile objects aggregate vdev faults that may require ZFSD action
  	in order to maintain the health of a ZFS pool.  They also handle
  	serialization/deserialization of fault data to persistent
  	storage.
  
  cddl/sbin/zfsd/vdev_iterator.cc:
  cddl/sbin/zfsd/vdev_iterator.h:
  	Helper class for traversing and finding vdev objects within a
  	pool configuration.
  
  cddl/sbin/zfsd/dev_ctl_event.cc:
  cddl/sbin/zfsd/dev_ctl_event.h:
  	Class hierarchy used to express events received via the devctl API.
  
  cddl/sbin/zfsd/zfsd_exception.cc:
  cddl/sbin/zfsd/zfsd_exception.h:
  	Definition of exceptions explicitly thrown by ZFSD.
  
  cddl/sbin/zfsd/Makefile:
  cddl/sbin/Makefile:
  	Add zfsd to the build.
  
  etc/rc.d/zfsd:
  	Rc script for ZFSD.
  
  etc/defaults/rc.conf:
  	ZFSD defaults, just like ZFS, to being disabled.
  
  etc/mtree/BSD.root.dist:
  	Create the etc/zfs/cases directory used to store persistent
  	fault data.
  
  Sponsored by:	Spectra Logic Corporation

Added:
  projects/zfsd/head/cddl/sbin/zfsd/
  projects/zfsd/head/cddl/sbin/zfsd/Makefile
  projects/zfsd/head/cddl/sbin/zfsd/callout.cc
  projects/zfsd/head/cddl/sbin/zfsd/callout.h
  projects/zfsd/head/cddl/sbin/zfsd/case_file.cc
  projects/zfsd/head/cddl/sbin/zfsd/case_file.h
  projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.cc
  projects/zfsd/head/cddl/sbin/zfsd/dev_ctl_event.h
  projects/zfsd/head/cddl/sbin/zfsd/vdev.cc
  projects/zfsd/head/cddl/sbin/zfsd/vdev.h
  projects/zfsd/head/cddl/sbin/zfsd/vdev_iterator.cc
  projects/zfsd/head/cddl/sbin/zfsd/vdev_iterator.h
  projects/zfsd/head/cddl/sbin/zfsd/zfsd.cc
  projects/zfsd/head/cddl/sbin/zfsd/zfsd.h
  projects/zfsd/head/cddl/sbin/zfsd/zfsd_exception.cc
  projects/zfsd/head/cddl/sbin/zfsd/zfsd_exception.h
  projects/zfsd/head/cddl/sbin/zfsd/zpool_list.cc
  projects/zfsd/head/cddl/sbin/zfsd/zpool_list.h
  projects/zfsd/head/etc/rc.d/zfsd
Modified:
  projects/zfsd/head/cddl/sbin/Makefile
  projects/zfsd/head/etc/defaults/rc.conf
  projects/zfsd/head/etc/mtree/BSD.root.dist

Modified: projects/zfsd/head/cddl/sbin/Makefile
==============================================================================
--- projects/zfsd/head/cddl/sbin/Makefile	Tue Aug 16 22:33:05 2011	(r224921)
+++ projects/zfsd/head/cddl/sbin/Makefile	Tue Aug 16 23:47:53 2011	(r224922)
@@ -2,11 +2,14 @@
 
 .include <bsd.own.mk>
 
-SUBDIR=	${_zfs} ${_zpool}
+SUBDIR=	${_zfs} ${_zpool} ${_zfsd}
 
 .if ${MK_ZFS} != "no"
 _zfs=	zfs
 _zpool=	zpool
+. if ${MK_CXX} != "no"
+_zfsd=	zfsd
+. endif
 .endif
 
 .include <bsd.subdir.mk>

Added: projects/zfsd/head/cddl/sbin/zfsd/Makefile
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/zfsd/head/cddl/sbin/zfsd/Makefile	Tue Aug 16 23:47:53 2011	(r224922)
@@ -0,0 +1,49 @@
+# $FreeBSD$
+
+PROG_CXX=	zfsd
+SRCS=		callout.cc		\
+		case_file.cc		\
+		dev_ctl_event.cc	\
+		vdev.cc			\
+		vdev_iterator.cc	\
+		zfsd.cc			\
+		zfsd_exception.cc	\
+		zpool_list.cc
+
+NO_MAN=		YES
+
+WARNS?=		0
+
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
+INCFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
+INCFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
+INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libumem/common
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
+INCFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
+INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
+INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
+INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
+INCFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
+
+CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS}
+
+#NO_SHARED?=	YES
+
+DPADD=  ${LIBZFS} ${LIBUTIL} ${LIBGEOM} ${LIBBSDXML} ${LIBSBUF} \
+	${LIBNVPAIR} ${LIBUUTIL}
+LDADD=  -lzfs -lutil -lgeom -lbsdxml -lsbuf -lnvpair -luutil
+
+#DPADD=  ${LIBAVL} ${LIBZFS} ${LIBGEOM} ${LIBBSDXML} ${LIBSBUF} \
+#        ${LIBM} ${LIBNVPAIR} ${LIBUUTIL} ${LIBUTIL}
+#LDADD=  -lavl -lzfs -lgeom -lbsdxml -lsbuf \
+#        -lm -lnvpair -luutil -lutil
+
+# Convenience target: write the list of files under ${.CURDIR} whose names
+# match *.[ch] or *.cc to cscope.files, then run cscope from ${.CURDIR}
+# with the same include flags used for compilation.
+cscope:
+	find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \
+	     > ${.CURDIR}/cscope.files
+	cd ${.CURDIR} && cscope -buq ${INCFLAGS}
+
+.include <bsd.prog.mk>

Added: projects/zfsd/head/cddl/sbin/zfsd/callout.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/zfsd/head/cddl/sbin/zfsd/callout.cc	Tue Aug 16 23:47:53 2011	(r224922)
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 2011 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+#include <signal.h>
+#include <syslog.h>
+
+#include "callout.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+
+std::list<Callout *> Callout::s_activeCallouts;
+bool		     Callout::s_alarmFired(false);
+
+/**
+ * Install Callout::AlarmSignalHandler as the handler for SIGALRM.
+ */
+void
+Callout::Init()
+{
+	signal(SIGALRM, &Callout::AlarmSignalHandler);
+}
+
+/**
+ * Remove this callout from the active list, if it is pending.
+ *
+ * Entries in the active list store their interval relative to the
+ * entry ahead of them, so the removed entry's interval is folded into
+ * its successor to keep the successor's total delay unchanged.
+ *
+ * This method is intentionally not declared inline: it is defined only
+ * in this file but is called from other translation units (for example
+ * CaseFile's destructor), and an inline function must be defined in
+ * every translation unit that uses it.
+ *
+ * NOTE(review): the interval timer is not reprogrammed here when the
+ * head of the list is removed - confirm whether that is intended.
+ *
+ * \return  true if the callout was pending and has been cancelled,
+ *          false if it was not pending.
+ */
+bool
+Callout::Stop()
+{
+	if (!IsPending())
+		return (false);
+
+	for (std::list<Callout *>::iterator it(s_activeCallouts.begin());
+	     it != s_activeCallouts.end(); ++it) {
+		if (*it != this)
+			continue;
+
+		it = s_activeCallouts.erase(it);
+		if (it != s_activeCallouts.end()) {
+
+			/*
+			 * Maintain correct interval for the
+			 * callouts that follow the just removed
+			 * entry.
+			 */
+			timeradd(&(*it)->m_interval, &m_interval,
+				 &(*it)->m_interval);
+		}
+		break;
+	}
+	m_pending = false;
+	return (true);
+}
+
+/**
+ * (Re)schedule this callout to fire after the given interval.
+ *
+ * Any pending instance of this callout is cancelled first.  The callout
+ * is then inserted into the active list, whose entries each store their
+ * interval relative to the entry ahead of them, and the interval timer
+ * is programmed if this callout ends up at the head of the list.
+ *
+ * \param interval  Time that must elapse before the callout fires.
+ *                  Must be non-zero.
+ * \param func      Callback to invoke on expiration.
+ * \param arg       Argument handed to the callback.
+ *
+ * \return  true if a pending callout was cancelled by this request,
+ *          false otherwise.
+ *
+ * \throws ZfsdException  if interval is zero.
+ */
+bool
+Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg)
+{
+	if (!timerisset(&interval))
+		throw ZfsdException("Callout::Reset: interval of 0");
+
+	const bool wasPending(Stop());
+
+	m_interval = interval;
+	m_func     = func;
+	m_arg      = arg;
+	m_pending  = true;
+
+	std::list<Callout *>::iterator pos(s_activeCallouts.begin());
+	while (pos != s_activeCallouts.end()) {
+		Callout *other(*pos);
+
+		if (!timercmp(&other->m_interval, &m_interval, <=)) {
+			/*
+			 * We expire before this entry.  Make its
+			 * interval relative to us and insert ahead
+			 * of it.
+			 */
+			timersub(&other->m_interval, &m_interval,
+				 &other->m_interval);
+			break;
+		}
+
+		/*
+		 * This entry expires no later than we do, so our
+		 * interval becomes relative to it.
+		 */
+		timersub(&m_interval, &other->m_interval, &m_interval);
+		++pos;
+	}
+	s_activeCallouts.insert(pos, this);
+
+	/* Only the head of the list drives the interval timer. */
+	if (s_activeCallouts.front() == this) {
+		itimerval timerval = { {0, 0}, m_interval };
+
+		setitimer(ITIMER_REAL, &timerval, NULL);
+	}
+
+	return (wasPending);
+}
+
+/**
+ * SIGALRM handler (installed by Callout::Init()).
+ *
+ * Only records that the alarm fired and calls
+ * ZfsDaemon::WakeEventLoop(); the callbacks themselves are run by
+ * ExpireCallouts(), which tests s_alarmFired.
+ */
+void
+Callout::AlarmSignalHandler(int)
+{
+	s_alarmFired = true;
+	ZfsDaemon::WakeEventLoop();
+}
+
+/**
+ * Run the callbacks of all callouts that are due, then program the
+ * interval timer for the next remaining callout, if any.
+ *
+ * Does nothing unless AlarmSignalHandler() has flagged an alarm since
+ * the last call.
+ */
+void
+Callout::ExpireCallouts()
+{
+	if (!s_alarmFired)
+		return;
+
+	s_alarmFired = false;
+	if (s_activeCallouts.empty()) {
+		/* Callout removal/SIGALRM race was lost. */
+		return;
+	}
+
+	/*
+	 * The head of the list is the callout the interval timer was
+	 * programmed for.  Fire it, and keep firing for as long as the
+	 * new head has a zero interval, i.e. it expires at the same
+	 * time as the callout that preceded it.
+	 */
+	for (;;) {
+		Callout *expired(s_activeCallouts.front());
+
+		s_activeCallouts.pop_front();
+		expired->m_pending = false;
+		expired->m_func(expired->m_arg);
+
+		if (s_activeCallouts.empty())
+			break;
+		if (timerisset(&s_activeCallouts.front()->m_interval) != 0)
+			break;
+	}
+
+	if (s_activeCallouts.empty())
+		return;
+
+	Callout *upcoming(s_activeCallouts.front());
+	itimerval timerval = { { 0, 0 }, upcoming->m_interval };
+
+	setitimer(ITIMER_REAL, &timerval, NULL);
+}

Added: projects/zfsd/head/cddl/sbin/zfsd/callout.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/zfsd/head/cddl/sbin/zfsd/callout.h	Tue Aug 16 23:47:53 2011	(r224922)
@@ -0,0 +1,170 @@
+/*-
+ * Copyright (c) 2011 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file callout.h
+ *
+ * \brief Interface for timer based callback services.
+ */
+
+#ifndef _CALLOUT_H_
+#define _CALLOUT_H_
+
+#include <sys/time.h>
+
+#include <list>
+
+/**
+ * \brief Type of the function callback from a Callout.
+ */
+typedef void CalloutFunc_t(void *);
+
+/**
+ * \brief Interface to a schedulable one-shot timer with the granularity
+ *        of the system clock (see setitimer(2)).
+ *
+ * Determination of callback expiration is triggered by the SIGALRM
+ * signal.  Callout callbacks are always delivered from Zfsd's event
+ * processing loop.
+ *
+ * Periodic actions can be triggered via the Callout mechanisms by
+ * resetting the Callout from within its callback.
+ */
+class Callout
+{
+public:
+
+	/**
+	 * Initialize the Callout subsystem.
+	 */
+	static void Init();
+
+	/**
+	 * Function called (via SIGALRM) when our interval
+	 * timer expires.
+	 */
+	static void AlarmSignalHandler(int);
+
+	/**
+	 * Execute callbacks for all callouts that have the same
+	 * expiration time as the first callout in the list.
+	 */
+	static void ExpireCallouts();
+
+	/** Constructor. */
+	Callout();
+
+	/**
+	 * Returns true if callout has not been stopped,
+	 * or deactivated since the last time the callout was
+	 * reset.
+	 */
+	bool IsActive() const;
+
+	/**
+	 * Returns true if callout is still waiting to expire.
+	 */
+	bool IsPending() const;
+
+	/**
+	 * Disestablish a callout.
+	 *
+	 * \return  true if the callout was pending and has been
+	 *          cancelled, false if it was not pending.
+	 */
+	bool Stop();
+
+	/**
+	 * \brief Establish or change a timeout.
+	 *
+	 * \param interval  Timeval indicating the time which must elapse
+	 *                  before this callout fires.
+	 * \param func	    Pointer to the callback function
+	 * \param arg       Argument pointer to pass to callback function
+	 *
+	 * \return  Cancelation status.
+	 *             true:  The previous callback was pending and therefore
+	 *                    was cancelled.
+	 *             false: The callout was not pending at the time of this
+	 *                    reset request.
+	 *          In all cases, a new callout is established.
+	 */
+	bool  Reset(const timeval &interval, CalloutFunc_t *func, void *arg);
+
+private:
+	/**
+	 * All active callouts sorted by expiration time.  The callout
+	 * with the nearest expiration time is at the head of the list.
+	 */
+	static std::list<Callout *> s_activeCallouts;
+
+	/**
+	 * The interval timer has expired.  This variable is set from
+	 * signal handler context and tested from Zfsd::EventLoop()
+	 * context via ExpireCallouts().
+	 *
+	 * NOTE(review): a flag written from a signal handler is usually
+	 * declared volatile sig_atomic_t - consider changing the type.
+	 */
+	static bool                 s_alarmFired;
+
+	/**
+	 * Time, relative to others in the active list, until
+	 * this callout is fired.
+	 */
+	timeval                     m_interval;
+
+	/** Callback function argument. */
+	void                       *m_arg;
+
+	/**
+	 * The callback function associated with this timer
+	 * entry.
+	 */
+	CalloutFunc_t              *m_func;
+
+	/** State of this callout. */
+	bool                        m_pending;
+};
+
+//- Callout public const methods ----------------------------------------------
+/** Report whether this callout is still waiting to expire. */
+inline bool
+Callout::IsPending() const
+{
+	return (m_pending);
+}
+
+//- Callout public methods ----------------------------------------------------
+/**
+ * Construct an idle callout: no callback, no callback argument, not
+ * pending, and a zero interval.
+ */
+inline
+Callout::Callout()
+ : m_arg(NULL),
+   m_func(NULL),
+   m_pending(false)
+{
+	timerclear(&m_interval);
+}
+
+#endif /* _CALLOUT_H_ */

Added: projects/zfsd/head/cddl/sbin/zfsd/case_file.cc
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ projects/zfsd/head/cddl/sbin/zfsd/case_file.cc	Tue Aug 16 23:47:53 2011	(r224922)
@@ -0,0 +1,684 @@
+/*-
+ * Copyright (c) 2011 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
+ */
+
+/**
+ * \file case_file.cc
+ *
+ * We keep case files for any leaf vdev that is not in the optimal state.
+ * However, we only serialize to disk those events that need to be preserved
+ * across reboots.  For now, this is just a log of soft errors which we
+ * accumulate in order to mark a device as degraded.
+ */
+#include <dirent.h>
+#include <iomanip>
+#include <sstream>
+#include <syslog.h>
+#include <unistd.h>
+
+#include "case_file.h"
+#include "vdev.h"
+#include "zfsd.h"
+#include "zfsd_exception.h"
+#include "zpool_list.h"
+
+/*============================ Namespace Control =============================*/
+using std::auto_ptr;
+using std::hex;
+using std::stringstream;
+using std::setfill;
+using std::setw;
+
+/*--------------------------------- CaseFile ---------------------------------*/
+//- CaseFile Static Data -------------------------------------------------------
+CaseFileList  CaseFile::s_activeCases;
+const string  CaseFile::s_caseFilePath = "/etc/zfs/cases";
+const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
+
+//- CaseFile Static Public Methods ---------------------------------------------
+/**
+ * Look up the active case for the vdev identified by the given pool
+ * and vdev GUIDs.
+ *
+ * \return  The matching CaseFile, or NULL if there is none.
+ */
+CaseFile *
+CaseFile::Find(uint64_t poolGUID, uint64_t vdevGUID)
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		CaseFile *candidate(*it);
+
+		/*
+		 * We only carry one active case per-vdev, so the
+		 * first match is the answer.
+		 */
+		if (candidate->PoolGUID() == poolGUID
+		 && candidate->VdevGUID() == vdevGUID)
+			return (candidate);
+		++it;
+	}
+	return (NULL);
+}
+
+/**
+ * Look up the first active case whose vdev physical path equals
+ * physPath.
+ *
+ * \return  The matching CaseFile, or NULL if there is none.
+ */
+CaseFile *
+CaseFile::Find(const string &physPath)
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		CaseFile *candidate(*it);
+
+		if (candidate->PhysicalPath() == physPath)
+			return (candidate);
+		++it;
+	}
+	return (NULL);
+}
+
+/**
+ * Return the active case for the given vdev, creating a new one if no
+ * case with the vdev's pool and vdev GUIDs exists yet.
+ */
+CaseFile &
+CaseFile::Create(Vdev &vdev)
+{
+	CaseFile *existing(Find(vdev.PoolGUID(), vdev.GUID()));
+
+	if (existing != NULL)
+		return (*existing);
+
+	return (*new CaseFile(vdev));
+}
+
+/**
+ * Load every case file found in s_caseFilePath.
+ *
+ * Directory entries are filtered by DeSerializeSelector() and each
+ * selected file is handed to DeSerializeFile().
+ */
+void
+CaseFile::DeSerialize()
+{
+	struct dirent **caseFiles(NULL);
+
+	int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
+			 DeSerializeSelector, /*compar*/NULL));
+
+	if (numCaseFiles == -1)
+		return;
+
+	/*
+	 * scandir() may hand back an allocated array even when no
+	 * entries were selected, so the array is released below
+	 * regardless of the count.
+	 */
+	for (int i = 0; i < numCaseFiles; i++) {
+
+		DeSerializeFile(caseFiles[i]->d_name);
+		free(caseFiles[i]);
+	}
+	free(caseFiles);
+}
+
+/** Log every active case via CaseFile::Log(). */
+void
+CaseFile::LogAll()
+{
+	CaseFileList::iterator it(s_activeCases.begin());
+
+	while (it != s_activeCases.end()) {
+		(*it)->Log();
+		++it;
+	}
+}
+
+/** Delete every active case. */
+void
+CaseFile::PurgeAll()
+{
+	/*
+	 * The CaseFile destructor removes the object from
+	 * s_activeCases, so deleting the head repeatedly drains
+	 * the list.
+	 */
+	while (!s_activeCases.empty())
+		delete s_activeCases.front();
+}
+
+//- CaseFile Public Methods ----------------------------------------------------
+/**
+ * Refresh the cached vdev state and physical path from the current
+ * pool configuration.
+ *
+ * \return  true if the pool and vdev for this case were found and the
+ *          cached data was updated, false otherwise.
+ */
+bool
+CaseFile::RefreshVdevState()
+{
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+	if (zpl.empty()) {
+		/* %ju expects uintmax_t, so convert explicitly. */
+		syslog(LOG_INFO,
+		       "CaseFile::RefreshVdevState: Unknown pool for "
+		       "Vdev(%ju,%ju).\n",
+		       static_cast<uintmax_t>(m_poolGUID),
+		       static_cast<uintmax_t>(m_vdevGUID));
+		return (false);
+	}
+
+	zpool_handle_t *casePool(zpl.front());
+	nvlist_t       *vdevConfig = VdevIterator(casePool).Find(VdevGUID());
+	if (vdevConfig == NULL) {
+		/* Identify the vdev by both its pool and vdev GUIDs. */
+		syslog(LOG_INFO,
+		       "CaseFile::RefreshVdevState: Unknown Vdev(%s,%s).\n",
+		       PoolGUIDString().c_str(), VdevGUIDString().c_str());
+		return (false);
+	}
+	Vdev caseVdev(casePool, vdevConfig);
+
+	m_vdevState    = caseVdev.State();
+	m_vdevPhysPath = caseVdev.PhysicalPath();
+	return (true);
+}
+
+/**
+ * Re-evaluate this case in light of a newly arrived device.
+ *
+ * If the supplied vdev carries this case's pool and vdev GUIDs, it is
+ * brought back online.  Otherwise, when the pool's autoreplace property
+ * is set and the device's physical path matches this case's physical
+ * path, the device is labeled and attached as a replacement.
+ *
+ * \param devPath   Device node path of the arriving device.
+ * \param physPath  Physical path of the arriving device.
+ * \param vdev      Optional Vdev whose pool/vdev GUIDs are compared
+ *                  against this case (may be NULL).
+ *
+ * \return  true if the device was consumed by this case (an online or
+ *          replace operation was issued), false otherwise.
+ */
+bool
+CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
+{
+	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
+
+	if (zpl.empty() || !RefreshVdevState()) {
+		/*
+		 * The pool or vdev for this case file is no longer
+		 * part of the configuration.  This can happen
+		 * if we process a device arrival notification
+		 * before seeing the ZFS configuration change
+		 * event.
+		 */
+		syslog(LOG_INFO,
+		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
+		       "Closing\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str());
+		Close();
+
+		/*
+		 * Since this event was not used to close this
+		 * case, do not report it as consumed.
+		 */
+		return (/*consumed*/false);
+	}
+	zpool_handle_t *pool(zpl.front());
+
+	if (VdevState() > VDEV_STATE_CANT_OPEN) {
+		/*
+		 * For now, newly discovered devices only help for
+		 * devices that are missing.  In the future, we might
+		 * use a newly inserted spare to replace a degraded
+		 * or faulted device.
+		 */
+		return (false);
+	}
+
+	if (vdev != NULL
+	 && vdev->PoolGUID() == m_poolGUID
+	 && vdev->GUID() == m_vdevGUID) {
+
+		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
+				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
+				  &m_vdevState);
+		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
+		       zpool_get_name(pool), vdev->GUIDString().c_str(),
+		       devPath.c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+
+		/*
+		 * Check the vdev state post the online action to see
+		 * if we can retire this case.
+		 */
+		CloseIfSolved();
+
+		return (/*consumed*/true);
+	}
+
+	/*
+	 * If the auto-replace policy is enabled, and we have physical
+	 * path information, try a physical path replacement.
+	 */
+	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (false);
+	}
+
+	if (PhysicalPath().empty()) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): No vdev physical path information.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (false);
+	}
+
+	if (physPath != PhysicalPath()) {
+		syslog(LOG_INFO,
+		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
+		       "Ignoring device insertion.\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str(),
+		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+		return (false);
+	}
+
+	/* Write a label on the newly inserted disk. */
+	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
+		syslog(LOG_ERR,
+		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str(),
+		       libzfs_error_action(g_zfsHandle),
+		       libzfs_error_description(g_zfsHandle));
+		return (/*consumed*/false);
+	}
+
+	/*
+	 * Build a root vdev/leaf vdev configuration suitable for
+	 * zpool_vdev_attach. Only enough data for the kernel to find
+	 * the device (i.e. type and disk device node path) are needed.
+	 */
+	nvlist_t *nvroot(NULL);
+	nvlist_t *newvd(NULL);
+	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
+	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
+		syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path: "
+		       "Unable to allocate configuration data.\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str());
+		if (nvroot != NULL)
+			nvlist_free(nvroot);
+		return (/*consumed*/false);
+	}
+
+	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0
+	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, devPath.c_str()) != 0
+	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
+	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+				    &newvd, 1) != 0) {
+		syslog(LOG_ERR, "Replace vdev(%s/%s) by physical path: "
+		       "Unable to initialize configuration data.\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str());
+		nvlist_free(newvd);
+		nvlist_free(nvroot);
+
+		/*
+		 * No replacement was attempted, so, as with the other
+		 * setup failures above, the device is not consumed.
+		 */
+		return (/*consumed*/false);
+	}
+
+	/* Data was copied when added to the root vdev. */
+	nvlist_free(newvd);
+
+	if (zpool_vdev_attach(pool, VdevGUIDString().c_str(),
+			      devPath.c_str(), nvroot,
+			      /*replace*/B_TRUE) != 0) {
+		syslog(LOG_ERR,
+		       "Replace vdev(%s/%s) by physical path(attach): %s: %s\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str(),
+		       libzfs_error_action(g_zfsHandle),
+		       libzfs_error_description(g_zfsHandle));
+	} else {
+		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
+		       zpool_get_name(pool), VdevGUIDString().c_str(),
+		       devPath.c_str());
+	}
+	nvlist_free(nvroot);
+
+	/* The attach was attempted; report the device as consumed. */
+	return (/*consumed*/true);
+}
+
+/**
+ * Re-evaluate this case in light of a ZFS event.
+ *
+ * \param event  The event to consider.
+ *
+ * \return  true if the event was consumed by this case or the case was
+ *          closed as a result of it, false otherwise.
+ */
+bool
+CaseFile::ReEvaluate(const ZfsEvent &event)
+{
+	if (!RefreshVdevState()) {
+		/*
+		 * The pool or vdev for this case file is no longer
+		 * part of the configuration.  This can happen
+		 * if we process a device arrival notification
+		 * before seeing the ZFS configuration change
+		 * event.
+		 */
+		syslog(LOG_INFO,
+		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
+		       "Closing\n",
+		       PoolGUIDString().c_str(),
+		       VdevGUIDString().c_str());
+		Close();
+
+		/*
+		 * The event itself did not cause the close, so it
+		 * is not reported as consumed.
+		 */
+		return (/*consumed*/false);
+	}
+
+	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
+		/*
+		 * The Vdev we represent has been removed from the
+		 * configuration.  This case is no longer of value.
+		 */
+		Close();
+
+		return (/*consumed*/true);
+	}
+
+	bool eventConsumed(false);
+
+	if (event.Value("class") == "resource.fs.zfs.removed") {
+
+		/*
+		 * Tentative I/O error events for this case were most
+		 * likely caused by the hot-unplug of this device, so
+		 * drop them.
+		 */
+		PurgeTentativeEvents();
+
+		/*
+		 * A recent drive arrival may be usable to solve this
+		 * case; ask for a rescan of the system's drives.
+		 */
+		ZfsDaemon::RequestSystemRescan();
+
+		eventConsumed = true;
+	} else if (event.Value("class") == "ereport.fs.zfs.io"
+		|| event.Value("class") == "ereport.fs.zfs.checksum") {
+
+		/*
+		 * Hold the error as tentative and, unless the timer
+		 * is already pending, start the grace period.
+		 */
+		m_tentativeEvents.push_front(event.DeepCopy());
+		if (!m_tentativeTimer.IsPending())
+			m_tentativeTimer.Reset(s_removeGracePeriod,
+					       OnGracePeriodEnded, this);
+		eventConsumed = true;
+	}
+
+	const bool caseClosed(CloseIfSolved());
+
+	return (eventConsumed || caseClosed);
+}
+
+/**
+ * Close this case if it has no outstanding events and its vdev state
+ * is above VDEV_STATE_CANT_OPEN.
+ *
+ * \return  true if the case was closed, false otherwise.
+ */
+bool
+CaseFile::CloseIfSolved()
+{
+	/* Outstanding events mean the case is still open. */
+	if (!m_events.empty() || !m_tentativeEvents.empty())
+		return (false);
+
+	/*
+	 * We currently do not track or take actions on
+	 * devices in the degraded or faulted state.
+	 * Once we have support for spare pools, we'll
+	 * retain these cases so that any spares added in
+	 * the future can be applied to them.
+	 */
+	if (VdevState() > VDEV_STATE_CANT_OPEN
+	 && VdevState() <= VDEV_STATE_HEALTHY) {
+		Close();
+		return (true);
+	}
+
+	/*
+	 * Re-serialize the case in order to remove any
+	 * previous event data.
+	 */
+	Serialize();
+
+	return (false);
+}
+
+/**
+ * Write this case's identity, vdev state, and any tentative and
+ * recorded events to syslog at LOG_INFO priority.
+ */
+void
+CaseFile::Log()
+{
+	syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
+	       VdevGUIDString().c_str(), PhysicalPath().c_str());
+	syslog(LOG_INFO, "\tVdev State = %s\n",
+	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
+
+	if (!m_tentativeEvents.empty()) {
+		syslog(LOG_INFO, "\t=== Tentative Events ===\n");
+
+		DevCtlEventList::iterator tentative(m_tentativeEvents.begin());
+		for (; tentative != m_tentativeEvents.end(); ++tentative)
+			(*tentative)->Log(LOG_INFO);
+	}
+
+	if (!m_events.empty()) {
+		syslog(LOG_INFO, "\t=== Events ===\n");
+
+		DevCtlEventList::iterator recorded(m_events.begin());
+		for (; recorded != m_events.end(); ++recorded)
+			(*recorded)->Log(LOG_INFO);
+	}
+}
+
+//- CaseFile Static Protected Methods ------------------------------------------
+/**
+ * Callout trampoline: forward to the CaseFile instance passed as the
+ * callback argument.
+ *
+ * \param arg  The CaseFile registered with the callout.
+ */
+void
+CaseFile::OnGracePeriodEnded(void *arg)
+{
+	static_cast<CaseFile *>(arg)->OnGracePeriodEnded();
+}
+
+/**
+ * scandir(3) filter selecting case files.
+ *
+ * \param dirEntry  Directory entry under consideration.
+ *
+ * \return  1 if the entry is a regular file whose name yields both a
+ *          pool and a vdev GUID when parsed as
+ *          "pool_<guid>_vdev_<guid>.case", 0 otherwise.
+ */
+int
+CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
+{
+	uintmax_t poolGUID;
+	uintmax_t vdevGUID;
+
+	if (dirEntry->d_type != DT_REG)
+		return (0);
+
+	const int fieldsParsed(sscanf(dirEntry->d_name,
+				      "pool_%ju_vdev_%ju.case",
+				      &poolGUID, &vdevGUID));
+
+	return (fieldsParsed == 2 ? 1 : 0);
+}
+
+/**
+ * Load the events stored in a single case file.
+ *
+ * The events are appended to the existing case for the vdev named by
+ * the file, or to a newly created case if none exists.  Files that
+ * refer to an unknown pool or vdev, that belong to a vdev whose state
+ * is already between CANT_OPEN and HEALTHY (exclusive), or that cannot
+ * be parsed are removed.
+ *
+ * \param fileName  Name of the case file, relative to s_caseFilePath.
+ */
+void
+CaseFile::DeSerializeFile(const char *fileName)
+{
+	string	  fullName(s_caseFilePath + '/' + fileName);
+	string	  evString;
+	CaseFile *existingCaseFile(NULL);
+	CaseFile *caseFile(NULL);
+	int	  fd(-1);
+
+	try {
+		uintmax_t poolGUID;
+		uintmax_t vdevGUID;
+		nvlist_t *vdevConf;
+
+		/* Without both GUIDs there is nothing to look up. */
+		if (sscanf(fileName, "pool_%ju_vdev_%ju.case",
+			   &poolGUID, &vdevGUID) != 2)
+			return;
+
+		existingCaseFile = Find(poolGUID, vdevGUID);
+		if (existingCaseFile != NULL) {
+			/*
+			 * If the vdev is already degraded or faulted,
+			 * there's no point in keeping the state around
+			 * that we use to put a drive into the degraded
+			 * state.  However, if the vdev is simply missing,
+			 * preserve the case data in the hopes that it will
+			 * return.
+			 */
+			caseFile = existingCaseFile;
+			vdev_state curState(caseFile->VdevState());
+			if (curState > VDEV_STATE_CANT_OPEN
+			 && curState < VDEV_STATE_HEALTHY) {
+				/*
+				 * fileName is relative to the case
+				 * directory, so unlink by full path.
+				 */
+				unlink(fullName.c_str());
+				return;
+			}
+		} else {
+			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
+			if (zpl.empty()
+			 || (vdevConf = VdevIterator(zpl.front())
+						    .Find(vdevGUID)) == NULL) {
+				/*
+				 * Either the pool no longer exists
+				 * or this vdev is no longer a member of
+				 * the pool.
+				 */
+				unlink(fullName.c_str());
+				return;
+			}
+
+			/*
+			 * Any vdev we find that does not have a case file
+			 * must be in the healthy state and thus worthy of
+			 * continued SERD data tracking.
+			 */
+			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
+		}
+
+		fd = open(fullName.c_str(), O_RDONLY);
+		if (fd == -1) {
+			throw ZfsdException("CaseFile::DeSerialize: Unable to "
+			       "read %s.\n", fileName);
+		}
+
+		/* Re-load EventData */
+		EventBuffer eventBuffer(fd);
+		while (eventBuffer.ExtractEvent(evString)) {
+			DevCtlEvent *event(DevCtlEvent::CreateEvent(evString));
+			caseFile->m_events.push_back(event);
+		}
+		close(fd);
+		fd = -1;
+	} catch (const ParseException &exp) {
+
+		exp.Log(evString);
+		if (caseFile != existingCaseFile)
+			delete caseFile;
+		if (fd != -1)
+			close(fd);
+
+		/*
+		 * Since we can't parse the file, unlink it so we don't
+		 * trip over it again.
+		 */
+		unlink(fullName.c_str());
+	} catch (const ZfsdException &zfsException) {
+
+		zfsException.Log();
+		if (caseFile != existingCaseFile)
+			delete caseFile;
+
+		/* Do not leak the descriptor if it was already opened. */
+		if (fd != -1)
+			close(fd);
+	}
+}
+
+//- CaseFile Protected Methods -------------------------------------------------
+/**
+ * Construct a case for the given vdev, register it in the list of
+ * active cases, and log its creation.
+ *
+ * \param vdev  The vdev this case tracks; its GUIDs, state and
+ *              physical path are copied into the case.
+ */
+CaseFile::CaseFile(const Vdev &vdev)
+ : m_poolGUID(vdev.PoolGUID()),
+   m_vdevGUID(vdev.GUID()),
+   m_vdevState(vdev.State()),
+   m_vdevPhysPath(vdev.PhysicalPath())
+{
+	/* Cache the textual forms of both GUIDs. */
+	stringstream vdevGUIDStream;
+	vdevGUIDStream << m_vdevGUID;
+	m_vdevGUIDString = vdevGUIDStream.str();
+
+	stringstream poolGUIDStream;
+	poolGUIDStream << m_poolGUID;
+	m_poolGUIDString = poolGUIDStream.str();
+
+	s_activeCases.push_back(this);
+
+	syslog(LOG_INFO, "Creating new CaseFile:\n");
+	Log();
+}
+
+/**
+ * Destructor: purge recorded and tentative events, stop the tentative
+ * event timer, and remove this case from the list of active cases.
+ */
+CaseFile::~CaseFile()
+{
+	PurgeEvents();
+	PurgeTentativeEvents();
+	/* Stop the timer so it does not fire with a pointer to this object. */
+	m_tentativeTimer.Stop();
+	s_activeCases.remove(this);
+}
+
+void
+CaseFile::PurgeEvents()
+{
+	for (DevCtlEventList::iterator event(m_events.begin());

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-projects mailing list