svn commit: r252222 - in stable/9: etc/mtree include sbin sbin/nvmecontrol sys/amd64/conf sys/conf sys/dev/nvd sys/dev/nvme sys/i386/conf sys/modules sys/modules/nvme

Jim Harris jim.harris at gmail.com
Tue Jun 25 23:59:19 UTC 2013


On Tue, Jun 25, 2013 at 4:52 PM, Jim Harris <jimharris at freebsd.org> wrote:

> Author: jimharris
> Date: Tue Jun 25 23:52:39 2013
> New Revision: 252222
> URL: http://svnweb.freebsd.org/changeset/base/252222
>
> Log:
>   240618, 240621, 240633, 240671, 240672, 240697, 240700, 241433,
>   241434, 241657, 241658, 241659, 241660, 241661, 241662, 241663,
>   241664, 241665, 241689, 242420, 243951, 244410, 244411, 244413,
>   244549, 245136, 247963, 248729, 248730, 248731, 248732, 248733,
>   248734, 248735, 248736, 248737, 248738, 248739, 248740, 248741,
>   248746, 248747, 248748, 248749, 248754, 248755, 248756, 248757,
>   248758, 248759, 248760, 248761, 248762, 248763, 248764, 248765,
>   248766, 248767, 248768, 248769, 248770, 248771, 248772, 248773,
>   248780, 248834, 248835, 248913, 248977, 249067, 249416, 249417,
>   249418, 249419, 249420, 249421, 249422, 249432
>
>
Bad commit message, obviously.  I will follow up with a forced commit that
carries the complete commit message.


> Deleted:
>   stable/9/sys/dev/nvme/nvme_uio.c
> Modified:
>   stable/9/etc/mtree/BSD.include.dist
>   stable/9/include/Makefile
>   stable/9/sbin/Makefile.amd64
>   stable/9/sbin/Makefile.i386
>   stable/9/sbin/nvmecontrol/nvmecontrol.8
>   stable/9/sbin/nvmecontrol/nvmecontrol.c
>   stable/9/sys/amd64/conf/NOTES
>   stable/9/sys/conf/files.amd64
>   stable/9/sys/conf/files.i386
>   stable/9/sys/dev/nvd/nvd.c
>   stable/9/sys/dev/nvme/nvme.c
>   stable/9/sys/dev/nvme/nvme.h
>   stable/9/sys/dev/nvme/nvme_ctrlr.c
>   stable/9/sys/dev/nvme/nvme_ctrlr_cmd.c
>   stable/9/sys/dev/nvme/nvme_ns.c
>   stable/9/sys/dev/nvme/nvme_ns_cmd.c
>   stable/9/sys/dev/nvme/nvme_private.h
>   stable/9/sys/dev/nvme/nvme_qpair.c
>   stable/9/sys/dev/nvme/nvme_sysctl.c
>   stable/9/sys/dev/nvme/nvme_test.c
>   stable/9/sys/i386/conf/NOTES
>   stable/9/sys/modules/Makefile
>   stable/9/sys/modules/nvme/Makefile
> Directory Properties:
>   stable/9/etc/   (props changed)
>   stable/9/etc/mtree/   (props changed)
>   stable/9/include/   (props changed)
>   stable/9/sbin/   (props changed)
>   stable/9/sbin/nvmecontrol/   (props changed)
>   stable/9/sys/   (props changed)
>   stable/9/sys/conf/   (props changed)
>   stable/9/sys/dev/   (props changed)
>   stable/9/sys/modules/   (props changed)
>
> Modified: stable/9/etc/mtree/BSD.include.dist
>
> ==============================================================================
> --- stable/9/etc/mtree/BSD.include.dist Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/etc/mtree/BSD.include.dist Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -126,6 +126,8 @@
>              mpilib
>              ..
>          ..
> +        nvme
> +        ..
>          ofw
>          ..
>          pbio
>
> Modified: stable/9/include/Makefile
>
> ==============================================================================
> --- stable/9/include/Makefile   Tue Jun 25 23:30:48 2013        (r252221)
> +++ stable/9/include/Makefile   Tue Jun 25 23:52:39 2013        (r252222)
> @@ -44,8 +44,8 @@ LDIRS=        bsm cam geom net net80211 netatal
>  LSUBDIRS=      cam/ata cam/scsi \
>         dev/acpica dev/agp dev/an dev/bktr dev/ciss dev/filemon
> dev/firewire \
>         dev/hwpmc \
> -       dev/ic dev/iicbus ${_dev_ieee488} dev/io dev/lmc dev/mfi dev/ofw \
> -       dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/smbus \
> +       dev/ic dev/iicbus ${_dev_ieee488} dev/io dev/lmc dev/mfi dev/nvme \
> +       dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus
> dev/smbus \
>         dev/speaker dev/usb dev/utopia dev/vkbd dev/wi \
>         fs/devfs fs/fdescfs fs/fifofs fs/msdosfs fs/nfs fs/ntfs fs/nullfs \
>         ${_fs_nwfs} fs/portalfs fs/procfs fs/smbfs fs/udf fs/unionfs \
>
> Modified: stable/9/sbin/Makefile.amd64
>
> ==============================================================================
> --- stable/9/sbin/Makefile.amd64        Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/sbin/Makefile.amd64        Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -2,3 +2,4 @@
>
>  SUBDIR += bsdlabel
>  SUBDIR += fdisk
> +SUBDIR += nvmecontrol
>
> Modified: stable/9/sbin/Makefile.i386
>
> ==============================================================================
> --- stable/9/sbin/Makefile.i386 Tue Jun 25 23:30:48 2013        (r252221)
> +++ stable/9/sbin/Makefile.i386 Tue Jun 25 23:52:39 2013        (r252222)
> @@ -2,4 +2,5 @@
>
>  SUBDIR += bsdlabel
>  SUBDIR += fdisk
> +SUBDIR += nvmecontrol
>  SUBDIR += sconfig
>
> Modified: stable/9/sbin/nvmecontrol/nvmecontrol.8
>
> ==============================================================================
> --- stable/9/sbin/nvmecontrol/nvmecontrol.8     Tue Jun 25 23:30:48 2013
>      (r252221)
> +++ stable/9/sbin/nvmecontrol/nvmecontrol.8     Tue Jun 25 23:52:39 2013
>      (r252222)
> @@ -33,7 +33,7 @@
>  .\"
>  .\" $FreeBSD$
>  .\"
> -.Dd September 17, 2012
> +.Dd March 26, 2013
>  .Dt NVMECONTROL 8
>  .Os
>  .Sh NAME
> @@ -54,7 +54,10 @@
>  .Op Fl p
>  .Aq Fl s Ar size_in_bytes
>  .Aq Fl t Ar time_in_sec
> -.Aq device id
> +.Aq namespace id
> +.Nm
> +.Ic reset
> +.Aq controller id
>  .Sh DESCRIPTION
>  NVM Express (NVMe) is a storage protocol standard, for SSDs and other
>  high-speed storage devices over PCI Express.
> @@ -62,6 +65,7 @@ high-speed storage devices over PCI Expr
>  .Dl nvmecontrol devlist
>  .Pp
>  Display a list of NVMe controllers and namespaces along with their device
> nodes.
> +.Pp
>  .Dl nvmecontrol identify nvme0
>  .Pp
>  Display a human-readable summary of the nvme0 IDENTIFY_CONTROLLER data.
> @@ -77,6 +81,9 @@ Run a performance test on nvme0ns1 using
>  thread will issue a single 512 byte read command.  Results are printed to
>  stdout when 30 seconds expires.
>  .Pp
> +.Dl nvmecontrol reset nvme0
> +.Pp
> +Perform a controller-level reset of the nvme0 controller.
>  .Sh AUTHORS
>  .An -nosplit
>  .Nm
>
> Modified: stable/9/sbin/nvmecontrol/nvmecontrol.c
>
> ==============================================================================
> --- stable/9/sbin/nvmecontrol/nvmecontrol.c     Tue Jun 25 23:30:48 2013
>      (r252221)
> +++ stable/9/sbin/nvmecontrol/nvmecontrol.c     Tue Jun 25 23:52:39 2013
>      (r252222)
> @@ -56,6 +56,9 @@ __FBSDID("$FreeBSD$");
>  "                            <-i intr|wait> [-f refthread] [-p]\n"
>      \
>  "                            <namespace id>\n"
>
> +#define RESET_USAGE
>     \
> +"       nvmecontrol reset <controller id>\n"
> +
>  static void perftest_usage(void);
>
>  static void
> @@ -64,6 +67,7 @@ usage(void)
>         fprintf(stderr, "usage:\n");
>         fprintf(stderr, DEVLIST_USAGE);
>         fprintf(stderr, IDENTIFY_USAGE);
> +       fprintf(stderr, RESET_USAGE);
>         fprintf(stderr, PERFTEST_USAGE);
>         exit(EX_USAGE);
>  }
> @@ -206,6 +210,53 @@ ns_get_sector_size(struct nvme_namespace
>         return (1 << nsdata->lbaf[0].lbads);
>  }
>
> +static void
> +read_controller_data(int fd, struct nvme_controller_data *cdata)
> +{
> +       struct nvme_pt_command  pt;
> +
> +       memset(&pt, 0, sizeof(pt));
> +       pt.cmd.opc = NVME_OPC_IDENTIFY;
> +       pt.cmd.cdw10 = 1;
> +       pt.buf = cdata;
> +       pt.len = sizeof(*cdata);
> +       pt.is_read = 1;
> +
> +       if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) {
> +               printf("Identify request failed. errno=%d (%s)\n",
> +                   errno, strerror(errno));
> +               exit(EX_IOERR);
> +       }
> +
> +       if (nvme_completion_is_error(&pt.cpl)) {
> +               printf("Passthrough command returned error.\n");
> +               exit(EX_IOERR);
> +       }
> +}
> +
> +static void
> +read_namespace_data(int fd, int nsid, struct nvme_namespace_data *nsdata)
> +{
> +       struct nvme_pt_command  pt;
> +
> +       memset(&pt, 0, sizeof(pt));
> +       pt.cmd.opc = NVME_OPC_IDENTIFY;
> +       pt.cmd.nsid = nsid;
> +       pt.buf = nsdata;
> +       pt.len = sizeof(*nsdata);
> +       pt.is_read = 1;
> +
> +       if (ioctl(fd, NVME_PASSTHROUGH_CMD, &pt) < 0) {
> +               printf("Identify request failed. errno=%d (%s)\n",
> +                   errno, strerror(errno));
> +               exit(EX_IOERR);
> +       }
> +
> +       if (nvme_completion_is_error(&pt.cpl)) {
> +               printf("Passthrough command returned error.\n");
> +               exit(EX_IOERR);
> +       }
> +}
>
>  static void
>  devlist(int argc, char *argv[])
> @@ -241,34 +292,18 @@ devlist(int argc, char *argv[])
>
>                 fd = open(path, O_RDWR);
>                 if (fd < 0) {
> -                       printf("Could not open %s.\n", path);
> +                       printf("Could not open %s. errno=%d (%s)\n", path,
> +                           errno, strerror(errno));
>                         exit_code = EX_NOPERM;
>                         continue;
>                 }
>
> -               if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) {
> -                       printf("ioctl to %s failed.\n", path);
> -                       exit_code = EX_IOERR;
> -                       continue;
> -               }
> -
> +               read_controller_data(fd, &cdata);
>                 printf("%6s: %s\n", name, cdata.mn);
>
>                 for (i = 0; i < cdata.nn; i++) {
>                         sprintf(name, "nvme%dns%d", ctrlr, i+1);
> -                       sprintf(path, "/dev/%s", name);
> -
> -                       fd = open(path, O_RDWR);
> -                       if (fd < 0) {
> -                               printf("Could not open %s.\n", path);
> -                               exit_code = EX_NOPERM;
> -                               continue;
> -                       }
> -                       if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) ==
> -1) {
> -                               printf("ioctl to %s failed.\n", path);
> -                               exit_code = EX_IOERR;
> -                               continue;
> -                       }
> +                       read_namespace_data(fd, i+1, &nsdata);
>                         printf("  %10s (%lldGB)\n",
>                                 name,
>                                 nsdata.nsze *
> @@ -307,21 +342,20 @@ identify_ctrlr(int argc, char *argv[])
>
>         sprintf(path, "/dev/%s", argv[optind]);
>
> -       if (stat(path, &devstat) != 0) {
> -               printf("Invalid device node '%s'.\n", path);
> +       if (stat(path, &devstat) < 0) {
> +               printf("Invalid device node %s. errno=%d (%s)\n", path,
> errno,
> +                   strerror(errno));
>                 exit(EX_IOERR);
>         }
>
>         fd = open(path, O_RDWR);
>         if (fd < 0) {
> -               printf("Could not open %s.\n", path);
> +               printf("Could not open %s. errno=%d (%s)\n", path, errno,
> +                   strerror(errno));
>                 exit(EX_NOPERM);
>         }
>
> -       if (ioctl(fd, NVME_IDENTIFY_CONTROLLER, &cdata) == -1) {
> -               printf("ioctl to %s failed.\n", path);
> -               exit(EX_IOERR);
> -       }
> +       read_controller_data(fd, &cdata);
>
>         if (hexflag == 1) {
>                 if (verboseflag == 1)
> @@ -348,7 +382,8 @@ identify_ns(int argc, char *argv[])
>         struct nvme_namespace_data      nsdata;
>         struct stat                     devstat;
>         char                            path[64];
> -       int                             ch, fd, hexflag = 0, hexlength;
> +       char                            *nsloc;
> +       int                             ch, fd, hexflag = 0, hexlength,
> nsid;
>         int                             verboseflag = 0;
>
>         while ((ch = getopt(argc, argv, "vx")) != -1) {
> @@ -364,23 +399,55 @@ identify_ns(int argc, char *argv[])
>                 }
>         }
>
> +       /*
> +        * Check if the specified device node exists before continuing.
> +        *  This is a cleaner check for cases where the correct controller
> +        *  is specified, but an invalid namespace on that controller.
> +        */
>         sprintf(path, "/dev/%s", argv[optind]);
> +       if (stat(path, &devstat) < 0) {
> +               printf("Invalid device node %s. errno=%d (%s)\n", path,
> errno,
> +                   strerror(errno));
> +               exit(EX_IOERR);
> +       }
> +
> +       nsloc = strstr(argv[optind], "ns");
> +       if (nsloc == NULL) {
> +               printf("Invalid namepsace %s.\n", argv[optind]);
> +               exit(EX_IOERR);
> +       }
> +
> +       /*
> +        * Pull the namespace id from the string. +2 skips past the "ns"
> part
> +        *  of the string.
> +        */
> +       nsid = strtol(nsloc + 2, NULL, 10);
> +       if (nsid == 0 && errno != 0) {
> +               printf("Invalid namespace ID %s.\n", argv[optind]);
> +               exit(EX_IOERR);
> +       }
>
> -       if (stat(path, &devstat) != 0) {
> -               printf("Invalid device node '%s'.\n", path);
> +       /*
> +        * We send IDENTIFY commands to the controller, not the namespace,
> +        *  since it is an admin cmd.  So the path should only include the
> +        *  nvmeX part of the nvmeXnsY string.
> +        */
> +       sprintf(path, "/dev/");
> +       strncat(path, argv[optind], nsloc - argv[optind]);
> +       if (stat(path, &devstat) < 0) {
> +               printf("Invalid device node %s. errno=%d (%s)\n", path,
> errno,
> +                   strerror(errno));
>                 exit(EX_IOERR);
>         }
>
>         fd = open(path, O_RDWR);
>         if (fd < 0) {
> -               printf("Could not open %s.\n", path);
> +               printf("Could not open %s. errno=%d (%s)\n", path, errno,
> +                   strerror(errno));
>                 exit(EX_NOPERM);
>         }
>
> -       if (ioctl(fd, NVME_IDENTIFY_NAMESPACE, &nsdata) == -1) {
> -               printf("ioctl to %s failed.\n", path);
> -               exit(EX_IOERR);
> -       }
> +       read_namespace_data(fd, nsid, &nsdata);
>
>         if (hexflag == 1) {
>                 if (verboseflag == 1)
> @@ -423,7 +490,7 @@ identify(int argc, char *argv[])
>         optind = 1;
>
>         /*
> -        * If devicde node contains "ns", we consider it a namespace,
> +        * If device node contains "ns", we consider it a namespace,
>          *  otherwise, consider it a controller.
>          */
>         if (strstr(target, "ns") == NULL)
> @@ -475,7 +542,7 @@ perftest(int argc, char *argv[])
>         char                            path[64];
>         u_long                          ioctl_cmd = NVME_IO_TEST;
>         bool                            nflag, oflag, sflag, tflag;
> -       int                             err, perthread = 0;
> +       int                             perthread = 0;
>
>         nflag = oflag = sflag = tflag = false;
>         name = NULL;
> @@ -565,14 +632,14 @@ perftest(int argc, char *argv[])
>
>         fd = open(path, O_RDWR);
>         if (fd < 0) {
> -               fprintf(stderr, "%s not valid device.\n", path);
> +               fprintf(stderr, "%s not valid device. errno=%d (%s)\n",
> path,
> +                   errno, strerror(errno));
>                 perftest_usage();
>         }
>
> -       err = ioctl(fd, ioctl_cmd, &io_test);
> -
> -       if (err) {
> -               fprintf(stderr, "NVME_IO_TEST returned %d\n", errno);
> +       if (ioctl(fd, ioctl_cmd, &io_test) < 0) {
> +               fprintf(stderr, "NVME_IO_TEST failed. errno=%d (%s)\n",
> errno,
> +                   strerror(errno));
>                 exit(EX_IOERR);
>         }
>
> @@ -580,6 +647,44 @@ perftest(int argc, char *argv[])
>         exit(EX_OK);
>  }
>
> +static void
> +reset_ctrlr(int argc, char *argv[])
> +{
> +       struct stat                     devstat;
> +       char                            path[64];
> +       int                             ch, fd;
> +
> +       while ((ch = getopt(argc, argv, "")) != -1) {
> +               switch ((char)ch) {
> +               default:
> +                       usage();
> +               }
> +       }
> +
> +       sprintf(path, "/dev/%s", argv[optind]);
> +
> +       if (stat(path, &devstat) < 0) {
> +               printf("Invalid device node %s. errno=%d (%s)\n", path,
> errno,
> +                   strerror(errno));
> +               exit(EX_IOERR);
> +       }
> +
> +       fd = open(path, O_RDWR);
> +       if (fd < 0) {
> +               printf("Could not open %s. errno=%d (%s)\n", path, errno,
> +                   strerror(errno));
> +               exit(EX_NOPERM);
> +       }
> +
> +       if (ioctl(fd, NVME_RESET_CONTROLLER) < 0) {
> +               printf("Reset request to %s failed. errno=%d (%s)\n", path,
> +                   errno, strerror(errno));
> +               exit(EX_IOERR);
> +       }
> +
> +       exit(EX_OK);
> +}
> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -593,6 +698,8 @@ main(int argc, char *argv[])
>                 identify(argc-1, &argv[1]);
>         else if (strcmp(argv[1], "perftest") == 0)
>                 perftest(argc-1, &argv[1]);
> +       else if (strcmp(argv[1], "reset") == 0)
> +               reset_ctrlr(argc-1, &argv[1]);
>
>         usage();
>
>
> Modified: stable/9/sys/amd64/conf/NOTES
>
> ==============================================================================
> --- stable/9/sys/amd64/conf/NOTES       Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/sys/amd64/conf/NOTES       Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -433,6 +433,11 @@ device             isci
>  options                ISCI_LOGGING    # enable debugging in isci HAL
>
>  #
> +# NVM Express (NVMe) support
> +device         nvme    # base NVMe driver
> +device         nvd     # expose NVMe namespaces as disks, depends on nvme
> +
> +#
>  # SafeNet crypto driver: can be moved to the MI NOTES as soon as
>  # it's tested on a big-endian machine
>  #
>
> Modified: stable/9/sys/conf/files.amd64
>
> ==============================================================================
> --- stable/9/sys/conf/files.amd64       Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/sys/conf/files.amd64       Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -213,7 +213,16 @@ dev/kbd/kbd.c                      optional
>  atkbd | sc | uk
>  dev/lindev/full.c              optional        lindev
>  dev/lindev/lindev.c            optional        lindev
>  dev/nfe/if_nfe.c               optional        nfe pci
> +dev/nvd/nvd.c                  optional        nvd nvme
>  dev/nve/if_nve.c               optional        nve pci
> +dev/nvme/nvme.c                        optional        nvme
> +dev/nvme/nvme_ctrlr.c          optional        nvme
> +dev/nvme/nvme_ctrlr_cmd.c      optional        nvme
> +dev/nvme/nvme_ns.c             optional        nvme
> +dev/nvme/nvme_ns_cmd.c         optional        nvme
> +dev/nvme/nvme_qpair.c          optional        nvme
> +dev/nvme/nvme_sysctl.c         optional        nvme
> +dev/nvme/nvme_test.c           optional        nvme
>  dev/nvram/nvram.c              optional        nvram isa
>  dev/random/ivy.c               optional        random rdrand_rng
>  dev/random/nehemiah.c          optional        random padlock_rng
>
> Modified: stable/9/sys/conf/files.i386
>
> ==============================================================================
> --- stable/9/sys/conf/files.i386        Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/sys/conf/files.i386        Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -222,7 +222,16 @@ dev/lindev/lindev.c                optional lindev
>  dev/mse/mse.c                  optional mse
>  dev/mse/mse_isa.c              optional mse isa
>  dev/nfe/if_nfe.c               optional nfe pci
> +dev/nvd/nvd.c                  optional nvd nvme
>  dev/nve/if_nve.c               optional nve pci
> +dev/nvme/nvme.c                        optional nvme
> +dev/nvme/nvme_ctrlr.c          optional nvme
> +dev/nvme/nvme_ctrlr_cmd.c      optional nvme
> +dev/nvme/nvme_ns.c             optional nvme
> +dev/nvme/nvme_ns_cmd.c         optional nvme
> +dev/nvme/nvme_qpair.c          optional nvme
> +dev/nvme/nvme_sysctl.c         optional nvme
> +dev/nvme/nvme_test.c           optional nvme
>  dev/nvram/nvram.c              optional nvram isa
>  dev/pcf/pcf_isa.c              optional pcf
>  dev/random/ivy.c               optional random rdrand_rng
>
> Modified: stable/9/sys/dev/nvd/nvd.c
>
> ==============================================================================
> --- stable/9/sys/dev/nvd/nvd.c  Tue Jun 25 23:30:48 2013        (r252221)
> +++ stable/9/sys/dev/nvd/nvd.c  Tue Jun 25 23:52:39 2013        (r252222)
> @@ -45,9 +45,12 @@ struct nvd_disk;
>  static disk_ioctl_t nvd_ioctl;
>  static disk_strategy_t nvd_strategy;
>
> -static void create_geom_disk(void *, struct nvme_namespace *ns);
> +static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
>  static void destroy_geom_disk(struct nvd_disk *ndisk);
>
> +static void *nvd_new_controller(struct nvme_controller *ctrlr);
> +static void nvd_controller_fail(void *ctrlr);
> +
>  static int nvd_load(void);
>  static void nvd_unload(void);
>
> @@ -67,10 +70,18 @@ struct nvd_disk {
>
>         uint32_t                cur_depth;
>
> -       TAILQ_ENTRY(nvd_disk)   tailq;
> +       TAILQ_ENTRY(nvd_disk)   global_tailq;
> +       TAILQ_ENTRY(nvd_disk)   ctrlr_tailq;
> +};
> +
> +struct nvd_controller {
> +
> +       TAILQ_ENTRY(nvd_controller)     tailq;
> +       TAILQ_HEAD(, nvd_disk)          disk_head;
>  };
>
> -TAILQ_HEAD(, nvd_disk) nvd_head;
> +static TAILQ_HEAD(, nvd_controller)    ctrlr_head;
> +static TAILQ_HEAD(disk_list, nvd_disk) disk_head;
>
>  static int nvd_modevent(module_t mod, int type, void *arg)
>  {
> @@ -104,8 +115,11 @@ static int
>  nvd_load()
>  {
>
> -       TAILQ_INIT(&nvd_head);
> -       consumer_handle = nvme_register_consumer(create_geom_disk, NULL);
> +       TAILQ_INIT(&ctrlr_head);
> +       TAILQ_INIT(&disk_head);
> +
> +       consumer_handle = nvme_register_consumer(nvd_new_disk,
> +           nvd_new_controller, NULL, nvd_controller_fail);
>
>         return (consumer_handle != NULL ? 0 : -1);
>  }
> @@ -113,13 +127,20 @@ nvd_load()
>  static void
>  nvd_unload()
>  {
> -       struct nvd_disk *nvd;
> +       struct nvd_controller   *ctrlr;
> +       struct nvd_disk         *disk;
> +
> +       while (!TAILQ_EMPTY(&ctrlr_head)) {
> +               ctrlr = TAILQ_FIRST(&ctrlr_head);
> +               TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
> +               free(ctrlr, M_NVD);
> +       }
>
> -       while (!TAILQ_EMPTY(&nvd_head)) {
> -               nvd = TAILQ_FIRST(&nvd_head);
> -               TAILQ_REMOVE(&nvd_head, nvd, tailq);
> -               destroy_geom_disk(nvd);
> -               free(nvd, M_NVD);
> +       while (!TAILQ_EMPTY(&disk_head)) {
> +               disk = TAILQ_FIRST(&disk_head);
> +               TAILQ_REMOVE(&disk_head, disk, global_tailq);
> +               destroy_geom_disk(disk);
> +               free(disk, M_NVD);
>         }
>
>         nvme_unregister_consumer(consumer_handle);
> @@ -153,7 +174,7 @@ nvd_ioctl(struct disk *ndisk, u_long cmd
>  }
>
>  static void
> -nvd_done(void *arg, const struct nvme_completion *status)
> +nvd_done(void *arg, const struct nvme_completion *cpl)
>  {
>         struct bio *bp;
>         struct nvd_disk *ndisk;
> @@ -162,14 +183,13 @@ nvd_done(void *arg, const struct nvme_co
>
>         ndisk = bp->bio_disk->d_drv1;
>
> -       if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == NVME_QD)
> -               taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
> +       atomic_add_int(&ndisk->cur_depth, -1);
>
>         /*
>          * TODO: add more extensive translation of NVMe status codes
>          *  to different bio error codes (i.e. EIO, EINVAL, etc.)
>          */
> -       if (status->sf_sc || status->sf_sct) {
> +       if (nvme_completion_is_error(cpl)) {
>                 bp->bio_error = EIO;
>                 bp->bio_flags |= BIO_ERROR;
>                 bp->bio_resid = bp->bio_bcount;
> @@ -187,9 +207,6 @@ nvd_bioq_process(void *arg, int pending)
>         int err;
>
>         for (;;) {
> -               if (atomic_load_acq_int(&ndisk->cur_depth) >= NVME_QD)
> -                       break;
> -
>                 mtx_lock(&ndisk->bioqlock);
>                 bp = bioq_takefirst(&ndisk->bioq);
>                 mtx_unlock(&ndisk->bioqlock);
> @@ -210,13 +227,13 @@ nvd_bioq_process(void *arg, int pending)
>  #endif
>
>                 bp->bio_driver1 = NULL;
> -               atomic_add_acq_int(&ndisk->cur_depth, 1);
> +               atomic_add_int(&ndisk->cur_depth, 1);
>
>                 err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
>
>                 if (err) {
> -                       atomic_add_acq_int(&ndisk->cur_depth, -1);
> -                       bp->bio_error = EIO;
> +                       atomic_add_int(&ndisk->cur_depth, -1);
> +                       bp->bio_error = err;
>                         bp->bio_flags |= BIO_ERROR;
>                         bp->bio_resid = bp->bio_bcount;
>                         biodone(bp);
> @@ -237,13 +254,28 @@ nvd_bioq_process(void *arg, int pending)
>         }
>  }
>
> -static void
> -create_geom_disk(void *arg, struct nvme_namespace *ns)
> +static void *
> +nvd_new_controller(struct nvme_controller *ctrlr)
>  {
> -       struct nvd_disk *ndisk;
> -       struct disk *disk;
> +       struct nvd_controller   *nvd_ctrlr;
> +
> +       nvd_ctrlr = malloc(sizeof(struct nvd_controller), M_NVD,
> +           M_ZERO | M_WAITOK);
>
> -       ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_NOWAIT);
> +       TAILQ_INIT(&nvd_ctrlr->disk_head);
> +       TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
> +
> +       return (nvd_ctrlr);
> +}
> +
> +static void *
> +nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
> +{
> +       struct nvd_disk         *ndisk;
> +       struct disk             *disk;
> +       struct nvd_controller   *ctrlr = ctrlr_arg;
> +
> +       ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);
>
>         disk = disk_alloc();
>         disk->d_strategy = nvd_strategy;
> @@ -255,10 +287,11 @@ create_geom_disk(void *arg, struct nvme_
>         disk->d_sectorsize = nvme_ns_get_sector_size(ns);
>         disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
>
> -       if (TAILQ_EMPTY(&nvd_head))
> +       if (TAILQ_EMPTY(&disk_head))
>                 disk->d_unit = 0;
>         else
> -               disk->d_unit = TAILQ_FIRST(&nvd_head)->disk->d_unit + 1;
> +               disk->d_unit =
> +                   TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1;
>
>         disk->d_flags = 0;
>
> @@ -268,6 +301,11 @@ create_geom_disk(void *arg, struct nvme_
>         if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED)
>                 disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
>
> +/* ifdef used here to ease porting to stable branches at a later point. */
> +#ifdef DISKFLAG_UNMAPPED_BIO
> +       disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
> +#endif
> +
>         strlcpy(disk->d_ident, nvme_ns_get_serial_number(ns),
>             sizeof(disk->d_ident));
>
> @@ -290,7 +328,10 @@ create_geom_disk(void *arg, struct nvme_
>             taskqueue_thread_enqueue, &ndisk->tq);
>         taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq");
>
> -       TAILQ_INSERT_HEAD(&nvd_head, ndisk, tailq);
> +       TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
> +       TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);
> +
> +       return (NULL);
>  }
>
>  static void
> @@ -316,3 +357,22 @@ destroy_geom_disk(struct nvd_disk *ndisk
>
>         mtx_destroy(&ndisk->bioqlock);
>  }
> +
> +static void
> +nvd_controller_fail(void *ctrlr_arg)
> +{
> +       struct nvd_controller   *ctrlr = ctrlr_arg;
> +       struct nvd_disk         *disk;
> +
> +       while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
> +               disk = TAILQ_FIRST(&ctrlr->disk_head);
> +               TAILQ_REMOVE(&disk_head, disk, global_tailq);
> +               TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
> +               destroy_geom_disk(disk);
> +               free(disk, M_NVD);
> +       }
> +
> +       TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
> +       free(ctrlr, M_NVD);
> +}
> +
>
> Modified: stable/9/sys/dev/nvme/nvme.c
>
> ==============================================================================
> --- stable/9/sys/dev/nvme/nvme.c        Tue Jun 25 23:30:48 2013
>  (r252221)
> +++ stable/9/sys/dev/nvme/nvme.c        Tue Jun 25 23:52:39 2013
>  (r252222)
> @@ -32,22 +32,33 @@ __FBSDID("$FreeBSD$");
>  #include <sys/conf.h>
>  #include <sys/module.h>
>
> +#include <vm/uma.h>
> +
> +#include <dev/pci/pcireg.h>
>  #include <dev/pci/pcivar.h>
>
>  #include "nvme_private.h"
>
>  struct nvme_consumer {
> -       nvme_consumer_cb_fn_t           cb_fn;
> -       void                            *cb_arg;
> +       uint32_t                id;
> +       nvme_cons_ns_fn_t       ns_fn;
> +       nvme_cons_ctrlr_fn_t    ctrlr_fn;
> +       nvme_cons_async_fn_t    async_fn;
> +       nvme_cons_fail_fn_t     fail_fn;
>  };
>
>  struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
> +#define        INVALID_CONSUMER_ID     0xFFFF
> +
> +uma_zone_t     nvme_request_zone;
> +int32_t                nvme_retry_count;
>
>  MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
>
>  static int    nvme_probe(device_t);
>  static int    nvme_attach(device_t);
>  static int    nvme_detach(device_t);
> +static int    nvme_modevent(module_t mod, int type, void *arg);
>
>  static devclass_t nvme_devclass;
>
> @@ -65,7 +76,7 @@ static driver_t nvme_pci_driver = {
>         sizeof(struct nvme_controller),
>  };
>
> -DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, 0, 0);
> +DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, nvme_modevent,
> 0);
>  MODULE_VERSION(nvme, 1);
>
>  static struct _pcsid
> @@ -75,15 +86,19 @@ static struct _pcsid
>  } pci_ids[] = {
>         { 0x01118086,           "NVMe Controller"  },
>         { CHATHAM_PCI_ID,       "Chatham Prototype NVMe Controller"  },
> -       { IDT_PCI_ID,           "IDT NVMe Controller"  },
> +       { IDT32_PCI_ID,         "IDT NVMe Controller (32 channel)"  },
> +       { IDT8_PCI_ID,          "IDT NVMe Controller (8 channel)" },
>         { 0x00000000,           NULL  }
>  };
>
>  static int
>  nvme_probe (device_t device)
>  {
> -       u_int32_t type = pci_get_devid(device);
> -       struct _pcsid *ep = pci_ids;
> +       struct _pcsid   *ep;
> +       u_int32_t       type;
> +
> +       type = pci_get_devid(device);
> +       ep = pci_ids;
>
>         while (ep->type && ep->type != type)
>                 ++ep;
> @@ -91,11 +106,43 @@ nvme_probe (device_t device)
>         if (ep->desc) {
>                 device_set_desc(device, ep->desc);
>                 return (BUS_PROBE_DEFAULT);
> -       } else
> -               return (ENXIO);
> +       }
> +
> +#if defined(PCIS_STORAGE_NVM)
> +       if (pci_get_class(device)    == PCIC_STORAGE &&
> +           pci_get_subclass(device) == PCIS_STORAGE_NVM &&
> +           pci_get_progif(device)   ==
> PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
> +               device_set_desc(device, "Generic NVMe Device");
> +               return (BUS_PROBE_GENERIC);
> +       }
> +#endif
> +
> +       return (ENXIO);
>  }
>
>  static void
> +nvme_init(void)
> +{
> +       uint32_t        i;
> +
> +       nvme_request_zone = uma_zcreate("nvme_request",
> +           sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
> +
> +       for (i = 0; i < NVME_MAX_CONSUMERS; i++)
> +               nvme_consumer[i].id = INVALID_CONSUMER_ID;
> +}
> +
> +SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
> +
> +static void
> +nvme_uninit(void)
> +{
> +       uma_zdestroy(nvme_request_zone);
> +}
> +
> +SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
> +
> +static void
>  nvme_load(void)
>  {
>  }
> @@ -160,24 +207,14 @@ nvme_modevent(module_t mod, int type, vo
>         return (0);
>  }
>
> -moduledata_t nvme_mod = {
> -       "nvme",
> -       (modeventhand_t)nvme_modevent,
> -       0
> -};
> -
> -DECLARE_MODULE(nvme, nvme_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
> -
>  void
>  nvme_dump_command(struct nvme_command *cmd)
>  {
> -       printf("opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x "
> -           "mptr:%qx prp1:%qx prp2:%qx cdw:%x %x %x %x %x %x\n",
> +       printf(
> +"opc:%x f:%x r1:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
>             cmd->opc, cmd->fuse, cmd->rsvd1, cmd->cid, cmd->nsid,
>             cmd->rsvd2, cmd->rsvd3,
> -           (long long unsigned int)cmd->mptr,
> -           (long long unsigned int)cmd->prp1,
> -           (long long unsigned int)cmd->prp2,
> +           (uintmax_t)cmd->mptr, (uintmax_t)cmd->prp1, (uintmax_t)cmd->prp2,
>             cmd->cdw10, cmd->cdw11, cmd->cdw12, cmd->cdw13, cmd->cdw14,
>             cmd->cdw15);
>  }
> @@ -188,87 +225,8 @@ nvme_dump_completion(struct nvme_complet
>         printf("cdw0:%08x sqhd:%04x sqid:%04x "
>             "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
>             cpl->cdw0, cpl->sqhd, cpl->sqid,
> -           cpl->cid, cpl->p, cpl->sf_sc, cpl->sf_sct, cpl->sf_m,
> -           cpl->sf_dnr);
> -}
> -
> -void
> -nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
> -{
> -       struct nvme_tracker     *tr;
> -       struct nvme_qpair       *qpair;
> -       struct nvme_prp_list    *prp_list;
> -       uint32_t                cur_nseg;
> -
> -       KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
> -
> -       tr = (struct nvme_tracker *)arg;
> -       qpair = tr->qpair;
> -
> -       /*
> -        * Note that we specified PAGE_SIZE for alignment and max
> -        *  segment size when creating the bus dma tags.  So here
> -        *  we can safely just transfer each segment to its
> -        *  associated PRP entry.
> -        */
> -       tr->cmd.prp1 = seg[0].ds_addr;
> -
> -       if (nseg == 2) {
> -               tr->cmd.prp2 = seg[1].ds_addr;
> -       } else if (nseg > 2) {
> -               KASSERT(tr->prp_list,
> -                   ("prp_list needed but not attached to tracker\n"));
> -               cur_nseg = 1;
> -               prp_list = tr->prp_list;
> -               tr->cmd.prp2 = (uint64_t)prp_list->bus_addr;
> -               while (cur_nseg < nseg) {
> -                       prp_list->prp[cur_nseg-1] =
> -                           (uint64_t)seg[cur_nseg].ds_addr;
> -                       cur_nseg++;
> -               }
> -       }
> -
> -       nvme_qpair_submit_cmd(qpair, tr);
> -}
> -
> -struct nvme_tracker *
> -nvme_allocate_tracker(struct nvme_controller *ctrlr, boolean_t is_admin,
> -    nvme_cb_fn_t cb_fn, void *cb_arg, uint32_t payload_size, void *payload)
> -{
> -       struct nvme_tracker     *tr;
> -       struct nvme_qpair       *qpair;
> -       uint32_t                modulo, offset, num_prps;
> -       boolean_t               alloc_prp_list = FALSE;
> -
> -       if (is_admin) {
> -               qpair = &ctrlr->adminq;
> -       } else {
> -               if (ctrlr->per_cpu_io_queues)
> -                       qpair = &ctrlr->ioq[curcpu];
> -               else
> -                       qpair = &ctrlr->ioq[0];
> -       }
> -
> -       num_prps = payload_size / PAGE_SIZE;
> -       modulo = payload_size % PAGE_SIZE;
> -       offset = (uint32_t)((uintptr_t)payload % PAGE_SIZE);
> -
> -       if (modulo || offset)
> -               num_prps += 1 + (modulo + offset - 1) / PAGE_SIZE;
> -
> -       if (num_prps > 2)
> -               alloc_prp_list = TRUE;
> -
> -       tr = nvme_qpair_allocate_tracker(qpair, alloc_prp_list);
> -
> -       memset(&tr->cmd, 0, sizeof(tr->cmd));
> -
> -       tr->qpair = qpair;
> -       tr->cb_fn = cb_fn;
> -       tr->cb_arg = cb_arg;
> -       tr->payload_size = payload_size;
> -
> -       return (tr);
> +           cpl->cid, cpl->status.p, cpl->status.sc, cpl->status.sct,
> +           cpl->status.m, cpl->status.dnr);
>  }
>
>  static int
> @@ -287,15 +245,17 @@ nvme_attach(device_t dev)
>          *  to cc.en==0.  This is because we don't really know what status
>          *  the controller was left in when boot handed off to OS.
>          */
> -       status = nvme_ctrlr_reset(ctrlr);
> +       status = nvme_ctrlr_hw_reset(ctrlr);
>         if (status != 0)
>                 return (status);
>
> -       status = nvme_ctrlr_reset(ctrlr);
> +       status = nvme_ctrlr_hw_reset(ctrlr);
>         if (status != 0)
>                 return (status);
>
> -       ctrlr->config_hook.ich_func = nvme_ctrlr_start;
> +       nvme_sysctl_initialize_ctrlr(ctrlr);
> +
> +       ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook;
>         ctrlr->config_hook.ich_arg = ctrlr;
>
>         config_intrhook_establish(&ctrlr->config_hook);
> @@ -307,77 +267,75 @@ static int
>  nvme_detach (device_t dev)
>  {
>         struct nvme_controller  *ctrlr = DEVICE2SOFTC(dev);
> -       struct nvme_namespace   *ns;
> -       int                     i;
> -
> -       if (ctrlr->taskqueue) {
> -               taskqueue_drain(ctrlr->taskqueue, &ctrlr->task);
> -               taskqueue_free(ctrlr->taskqueue);
> -       }
> -
> -       for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
> -               ns = &ctrlr->ns[i];
> -               if (ns->cdev)
> -                       destroy_dev(ns->cdev);
> -       }
> -
> -       if (ctrlr->cdev)
> -               destroy_dev(ctrlr->cdev);
> -
> -       for (i = 0; i < ctrlr->num_io_queues; i++) {
> -               nvme_io_qpair_destroy(&ctrlr->ioq[i]);
> -       }
> -
> -       free(ctrlr->ioq, M_NVME);
> -
> -       nvme_admin_qpair_destroy(&ctrlr->adminq);
> -
> -       if (ctrlr->resource != NULL) {
> -               bus_release_resource(dev, SYS_RES_MEMORY,
> -                   ctrlr->resource_id, ctrlr->resource);
> -       }
> -
> -#ifdef CHATHAM2
> -       if (ctrlr->chatham_resource != NULL) {
> -               bus_release_resource(dev, SYS_RES_MEMORY,
> -                   ctrlr->chatham_resource_id, ctrlr->chatham_resource);
> -       }
> -#endif
> -
> -       if (ctrlr->tag)
> -               bus_teardown_intr(ctrlr->dev, ctrlr->res, ctrlr->tag);
> -
> -       if (ctrlr->res)
> -               bus_release_resource(ctrlr->dev, SYS_RES_IRQ,
> -                   rman_get_rid(ctrlr->res), ctrlr->res);
> -
> -       if (ctrlr->msix_enabled)
> -               pci_release_msi(dev);
>
> +       nvme_ctrlr_destruct(ctrlr, dev);
>         return (0);
>  }
>
>  static void
> -nvme_notify_consumer(struct nvme_consumer *consumer)
> +nvme_notify_consumer(struct nvme_consumer *cons)
>  {
>         device_t                *devlist;
>         struct nvme_controller  *ctrlr;
> -       int                     dev, ns, devcount;
> +       struct nvme_namespace   *ns;
> +       void                    *ctrlr_cookie;
> +       int                     dev_idx, ns_idx, devcount;
>
>         if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
>                 return;
>
> -       for (dev = 0; dev < devcount; dev++) {
> -               ctrlr = DEVICE2SOFTC(devlist[dev]);
> -               for (ns = 0; ns < ctrlr->cdata.nn; ns++)
> -                       (*consumer->cb_fn)(consumer->cb_arg, &ctrlr->ns[ns]);
> +       for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
> +               ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
> +               if (cons->ctrlr_fn != NULL)
> +                       ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
> +               else
> +                       ctrlr_cookie = NULL;
> +               ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
> +               for (ns_idx = 0; ns_idx < ctrlr->cdata.nn; ns_idx++) {
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>


More information about the svn-src-stable-9 mailing list