From e97310b5583577612d81730cfaeb7a70d39039fe Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Tue, 25 Feb 2020 00:40:05 +0000
Subject: [PATCH] New ioctl DIOCGSECTORALIGN returns sector alignment
 parameters:

struct disk_sectoralign {
	/* First aligned sector number.  */
	uint32_t dsa_firstaligned;

	/* Number of sectors per aligned unit.  */
	uint32_t dsa_alignment;
};

- Teach wd(4) to get it from ATA.
- Teach cgd(4) to pass it through from the underlying disk.
- Teach dk(4) to pass it through with adjustments.
- Teach zpool to take advantage of it.
  => XXX zpool doesn't seem to understand when the vdev's starting
     sector is misaligned.
---
 .../osnet/dist/uts/common/fs/zfs/vdev_disk.c  | 17 ++++++-
 sys/dev/ata/wd.c                              | 47 ++++++++++++++++++-
 sys/dev/ata/wdvar.h                           |  1 +
 sys/dev/cgd.c                                 | 27 +++++++++++
 sys/dev/dkwedge/dk.c                          | 17 +++++++
 sys/sys/disk.h                                |  6 +++
 sys/sys/dkio.h                                |  3 ++
 7 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c b/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
index 3a7e9d54c2a5..95000681486b 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/vdev_disk.c
@@ -151,6 +151,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 	unsigned secsize;
 	struct disk *pdk;
 	struct dkwedge_info dkw;
+	struct disk_sectoralign dsa;
 
 	/*
 	 * We must have a pathname, and it must be absolute.
@@ -260,7 +261,21 @@ skip_open:
 	*max_psize = *psize;
 
 	*ashift = highbit(MAX(secsize, SPA_MINBLOCKSIZE)) - 1;
-	*pashift = *ashift;
+
+	/*
+	 * Prefer the disk's physical sector size when it emulates a
+	 * smaller logical one with r/m/w cycles (e.g. 4096 on 512).
+	 * dsa is only valid if the ioctl succeeds; else use ashift.
+	 */
+	*pashift = *ashift;
+	if (VOP_IOCTL(vp, DIOCGSECTORALIGN, &dsa, FREAD, NOCRED) == 0) {
+		*pashift = highbit(dsa.dsa_alignment * secsize) - 1;
+		if (dsa.dsa_firstaligned % dsa.dsa_alignment)
+			printf("ZFS WARNING: vdev %s: sectors are misaligned"
+			    " (alignment=%"PRIu32", firstaligned=%"PRIu32")\n",
+			    vd->vdev_path,
+			    dsa.dsa_alignment, dsa.dsa_firstaligned);
+	}
 
 	vd->vdev_wholedisk = 0;
 	if (getdiskinfo(vp, &dkw) != 0 &&
diff --git a/sys/dev/ata/wd.c b/sys/dev/ata/wd.c
index 039fd844c0d5..be12f307ac5f 100644
--- a/sys/dev/ata/wd.c
+++ b/sys/dev/ata/wd.c
@@ -430,16 +430,40 @@ wdattach(device_t parent, device_t self, void *aux)
 	} else {
 		wd->sc_blksize = 512;
 	}
+	wd->sc_sectoralign.dsa_firstaligned = 0;	/* default: sector 0 */
+	wd->sc_sectoralign.dsa_alignment = 1;	/* default: 1 sector/unit */
+	if ((wd->sc_params.atap_secsz & ATA_SECSZ_VALID_MASK) == ATA_SECSZ_VALID
+	    && ((wd->sc_params.atap_secsz & ATA_SECSZ_LPS) != 0)) {
+		wd->sc_sectoralign.dsa_alignment = 1 <<
+		    (wd->sc_params.atap_secsz & ATA_SECSZ_LPS_SZMSK);
+		if ((wd->sc_params.atap_logical_align & ATA_LA_VALID_MASK) ==
+		    ATA_LA_VALID) {
+			wd->sc_sectoralign.dsa_firstaligned =
+			    (wd->sc_sectoralign.dsa_alignment -
+				(wd->sc_params.atap_logical_align &
+				    ATA_LA_MASK));
+		}
+	}	/* NOTE(review): firstaligned == alignment if offset is 0 */
 	wd->sc_capacity512 = (wd->sc_capacity * wd->sc_blksize) / DEV_BSIZE;
 	format_bytes(pbuf, sizeof(pbuf), wd->sc_capacity * wd->sc_blksize);
 	aprint_normal_dev(self, "%s, %d cyl, %d head, %d sec, "
-	    "%d bytes/sect x %llu sectors\n",
+	    "%d bytes/sect x %llu sectors",
 	    pbuf,
 	    (wd->sc_flags & WDF_LBA) ? (int)(wd->sc_capacity /
 		(wd->sc_params.atap_heads * wd->sc_params.atap_sectors)) :
 		wd->sc_params.atap_cylinders,
 	    wd->sc_params.atap_heads, wd->sc_params.atap_sectors,
 	    wd->sc_blksize, (unsigned long long)wd->sc_capacity);
+	if (wd->sc_sectoralign.dsa_alignment != 1) {
+		/* Physical sector bytes = sectors per unit * bytes/sector.  */
+		aprint_normal(" (%d bytes/physsect",
+		    wd->sc_sectoralign.dsa_alignment * wd->sc_blksize);
+		if (wd->sc_sectoralign.dsa_firstaligned != 0)
+			aprint_normal("; first aligned sector: %jd",
+			    (intmax_t)wd->sc_sectoralign.dsa_firstaligned);
+		aprint_normal(")");
+	}
+	aprint_normal("\n");
 
 	ATADEBUG_PRINT(("%s: atap_dmatiming_mimi=%d, atap_dmatiming_recom=%d\n",
 	    device_xname(self), wd->sc_params.atap_dmatiming_mimi,
@@ -1409,6 +1433,27 @@ wdioctl(dev_t dev, u_long cmd, void *addr, int flag, struct lwp *l)
 		return(error1);
 		}
 
+	case DIOCGSECTORALIGN: {
+		struct disk_sectoralign *dsa = addr;
+		int part = WDPART(dev);
+
+		*dsa = wd->sc_sectoralign;	/* whole-disk alignment */
+		if (part != RAW_PART) {
+			struct disklabel *lp = dksc->sc_dkdev.dk_label;
+			daddr_t offset = lp->d_partitions[part].p_offset;
+			uint32_t r = offset % dsa->dsa_alignment;
+
+			/*
+			 * Smallest s in [0, alignment) with offset+s aligned.
+			 */
+			dsa->dsa_firstaligned = (dsa->dsa_firstaligned %
+			    dsa->dsa_alignment + dsa->dsa_alignment - r) %
+			    dsa->dsa_alignment;
+		}
+
+		return 0;
+	}
+
 	default:
 		return dk_ioctl(dksc, dev, cmd, addr, flag, l);
 	}
diff --git a/sys/dev/ata/wdvar.h b/sys/dev/ata/wdvar.h
index 2f13ded15e38..461c08bdd023 100644
--- a/sys/dev/ata/wdvar.h
+++ b/sys/dev/ata/wdvar.h
@@ -59,6 +59,7 @@ struct wd_softc {
 	uint64_t sc_capacity512; /* ... in DEV_BSIZE blocks */
 	uint32_t sc_capacity28; /* capacity accessible with LBA28 commands */
 	uint32_t sc_blksize; /* logical block size, in bytes */
+	struct disk_sectoralign sc_sectoralign; /* sector alignment */
 
 #ifdef WD_SOFTBADSECT
 	SLIST_HEAD(, disk_badsectors)	sc_bslist;
diff --git a/sys/dev/cgd.c b/sys/dev/cgd.c
index 4c7348c364d0..46512e0bbbf0 100644
--- a/sys/dev/cgd.c
+++ b/sys/dev/cgd.c
@@ -722,6 +722,33 @@ cgdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
 		 * We pass this call down to the underlying disk.
 		 */
 		return VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+	case DIOCGSECTORALIGN: {
+		struct disk_sectoralign *dsa = data;
+		int error;
+
+		if (!DK_ATTACHED(dksc))
+			return ENOENT;
+
+		/* Get the underlying disk's sector alignment.  */
+		error = VOP_IOCTL(cs->sc_tvn, cmd, data, flag, l->l_cred);
+		if (error)
+			return error;
+
+		/* Adjust for the disklabel partition if necessary.  */
+		if (part != RAW_PART) {
+			struct disklabel *lp = dksc->sc_dkdev.dk_label;
+			daddr_t offset = lp->d_partitions[part].p_offset;
+			uint32_t r = offset % dsa->dsa_alignment;
+
+			/*
+			 * Smallest s in [0, alignment) with offset+s aligned.
+			 */
+			dsa->dsa_firstaligned = (dsa->dsa_firstaligned %
+			    dsa->dsa_alignment + dsa->dsa_alignment - r) %
+			    dsa->dsa_alignment;
+		}
+		return 0;
+	}
 	case DIOCGSTRATEGY:
 	case DIOCSSTRATEGY:
 		if (!DK_ATTACHED(dksc))
diff --git a/sys/dev/dkwedge/dk.c b/sys/dev/dkwedge/dk.c
index 30fc333d0b48..9492cf0d88ca 100644
--- a/sys/dev/dkwedge/dk.c
+++ b/sys/dev/dkwedge/dk.c
@@ -1501,7 +1501,24 @@ dkioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
 
 		break;
 	    }
+	case DIOCGSECTORALIGN:
+	    {
+		struct disk_sectoralign *dsa = data;
+		uint32_t r;
+
+		/* Get the parent disk's sector alignment.  */
+		error = VOP_IOCTL(sc->sc_parent->dk_rawvp, cmd, dsa, flag,
+		    l != NULL ? l->l_cred : NOCRED);
+		if (error)
+			break;

+		/* Rebase onto the wedge, normalized to [0, alignment).  */
+		r = sc->sc_offset % dsa->dsa_alignment;
+		dsa->dsa_firstaligned = (dsa->dsa_firstaligned %
+		    dsa->dsa_alignment + dsa->dsa_alignment - r) %
+		    dsa->dsa_alignment;
+		break;
+	    }
 	default:
 		error = ENOTTY;
 	}
diff --git a/sys/sys/disk.h b/sys/sys/disk.h
index 100a44c3f880..ed25eb657017 100644
--- a/sys/sys/disk.h
+++ b/sys/sys/disk.h
@@ -300,6 +300,12 @@ struct disk_strategy {
 	size_t dks_paramlen;		/* notyet; should be 0 */
 };
 
+/* Sector alignment parameters, as returned by DIOCGSECTORALIGN */
+struct disk_sectoralign {
+	uint32_t	dsa_firstaligned; /* first aligned sector # */
+	uint32_t	dsa_alignment;	  /* sectors per aligned unit */
+};
+
 #ifdef _KERNEL
 #include <sys/device.h>
 #include <sys/mutex.h>
diff --git a/sys/sys/dkio.h b/sys/sys/dkio.h
index 84046268d0a4..937df511d791 100644
--- a/sys/sys/dkio.h
+++ b/sys/sys/dkio.h
@@ -133,4 +133,7 @@
 		/* mass removal */
 #define	DIOCRMWEDGES	_IOR('d', 134, int)	/* remove all wedges */
 
+		/* get sector alignment (struct disk_sectoralign) */
+#define	DIOCGSECTORALIGN _IOR('d', 135, struct disk_sectoralign)
+
 #endif /* _SYS_DKIO_H_ */