diff --git a/include/vfn/nvme/rq.h b/include/vfn/nvme/rq.h
index 47b847d..c87ff9b 100644
--- a/include/vfn/nvme/rq.h
+++ b/include/vfn/nvme/rq.h
@@ -212,6 +212,9 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
  *
  * Map a buffer of size @len into the command payload.
  *
+ * This helper uses the PRP list page pre-allocated within @rq; it is
+ * equivalent to calling ``nvme_map_prp(ctrl, rq->page.vaddr, cmd, iova, len)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
@@ -230,6 +233,9 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
  * allowed to be unaligned, but the entry MUST end on a page boundary. All
  * subsequent entries MUST be page aligned.
  *
+ * This helper uses the PRP list page pre-allocated within @rq; it is
+ * equivalent to calling ``nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
@@ -246,6 +252,9 @@ int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
  *
  * Map the memory contained in @iov into the request SGL.
  *
+ * This helper uses the SGL segment list page pre-allocated within @rq; it is
+ * equivalent to calling ``nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov)``.
+ *
  * Return: ``0`` on success, ``-1`` on error and sets errno.
  */
 int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
diff --git a/include/vfn/nvme/util.h b/include/vfn/nvme/util.h
index 6d36876..4abd31c 100644
--- a/include/vfn/nvme/util.h
+++ b/include/vfn/nvme/util.h
@@ -110,4 +110,55 @@ int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, union nvme_cmd *sqe, v
 int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t len,
 	       struct nvme_cqe *cqe_copy);
 
+/**
+ * nvme_map_prp - Set up the Physical Region Pages in the data pointer of the
+ *                command from a buffer that is contiguous in iova-mapped
+ *                memory.
+ * @ctrl: &struct nvme_ctrl
+ * @prplist: The first PRP list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iova: I/O Virtual Address
+ * @len: Length of buffer
+ *
+ * Map a buffer of size @len into the command payload.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, union nvme_cmd *cmd,
+		 uint64_t iova, size_t len);
+
+/**
+ * nvme_mapv_prp - Set up the Physical Region Pages in the data pointer of
+ *                 the command from an iovec.
+ * @ctrl: &struct nvme_ctrl
+ * @prplist: The first PRP list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iov: array of iovecs
+ * @niov: number of iovecs in @iov
+ *
+ * Map the memory contained in @iov into the request PRPs. The first entry is
+ * allowed to be unaligned, but the entry MUST end on a page boundary. All
+ * subsequent entries MUST be page aligned.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist,
+		  union nvme_cmd *cmd, struct iovec *iov, int niov);
+
+/**
+ * nvme_mapv_sgl - Set up a Scatter/Gather List in the data pointer of the
+ *                 command from an iovec.
+ * @ctrl: &struct nvme_ctrl
+ * @seglist: SGL segment list page address
+ * @cmd: NVMe command prototype (&union nvme_cmd)
+ * @iov: array of iovecs
+ * @niov: number of iovecs in @iov
+ *
+ * Map the memory contained in @iov into the request SGL.
+ *
+ * Return: ``0`` on success, ``-1`` on error and sets errno.
+ */
+int nvme_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_sgld *seglist, union nvme_cmd *cmd,
+		  struct iovec *iov, int niov);
+
 #endif /* LIBVFN_NVME_UTIL_H */
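The declarations above make the mapping helpers usable with caller-managed list pages. A minimal usage sketch (illustrative, not part of the patch; assumes an initialized `ctrl` and a `buf_iova`/`len` pair describing a buffer already mapped in the controller's IOMMU context):

	union nvme_cmd cmd = {};
	leint64_t *prppage;

	/* one host page for the PRP list; it must also be mapped in the
	 * IOMMU (e.g. with iommu_map_vaddr()) so that nvme_map_prp() can
	 * resolve its iova internally via iommu_translate_vaddr() */
	assert(pgmap((void **)&prppage, __VFN_PAGESIZE) > 0);

	if (nvme_map_prp(&ctrl, prppage, &cmd, buf_iova, len))
		err(1, "nvme_map_prp");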
diff --git a/src/nvme/rq.c b/src/nvme/rq.c
index 9e1986d..490ca8c 100644
--- a/src/nvme/rq.c
+++ b/src/nvme/rq.c
@@ -35,241 +35,25 @@
 #include
 #include
 
-#include "ccan/minmax/minmax.h"
-
 #include "iommu/context.h"
 #include "types.h"
 
-static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
-				  int pageshift)
+int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
+		    uint64_t iova, size_t len)
 {
-	size_t pagesize = 1 << pageshift;
-	int max_prps = 1 << (pageshift - 3);
-
-	/* number of prps required to map the buffer */
-	int prpcount = 1;
-
-	*prp1 = cpu_to_le64(iova);
-
-	/* account for what is covered with the first prp */
-	len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1)));
-
-	/* any residual just adds more prps */
-	if (len)
-		prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift;
-
-	if (prpcount > 1 && !ALIGNED(iova, pagesize))
-		/* align down to simplify loop below */
-		iova = ALIGN_DOWN(iova, pagesize);
-
-	if (prpcount > max_prps) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	/*
-	 * Map the remaining parts of the buffer into prp2/prplist. iova will be
-	 * aligned from the above, which simplifies this.
-	 */
-	for (int i = 1; i < prpcount; i++)
-		prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));
-
-	/*
-	 * prpcount may be zero if the buffer length was less than the page
-	 * size, so clamp it to 1 in that case.
-	 */
-	return clamp_t(int, prpcount, 1, prpcount);
-}
-
-static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps,
-				   int pageshift)
-{
-	int prpcount = max_t(int, 1, (int)len >> pageshift);
-	size_t pagesize = 1 << pageshift;
-
-	if (prpcount > max_prps) {
-		log_error("too many prps required\n");
-
-		errno = EINVAL;
-		return -1;
-	}
-
-	if (!ALIGNED(iova, pagesize)) {
-		log_error("unaligned iova 0x%" PRIx64 "\n", iova);
-
-		errno = EINVAL;
-		return -1;
-	}
-
-	for (int i = 0; i < prpcount; i++)
-		prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));
-
-	return prpcount;
-}
-
-static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount)
-{
-	if (prpcount == 2)
-		*prp2 = prplist0;
-	else if (prpcount > 2)
-		*prp2 = prplist;
-	else
-		*prp2 = 0x0;
-}
-
-int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
-		    size_t len)
-{
-	int prpcount;
-	leint64_t *prplist = rq->page.vaddr;
-	int pageshift = __mps_to_pageshift(ctrl->config.mps);
-
-	prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
-	if (prpcount < 0) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);
-
-	return 0;
+	return nvme_map_prp(ctrl, rq->page.vaddr, cmd, iova, len);
 }
 
 int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
 		     struct iovec *iov, int niov)
 {
-	struct iommu_ctx *ctx = __iommu_ctx(ctrl);
-
-	leint64_t *prplist = rq->page.vaddr;
-	size_t len = iov->iov_len;
-	int pageshift = __mps_to_pageshift(ctrl->config.mps);
-	size_t pagesize = 1 << pageshift;
-	int max_prps = 1 << (pageshift - 3);
-	int ret, prpcount;
-	uint64_t iova;
-
-	if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
-		errno = EFAULT;
-		return -1;
-	}
-
-	/* map the first segment */
-	prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
-	if (prpcount < 0)
-		goto invalid;
-
-	/*
-	 * At this point, one of three conditions must hold:
-	 *
-	 * a) a single prp entry was set up by __map_first, or
-	 * b) the iovec only has a single entry, or
-	 * c) the first buffer ends on a page size boundary
-	 *
-	 * If none holds, the buffer(s) within the iovec cannot be mapped given
-	 * the PRP alignment requirements.
-	 */
-	if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) {
-		log_error("iov[0].iov_base/len invalid\n");
-
-		goto invalid;
-	}
-
-	/* map remaining iovec entries; these must be page size aligned */
-	for (int i = 1; i < niov; i++) {
-		if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
-			errno = EFAULT;
-			return -1;
-		}
-
-		len = iov[i].iov_len;
-
-		/* all entries but the last must have a page size aligned len */
-		if (i < niov - 1 && !ALIGNED(len, pagesize)) {
-			log_error("unaligned iov[%u].len (%zu)\n", i, len);
-
-			goto invalid;
-		}
-
-		ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount,
-				       pageshift);
-		if (ret < 0)
-			goto invalid;
-
-		prpcount += ret;
-	}
-
-	__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);
-
-	return 0;
-
-invalid:
-	errno = EINVAL;
-	return -1;
-}
-
-static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len)
-{
-	sgld->addr = cpu_to_le64(iova);
-	sgld->len = cpu_to_le32((uint32_t)len);
-
-	sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4;
-}
-
-static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n)
-{
-	sgld->addr = cpu_to_le64(iova);
-	sgld->len = cpu_to_le32(n << 4);
-
-	sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4;
+	return nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov);
 }
 
 int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
 		     struct iovec *iov, int niov)
 {
-	struct nvme_sgld *seg = rq->page.vaddr;
-	struct iommu_ctx *ctx = __iommu_ctx(ctrl);
-
-	int pageshift = __mps_to_pageshift(ctrl->config.mps);
-	int max_sglds = 1 << (pageshift - 4);
-	int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;
-
-	uint64_t iova;
-
-	if (niov == 1) {
-		if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
-			errno = EFAULT;
-			return -1;
-		}
-
-		__sgl_data(&cmd->dptr.sgl, iova, iov->iov_len);
-
-		return 0;
-	}
-
-	if (niov > max_sglds) {
-		errno = EINVAL;
-		return -1;
-	}
-
-	__sgl_segment(&cmd->dptr.sgl, rq->page.iova, niov);
-
-	for (int i = 0; i < niov; i++) {
-		if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
-			errno = EFAULT;
-			return -1;
-		}
-
-		if (dword_align && (iova & 0x3)) {
-			errno = EINVAL;
-			return -1;
-		}
-
-		__sgl_data(&seg[i], iova, iov[i].iov_len);
-	}
-
-	cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT);
-
-	return 0;
+	return nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov);
 }
 
 int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
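With the wrappers reduced to the above, the per-request and request-independent forms are interchangeable; for example (hypothetical snippet, with `rq`, `cmd`, `iova` and `len` as in the wrapper):

	/* equivalent after this patch: the wrapper merely passes the
	 * request's pre-allocated page as the PRP list */
	nvme_rq_map_prp(&ctrl, rq, &cmd, iova, len);
	nvme_map_prp(&ctrl, rq->page.vaddr, &cmd, iova, len);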
diff --git a/src/nvme/rq_test.c b/src/nvme/rq_test.c
index 79c691a..7c73d77 100644
--- a/src/nvme/rq_test.c
+++ b/src/nvme/rq_test.c
@@ -56,7 +56,7 @@ int main(void)
 
 	assert(pgmap((void **)&rq.page.vaddr, __VFN_PAGESIZE) > 0);
 
-	rq.page.iova = 0x8000000;
+	rq.page.iova = (uint64_t)rq.page.vaddr;
 
 	prplist = rq.page.vaddr;
 	sglds = rq.page.vaddr;
@@ -93,7 +93,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x2010) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -102,7 +102,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x3000) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -111,7 +111,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000000, 0x3018) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -148,7 +148,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1001000 - 4, 0x1008) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1001000 - 4);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -164,7 +164,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x2000) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -173,7 +173,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x2010) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -189,7 +189,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x3000) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 	ok1(le64_to_cpu(prplist[2]) == 0x1003000);
@@ -199,7 +199,7 @@ int main(void)
 
 	ok1(nvme_rq_map_prp(&ctrl, &rq, &cmd, 0x1000004, 0x3018) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 	ok1(le64_to_cpu(prplist[2]) == 0x1003000);
@@ -234,7 +234,7 @@ int main(void)
 
 	ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 1) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -255,7 +255,7 @@ int main(void)
 
 	ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -266,7 +266,7 @@ int main(void)
 
 	ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000000);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
 
@@ -303,7 +303,7 @@ int main(void)
 
 	ok1(nvme_rq_mapv_prp(&ctrl, &rq, &cmd, iov, 3) == 0);
 	ok1(le64_to_cpu(cmd.dptr.prp1) == 0x1000004);
-	ok1(le64_to_cpu(cmd.dptr.prp2) == 0x8000000);
+	ok1(le64_to_cpu(cmd.dptr.prp2) == rq.page.iova);
 	ok1(le64_to_cpu(prplist[0]) == 0x1001000);
 	ok1(le64_to_cpu(prplist[1]) == 0x1002000);
diff --git a/src/nvme/util.c b/src/nvme/util.c
index a509ceb..f761cf4 100644
--- a/src/nvme/util.c
+++ b/src/nvme/util.c
@@ -34,6 +34,7 @@
 #include
 #include
 
+#include "ccan/minmax/minmax.h"
 #include "types.h"
 #include "crc64table.h"
 
@@ -141,3 +142,250 @@ int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t le
 {
 	return nvme_sync(ctrl, ctrl->adminq.sq, sqe, buf, len, cqe_copy);
 }
+
+static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
+				  int pageshift)
+{
+	size_t pagesize = 1 << pageshift;
+	int max_prps = 1 << (pageshift - 3);
+
+	/* number of prps required to map the buffer */
+	int prpcount = 1;
+
+	*prp1 = cpu_to_le64(iova);
+
+	/* account for what is covered with the first prp */
+	len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1)));
+
+	/* any residual just adds more prps */
+	if (len)
+		prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift;
+
+	if (prpcount > 1 && !ALIGNED(iova, pagesize))
+		/* align down to simplify loop below */
+		iova = ALIGN_DOWN(iova, pagesize);
+
+	if (prpcount > max_prps) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * Map the remaining parts of the buffer into prp2/prplist. iova will be
+	 * aligned from the above, which simplifies this.
+	 */
+	for (int i = 1; i < prpcount; i++)
+		prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));
+
+	/*
+	 * prpcount may be zero if the buffer length was less than the page
+	 * size, so clamp it to 1 in that case.
+	 */
+	return clamp_t(int, prpcount, 1, prpcount);
+}
+
+static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps,
+				   int pageshift)
+{
+	int prpcount = max_t(int, 1, (int)len >> pageshift);
+	size_t pagesize = 1 << pageshift;
+
+	if (prpcount > max_prps) {
+		log_error("too many prps required\n");
+
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (!ALIGNED(iova, pagesize)) {
+		log_error("unaligned iova 0x%" PRIx64 "\n", iova);
+
+		errno = EINVAL;
+		return -1;
+	}
+
+	for (int i = 0; i < prpcount; i++)
+		prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));
+
+	return prpcount;
+}
+
+static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount)
+{
+	if (prpcount == 2)
+		*prp2 = prplist0;
+	else if (prpcount > 2)
+		*prp2 = prplist;
+	else
+		*prp2 = 0x0;
+}
+
+int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, union nvme_cmd *cmd,
+		 uint64_t iova, size_t len)
+{
+	struct iommu_ctx *ctx = __iommu_ctx(ctrl);
+	int prpcount;
+	int pageshift = __mps_to_pageshift(ctrl->config.mps);
+	uint64_t prplist_iova;
+
+	if (!iommu_translate_vaddr(ctx, prplist, &prplist_iova)) {
+		errno = EFAULT;
+		return -1;
+	}
+
+	prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
+	if (prpcount < 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	__set_prp2(&cmd->dptr.prp2, cpu_to_le64(prplist_iova), prplist[0], prpcount);
+
+	return 0;
+}
+
+int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist,
+		  union nvme_cmd *cmd, struct iovec *iov, int niov)
+{
+	struct iommu_ctx *ctx = __iommu_ctx(ctrl);
+
+	size_t len = iov->iov_len;
+	int pageshift = __mps_to_pageshift(ctrl->config.mps);
+	size_t pagesize = 1 << pageshift;
+	int max_prps = 1 << (pageshift - 3);
+	int ret, prpcount;
+	uint64_t iova, prplist_iova;
+
+	if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
+		errno = EFAULT;
+		return -1;
+	}
+
+	if (!iommu_translate_vaddr(ctx, prplist, &prplist_iova)) {
+		errno = EFAULT;
+		return -1;
+	}
+
+	/* map the first segment */
+	prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
+	if (prpcount < 0)
+		goto invalid;
+
+	/*
+	 * At this point, one of three conditions must hold:
+	 *
+	 * a) a single prp entry was set up by __map_prp_first, or
+	 * b) the iovec only has a single entry, or
+	 * c) the first buffer ends on a page size boundary
+	 *
+	 * If none holds, the buffer(s) within the iovec cannot be mapped given
+	 * the PRP alignment requirements.
+	 */
+	if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) {
+		log_error("iov[0].iov_base/len invalid\n");
+
+		goto invalid;
+	}
+
+	/* map remaining iovec entries; these must be page size aligned */
+	for (int i = 1; i < niov; i++) {
+		if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
+			errno = EFAULT;
+			return -1;
+		}
+
+		len = iov[i].iov_len;
+
+		/* all entries but the last must have a page size aligned len */
+		if (i < niov - 1 && !ALIGNED(len, pagesize)) {
+			log_error("unaligned iov[%u].len (%zu)\n", i, len);
+
+			goto invalid;
+		}
+
+		ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount,
+				       pageshift);
+		if (ret < 0)
+			goto invalid;
+
+		prpcount += ret;
+	}
+
+	__set_prp2(&cmd->dptr.prp2, cpu_to_le64(prplist_iova), prplist[0], prpcount);
+
+	return 0;
+
+invalid:
+	errno = EINVAL;
+	return -1;
+}
+
+static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len)
+{
+	sgld->addr = cpu_to_le64(iova);
+	sgld->len = cpu_to_le32((uint32_t)len);
+
+	sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4;
+}
+
+static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n)
+{
+	sgld->addr = cpu_to_le64(iova);
+	sgld->len = cpu_to_le32(n << 4);
+
+	sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4;
+}
+
+int nvme_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_sgld *seg, union nvme_cmd *cmd,
+		  struct iovec *iov, int niov)
+{
+	struct iommu_ctx *ctx = __iommu_ctx(ctrl);
+
+	int pageshift = __mps_to_pageshift(ctrl->config.mps);
+	int max_sglds = 1 << (pageshift - 4);
+	int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;
+
+	uint64_t iova;
+	uint64_t seg_iova;
+
+	if (niov == 1) {
+		if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
+			errno = EFAULT;
+			return -1;
+		}
+
+		__sgl_data(&cmd->dptr.sgl, iova, iov->iov_len);
+
+		return 0;
+	}
+
+	if (niov > max_sglds) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (!iommu_translate_vaddr(ctx, seg, &seg_iova)) {
+		errno = EFAULT;
+		return -1;
+	}
+
+	__sgl_segment(&cmd->dptr.sgl, seg_iova, niov);
+
+	for (int i = 0; i < niov; i++) {
+		if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
+			errno = EFAULT;
+			return -1;
+		}
+
+		if (dword_align && (iova & 0x3)) {
+			errno = EINVAL;
+			return -1;
+		}
+
+		__sgl_data(&seg[i], iova, iov[i].iov_len);
+	}
+
+	cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT);
+
+	return 0;
+}
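A corresponding sketch for the SGL path (illustrative, not part of the patch; `segpage` stands in for a caller-managed, IOMMU-mapped page holding the segment descriptors, and `buf0`/`buf1` for IOMMU-mapped buffers, dword-aligned if the controller requires it):

	struct iovec iov[] = {
		{ .iov_base = buf0, .iov_len = 0x1000 },
		{ .iov_base = buf1, .iov_len = 0x200 },
	};
	union nvme_cmd cmd = {};

	if (nvme_mapv_sgl(&ctrl, segpage, &cmd, iov, 2))
		err(1, "nvme_mapv_sgl");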