nvme: add low-level api to map prp/sgl without an nvme_rq
Add low-level helpers that map PRP and SGL data and write the data list
(the actual PRP and SGL entries) into a caller-provided memory page.

The existing nvme_rq mapping helpers always use the preallocated
page.vaddr member of struct nvme_rq. Some applications may want to place
the PRP list or SGL segments elsewhere, such as in the CMB (Controller
Memory Buffer) rather than in host memory. The application could
overwrite the page member of struct nvme_rq to do so, but that is
undesirable since struct nvme_rq is a library-internal data structure
with its own allocation policy.

To give upper-layer applications more flexibility, this patch adds a more
generic low-level API that takes the PRP list or segment list as a
parameter.

The newly added public helpers are:
  * nvme_map_prp
  * nvme_mapv_prp
  * nvme_mapv_sgl

There is no functional change to the existing API.
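
As a rough illustration (not part of this patch), a caller that keeps its
PRP list outside of struct nvme_rq might use the new helper roughly as
sketched below; the prplist pointer, buffer IOVA and submission details
are hypothetical:

  #include <vfn/nvme.h>

  /*
   * Hypothetical sketch: map a 2 MiB read using a caller-provided PRP
   * list page (for example one carved out of the CMB). The page must be
   * reachable by the controller and translatable to an IOVA.
   */
  static int queue_read(struct nvme_ctrl *ctrl, struct nvme_sq *sq,
                        leint64_t *prplist, uint64_t buf_iova)
  {
          union nvme_cmd cmd = {
                  .opcode = 0x02, /* NVM Read; nsid/slba/nlb omitted */
          };

          /* PRP entries are written into the caller-provided page */
          if (nvme_map_prp(ctrl, prplist, &cmd, buf_iova, 2 << 20))
                  return -1;

          nvme_sq_post(sq, &cmd);
          nvme_sq_run(sq);

          return 0;
  }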

Signed-off-by: Minwoo Im <minwoo.im@samsung.com>
[k.jensen: update commit message and fixed tests]
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
minwooim authored and birkelund committed Dec 10, 2024
1 parent 5173ae4 commit bce0f6c
Showing 5 changed files with 326 additions and 234 deletions.
9 changes: 9 additions & 0 deletions include/vfn/nvme/rq.h
@@ -212,6 +212,9 @@ static inline void nvme_rq_exec(struct nvme_rq *rq, union nvme_cmd *cmd)
*
* Map a buffer of size @len into the command payload.
*
* This helper uses the pre-allocated PRP list page within @rq and is
* equivalent to calling ``nvme_map_prp(ctrl, rq->page.vaddr, ...)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
@@ -230,6 +233,9 @@ int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *
* allowed to be unaligned, but the entry MUST end on a page boundary. All
* subsequent entries MUST be page aligned.
*
* This helper uses the pre-allocated PRP list page within @rq and is
* equivalent to calling ``nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
@@ -246,6 +252,9 @@ int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd
*
* Map the memory contained in @iov into the request SGL.
*
* This helper uses the pre-allocated SGL segment list page within @rq and is
* equivalent to calling ``nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov)``.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
51 changes: 51 additions & 0 deletions include/vfn/nvme/util.h
@@ -110,4 +110,55 @@ int nvme_sync(struct nvme_ctrl *ctrl, struct nvme_sq *sq, union nvme_cmd *sqe, v
int nvme_admin(struct nvme_ctrl *ctrl, union nvme_cmd *sqe, void *buf, size_t len,
struct nvme_cqe *cqe_copy);

/**
* nvme_map_prp - Set up the Physical Region Pages in the data pointer of the
* command from a buffer that is contiguous in iova mapped
* memory.
* @ctrl: &struct nvme_ctrl
* @prplist: The first PRP list page address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iova: I/O Virtual Address
* @len: Length of buffer
*
* Map a buffer of size @len into the command payload.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_map_prp(struct nvme_ctrl *ctrl, leint64_t *prplist, union nvme_cmd *cmd,
uint64_t iova, size_t len);

/**
* nvme_mapv_prp - Set up the Physical Region Pages in the data pointer of
* the command from an iovec.
* @ctrl: &struct nvme_ctrl
* @prplist: The first PRP list page address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request PRPs. The first entry is
* allowed to be unaligned, but the entry MUST end on a page boundary. All
* subsequent entries MUST be page aligned.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_mapv_prp(struct nvme_ctrl *ctrl, leint64_t *prplist,
union nvme_cmd *cmd, struct iovec *iov, int niov);

/**
* nvme_mapv_sgl - Set up a Scatter/Gather List in the data pointer of the
* command from an iovec.
* @ctrl: &struct nvme_ctrl
* @seglist: SGL segment list page address
* @cmd: NVMe command prototype (&union nvme_cmd)
* @iov: array of iovecs
* @niov: number of iovecs in @iov
*
* Map the memory contained in @iov into the request SGL.
*
* Return: ``0`` on success, ``-1`` on error and sets errno.
*/
int nvme_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_sgld *seglist, union nvme_cmd *cmd,
struct iovec *iov, int niov);

#endif /* LIBVFN_NVME_UTIL_H */
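
As a rough usage sketch (not part of the patch), the iovec-based variants
write into a caller-provided PRP list or SGL segment page. The example
below is hypothetical and assumes a 4 KiB memory page size and buffers
that are already IOMMU-mapped:

  #include <sys/uio.h>
  #include <vfn/nvme.h>

  /*
   * Hypothetical sketch: map two buffers into one command. iov[0] may
   * start unaligned but must end on a page boundary; every following
   * entry must be page aligned.
   */
  static int map_two_buffers(struct nvme_ctrl *ctrl, leint64_t *prplist,
                             void *buf0, void *buf1, union nvme_cmd *cmd)
  {
          struct iovec iov[2] = {
                  { .iov_base = (char *)buf0 + 512, .iov_len = 4096 - 512 },
                  { .iov_base = buf1, .iov_len = 8192 },
          };

          /* PRP entries land in the caller-provided prplist page */
          return nvme_mapv_prp(ctrl, prplist, cmd, iov, 2);
  }
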
226 changes: 5 additions & 221 deletions src/nvme/rq.c
@@ -35,241 +35,25 @@
#include <vfn/vfio.h>
#include <vfn/nvme.h>

#include "ccan/minmax/minmax.h"

#include "iommu/context.h"
#include "types.h"

static inline int __map_prp_first(leint64_t *prp1, leint64_t *prplist, uint64_t iova, size_t len,
int pageshift)
int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
uint64_t iova, size_t len)
{
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);

/* number of prps required to map the buffer */
int prpcount = 1;

*prp1 = cpu_to_le64(iova);

/* account for what is covered with the first prp */
len -= min_t(size_t, len, pagesize - (iova & (pagesize - 1)));

/* any residual just adds more prps */
if (len)
prpcount += (int)ALIGN_UP(len, pagesize) >> pageshift;

if (prpcount > 1 && !ALIGNED(iova, pagesize))
/* align down to simplify loop below */
iova = ALIGN_DOWN(iova, pagesize);

if (prpcount > max_prps) {
errno = EINVAL;
return -1;
}

/*
* Map the remaining parts of the buffer into prp2/prplist. iova will be
* aligned from the above, which simplifies this.
*/
for (int i = 1; i < prpcount; i++)
prplist[i - 1] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

/*
* prpcount may be zero if the buffer length was less than the page
* size, so clamp it to 1 in that case.
*/
return clamp_t(int, prpcount, 1, prpcount);
}

static inline int __map_prp_append(leint64_t *prplist, uint64_t iova, size_t len, int max_prps,
int pageshift)
{
int prpcount = max_t(int, 1, (int)len >> pageshift);
size_t pagesize = 1 << pageshift;

if (prpcount > max_prps) {
log_error("too many prps required\n");

errno = EINVAL;
return -1;
}

if (!ALIGNED(iova, pagesize)) {
log_error("unaligned iova 0x%" PRIx64 "\n", iova);

errno = EINVAL;
return -1;
}

for (int i = 0; i < prpcount; i++)
prplist[i] = cpu_to_le64(iova + ((uint64_t)i << pageshift));

return prpcount;
}

static inline void __set_prp2(leint64_t *prp2, leint64_t prplist, leint64_t prplist0, int prpcount)
{
if (prpcount == 2)
*prp2 = prplist0;
else if (prpcount > 2)
*prp2 = prplist;
else
*prp2 = 0x0;
}

int nvme_rq_map_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd, uint64_t iova,
size_t len)
{
int prpcount;
leint64_t *prplist = rq->page.vaddr;
int pageshift = __mps_to_pageshift(ctrl->config.mps);

prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
if (prpcount < 0) {
errno = EINVAL;
return -1;
}

__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);

return 0;
return nvme_map_prp(ctrl, rq->page.vaddr, cmd, iova, len);
}

int nvme_rq_mapv_prp(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
struct iommu_ctx *ctx = __iommu_ctx(ctrl);

leint64_t *prplist = rq->page.vaddr;
size_t len = iov->iov_len;
int pageshift = __mps_to_pageshift(ctrl->config.mps);
size_t pagesize = 1 << pageshift;
int max_prps = 1 << (pageshift - 3);
int ret, prpcount;
uint64_t iova;

if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
errno = EFAULT;
return -1;
}

/* map the first segment */
prpcount = __map_prp_first(&cmd->dptr.prp1, prplist, iova, len, pageshift);
if (prpcount < 0)
goto invalid;

/*
* At this point, one of three conditions must hold:
*
* a) a single prp entry was set up by __map_first, or
* b) the iovec only has a single entry, or
* c) the first buffer ends on a page size boundary
*
* If none holds, the buffer(s) within the iovec cannot be mapped given
* the PRP alignment requirements.
*/
if (!(prpcount == 1 || niov == 1 || ALIGNED(iova + len, pagesize))) {
log_error("iov[0].iov_base/len invalid\n");

goto invalid;
}

/* map remaining iovec entries; these must be page size aligned */
for (int i = 1; i < niov; i++) {
if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
errno = EFAULT;
return -1;
}

len = iov[i].iov_len;

/* all entries but the last must have a page size aligned len */
if (i < niov - 1 && !ALIGNED(len, pagesize)) {
log_error("unaligned iov[%u].len (%zu)\n", i, len);

goto invalid;
}

ret = __map_prp_append(&prplist[prpcount - 1], iova, len, max_prps - prpcount,
pageshift);
if (ret < 0)
goto invalid;

prpcount += ret;
}

__set_prp2(&cmd->dptr.prp2, cpu_to_le64(rq->page.iova), prplist[0], prpcount);

return 0;

invalid:
errno = EINVAL;
return -1;
}

static inline void __sgl_data(struct nvme_sgld *sgld, uint64_t iova, size_t len)
{
sgld->addr = cpu_to_le64(iova);
sgld->len = cpu_to_le32((uint32_t)len);

sgld->type = NVME_SGLD_TYPE_DATA_BLOCK << 4;
}

static inline void __sgl_segment(struct nvme_sgld *sgld, uint64_t iova, int n)
{
sgld->addr = cpu_to_le64(iova);
sgld->len = cpu_to_le32(n << 4);

sgld->type = NVME_SGLD_TYPE_LAST_SEGMENT << 4;
return nvme_mapv_prp(ctrl, rq->page.vaddr, cmd, iov, niov);
}

int nvme_rq_mapv_sgl(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,
struct iovec *iov, int niov)
{
struct nvme_sgld *seg = rq->page.vaddr;
struct iommu_ctx *ctx = __iommu_ctx(ctrl);

int pageshift = __mps_to_pageshift(ctrl->config.mps);
int max_sglds = 1 << (pageshift - 4);
int dword_align = ctrl->flags & NVME_CTRL_F_SGLS_DWORD_ALIGNMENT;

uint64_t iova;

if (niov == 1) {
if (!iommu_translate_vaddr(ctx, iov->iov_base, &iova)) {
errno = EFAULT;
return -1;
}

__sgl_data(&cmd->dptr.sgl, iova, iov->iov_len);

return 0;
}

if (niov > max_sglds) {
errno = EINVAL;
return -1;
}

__sgl_segment(&cmd->dptr.sgl, rq->page.iova, niov);

for (int i = 0; i < niov; i++) {
if (!iommu_translate_vaddr(ctx, iov[i].iov_base, &iova)) {
errno = EFAULT;
return -1;
}

if (dword_align && (iova & 0x3)) {
errno = EINVAL;
return -1;
}

__sgl_data(&seg[i], iova, iov[i].iov_len);
}

cmd->flags |= NVME_FIELD_SET(NVME_CMD_FLAGS_PSDT_SGL_MPTR_CONTIG, CMD_FLAGS_PSDT);

return 0;
return nvme_mapv_sgl(ctrl, rq->page.vaddr, cmd, iov, niov);
}

int nvme_rq_mapv(struct nvme_ctrl *ctrl, struct nvme_rq *rq, union nvme_cmd *cmd,