block-6.14-20250221

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAme4rUUQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpj9+EADLPOFPa9hT1PGBbpnj74vBayoTO/M+w2Gp
 +k2b8if3eGlY43WO2k+ytceWbA901iyvLPRqt1M1Ez8+BrNBg4NKcLv7q4O9NA3i
 nDPLggugSc5sdRbLRimxiwHkkpSOBenkdb7R9XGmMXTCfSbRKl0kK01ivpgkbiG4
 pbyPWYcoMyHaECBfPhazrJig4+rugXOYbkYoOM4NHsLqlTNfmowcMRPu+6czXt7q
 ITHW2RTWK3ue8q+c3nwGPDk2ZKM8X/49rA/6bvD3voLNs+jQ8KFg2KULENf0Xaq6
 1ZGrhLcr45iEHP0/+RORMzx27PqbTCSGIOTMZtwZNqh5+V+ybrGJq/F/T5rkrA3F
 QqHld/WSSKWJ10RVAyjDP7NQ5vNZTwwGAEVagjyIFEfk7G7RTY2kIpSZiUgrZ9oD
 4CkOKUGmVkUsKQW6gb0JQObtYyXXoNtmg8wQU2WwhISjFDkoYWw53LHwH/LnxLyi
 Vg182amVBmERk4I5nTUiIML/7TzS69srb0Q7yaQS3eTwzLorDaB+3tPAxQmCTkGq
 KeBfuBtbP3LTOy2Oek4YbKl8CA2KDYtK7FbCE6PECUbdTjpNrcAAgA/ZcgoKV8s7
 EHWZFx7dZyFS6LFNWzT9VhTtgSZS92JIsZwgnjSJPV2UazyxmqHFChzVVGWJbaB3
 agkMor3nVg==
 =L+Lr
 -----END PGP SIGNATURE-----

Merge tag 'block-6.14-20250221' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - NVMe pull request via Keith:
      - FC controller state check fixes (Daniel)
      - PCI Endpoint fixes (Damien)
      - TCP connection failure fix (Caleb)
      - TCP handling C2HTermReq PDU (Maurizio)
      - RDMA queue state check (Ruozhu)
      - Apple controller fixes (Hector)
      - Target crash on disabled namespace (Hannes)

 - MD pull request via Yu:
      - Fix queue limits error handling for raid0, raid1 and raid10

 - Fix for a NULL pointer deref in request data mapping

 - Code cleanup for request merging

* tag 'block-6.14-20250221' of git://git.kernel.dk/linux:
  nvme: only allow entering LIVE from CONNECTING state
  nvme-fc: rely on state transitions to handle connectivity loss
  apple-nvme: Support coprocessors left idle
  apple-nvme: Release power domains when probe fails
  nvmet: Use enum definitions instead of hardcoded values
  nvme: Cleanup the definition of the controller config register fields
  nvme/ioctl: add missing space in err message
  nvme-tcp: fix connect failure on receiving partial ICResp PDU
  nvme: tcp: Fix compilation warning with W=1
  nvmet: pci-epf: Avoid RCU stalls under heavy workload
  nvmet: pci-epf: Do not uselessly write the CSTS register
  nvmet: pci-epf: Correctly initialize CSTS when enabling the controller
  nvmet-rdma: recheck queue state is LIVE in state lock in recv done
  nvmet: Fix crash when a namespace is disabled
  nvme-tcp: add basic support for the C2HTermReq PDU
  nvme-pci: quirk Acer FA100 for non-uniqueue identifiers
  block: fix NULL pointer dereferenced within __blk_rq_map_sg
  block/merge: remove unnecessary min() with UINT_MAX
  md/raid*: Fix the set_queue_limits implementations
Linus Torvalds 2025-02-21 09:36:28 -08:00
commit 8a61cb6e15
16 changed files with 218 additions and 152 deletions


@@ -270,7 +270,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
unsigned max_segs, unsigned max_bytes)
{
unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
unsigned max_len = max_bytes - *bytes;
unsigned len = min(bv->bv_len, max_len);
unsigned total_len = 0;
unsigned seg_size = 0;
@@ -556,11 +556,14 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
{
struct req_iterator iter = {
.bio = rq->bio,
.iter = rq->bio->bi_iter,
};
struct phys_vec vec;
int nsegs = 0;
/* the internal flush request may not have bio attached */
if (iter.bio)
iter.iter = iter.bio->bi_iter;
while (blk_map_iter_next(rq, &iter, &vec)) {
*last_sg = blk_next_sg(last_sg, sglist);
sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len,


@@ -386,10 +386,8 @@ static int raid0_set_limits(struct mddev *mddev)
lim.io_opt = lim.io_min * mddev->raid_disks;
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
if (err)
return err;
}
return queue_limits_set(mddev->gendisk->queue, &lim);
}


@@ -3219,10 +3219,8 @@ static int raid1_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = 0;
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
if (err)
return err;
}
return queue_limits_set(mddev->gendisk->queue, &lim);
}


@@ -4020,10 +4020,8 @@ static int raid10_set_queue_limits(struct mddev *mddev)
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
queue_limits_cancel_update(mddev->gendisk->queue);
if (err)
return err;
}
return queue_limits_set(mddev->gendisk->queue, &lim);
}


@@ -1011,25 +1011,37 @@ static void apple_nvme_reset_work(struct work_struct *work)
ret = apple_rtkit_shutdown(anv->rtk);
if (ret)
goto out;
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
}
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
/*
* Only do the soft-reset if the CPU is not running, which means either we
* or the previous stage shut it down cleanly.
*/
if (!(readl(anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL) &
APPLE_ANS_COPROC_CPU_CONTROL_RUN)) {
ret = reset_control_assert(anv->reset);
if (ret)
goto out;
ret = reset_control_assert(anv->reset);
if (ret)
goto out;
ret = apple_rtkit_reinit(anv->rtk);
if (ret)
goto out;
ret = apple_rtkit_reinit(anv->rtk);
if (ret)
goto out;
ret = reset_control_deassert(anv->reset);
if (ret)
goto out;
ret = reset_control_deassert(anv->reset);
if (ret)
goto out;
writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN,
anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
ret = apple_rtkit_boot(anv->rtk);
} else {
ret = apple_rtkit_wake(anv->rtk);
}
writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN,
anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
ret = apple_rtkit_boot(anv->rtk);
if (ret) {
dev_err(anv->dev, "ANS did not boot");
goto out;
@@ -1516,6 +1528,7 @@ static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev)
return anv;
put_dev:
apple_nvme_detach_genpd(anv);
put_device(anv->dev);
return ERR_PTR(ret);
}
@@ -1549,6 +1562,7 @@ out_uninit_ctrl:
nvme_uninit_ctrl(&anv->ctrl);
out_put_ctrl:
nvme_put_ctrl(&anv->ctrl);
apple_nvme_detach_genpd(anv);
return ret;
}
@@ -1563,9 +1577,12 @@ static void apple_nvme_remove(struct platform_device *pdev)
apple_nvme_disable(anv, true);
nvme_uninit_ctrl(&anv->ctrl);
if (apple_rtkit_is_running(anv->rtk))
if (apple_rtkit_is_running(anv->rtk)) {
apple_rtkit_shutdown(anv->rtk);
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
}
apple_nvme_detach_genpd(anv);
}
@@ -1574,8 +1591,11 @@ static void apple_nvme_shutdown(struct platform_device *pdev)
struct apple_nvme *anv = platform_get_drvdata(pdev);
apple_nvme_disable(anv, true);
if (apple_rtkit_is_running(anv->rtk))
if (apple_rtkit_is_running(anv->rtk)) {
apple_rtkit_shutdown(anv->rtk);
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
}
}
static int apple_nvme_resume(struct device *dev)
@@ -1592,10 +1612,11 @@ static int apple_nvme_suspend(struct device *dev)
apple_nvme_disable(anv, true);
if (apple_rtkit_is_running(anv->rtk))
if (apple_rtkit_is_running(anv->rtk)) {
ret = apple_rtkit_shutdown(anv->rtk);
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL);
}
return ret;
}


@@ -564,8 +564,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (new_state) {
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
fallthrough;


@@ -781,61 +781,12 @@ restart:
static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
enum nvme_ctrl_state state;
unsigned long flags;
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller connectivity lost. Awaiting "
"Reconnect", ctrl->cnum);
spin_lock_irqsave(&ctrl->lock, flags);
set_bit(ASSOC_FAILED, &ctrl->flags);
state = nvme_ctrl_state(&ctrl->ctrl);
spin_unlock_irqrestore(&ctrl->lock, flags);
switch (state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
/*
* Schedule a controller reset. The reset will terminate the
* association and schedule the reconnect timer. Reconnects
* will be attempted until either the ctlr_loss_tmo
* (max_retries * connect_delay) expires or the remoteport's
* dev_loss_tmo expires.
*/
if (nvme_reset_ctrl(&ctrl->ctrl)) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Couldn't schedule reset.\n",
ctrl->cnum);
nvme_delete_ctrl(&ctrl->ctrl);
}
break;
case NVME_CTRL_CONNECTING:
/*
* The association has already been terminated and the
* controller is attempting reconnects. No need to do anything
* futher. Reconnects will be attempted until either the
* ctlr_loss_tmo (max_retries * connect_delay) expires or the
* remoteport's dev_loss_tmo expires.
*/
break;
case NVME_CTRL_RESETTING:
/*
* Controller is already in the process of terminating the
* association. No need to do anything further. The reconnect
* step will kick in naturally after the association is
* terminated.
*/
break;
case NVME_CTRL_DELETING:
case NVME_CTRL_DELETING_NOIO:
default:
/* no action to take - let it delete */
break;
}
nvme_reset_ctrl(&ctrl->ctrl);
}
/**
@@ -3071,7 +3022,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
struct nvmefc_ls_rcv_op *disls = NULL;
unsigned long flags;
int ret;
bool changed;
++ctrl->ctrl.nr_reconnects;
@@ -3177,23 +3127,18 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
if (ret)
goto out_term_aen_ops;
spin_lock_irqsave(&ctrl->lock, flags);
if (!test_bit(ASSOC_FAILED, &ctrl->flags))
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
else
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO;
spin_unlock_irqrestore(&ctrl->lock, flags);
if (ret)
goto out_term_aen_ops;
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) {
ret = -EIO;
goto out_term_aen_ops;
}
ctrl->ctrl.nr_reconnects = 0;
if (changed)
nvme_start_ctrl(&ctrl->ctrl);
nvme_start_ctrl(&ctrl->ctrl);
return 0; /* Success */


@@ -283,8 +283,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl,
{
if (ns && nsid != ns->head->ns_id) {
dev_err(ctrl->device,
"%s: nsid (%u) in cmd does not match nsid (%u)"
"of namespace\n",
"%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n",
current->comm, nsid, ns->head->ns_id);
return false;
}


@@ -3706,6 +3706,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1dbe, 0x5216), /* Acer/INNOGRIT FA100/5216 NVMe SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */


@@ -763,6 +763,40 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
return 0;
}
static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue,
struct nvme_tcp_term_pdu *pdu)
{
u16 fes;
const char *msg;
u32 plen = le32_to_cpu(pdu->hdr.plen);
static const char * const msg_table[] = {
[NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field",
[NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error",
[NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error",
[NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range",
[NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded",
[NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter",
};
if (plen < NVME_TCP_MIN_C2HTERM_PLEN ||
plen > NVME_TCP_MAX_C2HTERM_PLEN) {
dev_err(queue->ctrl->ctrl.device,
"Received a malformed C2HTermReq PDU (plen = %u)\n",
plen);
return;
}
fes = le16_to_cpu(pdu->fes);
if (fes && fes < ARRAY_SIZE(msg_table))
msg = msg_table[fes];
else
msg = "Unknown";
dev_err(queue->ctrl->ctrl.device,
"Received C2HTermReq (FES = %s)\n", msg);
}
static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
unsigned int *offset, size_t *len)
{
@@ -784,6 +818,15 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
return 0;
hdr = queue->pdu;
if (unlikely(hdr->type == nvme_tcp_c2h_term)) {
/*
* C2HTermReq never includes Header or Data digests.
* Skip the checks.
*/
nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu);
return -EINVAL;
}
if (queue->hdr_digest) {
ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
if (unlikely(ret))
@@ -1449,11 +1492,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);
}
msg.msg_flags = MSG_WAITALL;
ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
if (ret < 0) {
if (ret < sizeof(*icresp)) {
pr_warn("queue %d: failed to receive icresp, error %d\n",
nvme_tcp_queue_id(queue), ret);
if (ret >= 0)
ret = -ECONNRESET;
goto free_icresp;
}
ret = -ENOTCONN;
@@ -1565,7 +1611,7 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
ctrl->io_queues[HCTX_TYPE_POLL];
}
/**
/*
* Track the number of queues assigned to each cpu using a global per-cpu
* counter and select the least used cpu from the mq_map. Our goal is to spread
* different controllers I/O threads across different cpu cores.

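The ICResp change in nvme_tcp_init_connection() above requests the whole response with MSG_WAITALL and treats any short return from kernel_recvmsg() as a connection failure. A rough userspace sketch of the same pattern (illustrative only; the socket fd, buffer, and the 128-byte length are assumptions, not the driver code):

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/types.h>

#define ICRESP_LEN 128	/* assumed ICResp PDU size for this sketch */

static int recv_icresp(int fd, void *buf)
{
	ssize_t ret = recv(fd, buf, ICRESP_LEN, MSG_WAITALL);

	/*
	 * MSG_WAITALL asks for the full length, but the call can still
	 * return short on error, signal, or peer close; treat all of
	 * those as a failed connection attempt.
	 */
	if (ret < (ssize_t)ICRESP_LEN) {
		fprintf(stderr, "failed to receive icresp (%zd)\n", ret);
		return ret < 0 ? -errno : -ECONNRESET;
	}
	return 0;
}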

@@ -606,6 +606,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
goto out_dev_put;
}
if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
goto out_pr_exit;
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
@@ -613,6 +616,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
out_pr_exit:
if (ns->pr.enable)
nvmet_pr_exit_ns(ns);
out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
@@ -638,6 +644,19 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
mutex_unlock(&subsys->lock);
/*
* Now that we removed the namespaces from the lookup list, we
* can kill the per_cpu ref and wait for any remaining references
* to be dropped, as well as a RCU grace period for anyone only
* using the namepace under rcu_read_lock(). Note that we can't
* use call_rcu here as we need to ensure the namespaces have
* been fully destroyed before unloading the module.
*/
percpu_ref_kill(&ns->ref);
synchronize_rcu();
wait_for_completion(&ns->disable_done);
percpu_ref_exit(&ns->ref);
if (ns->pr.enable)
nvmet_pr_exit_ns(ns);
@@ -660,22 +679,6 @@ void nvmet_ns_free(struct nvmet_ns *ns)
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
mutex_unlock(&subsys->lock);
/*
* Now that we removed the namespaces from the lookup list, we
* can kill the per_cpu ref and wait for any remaining references
* to be dropped, as well as a RCU grace period for anyone only
* using the namepace under rcu_read_lock(). Note that we can't
* use call_rcu here as we need to ensure the namespaces have
* been fully destroyed before unloading the module.
*/
percpu_ref_kill(&ns->ref);
synchronize_rcu();
wait_for_completion(&ns->disable_done);
percpu_ref_exit(&ns->ref);
mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
mutex_unlock(&subsys->lock);
@@ -705,9 +708,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
goto out_free;
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = nsid;
@@ -730,8 +730,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
return ns;
out_exit:
subsys->max_nsid = nvmet_max_nsid(subsys);
percpu_ref_exit(&ns->ref);
out_free:
kfree(ns);
out_unlock:
mutex_unlock(&subsys->lock);


@@ -784,37 +784,37 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req);
static inline bool nvmet_cc_en(u32 cc)
{
return (cc >> NVME_CC_EN_SHIFT) & 0x1;
return (cc & NVME_CC_ENABLE) >> NVME_CC_EN_SHIFT;
}
static inline u8 nvmet_cc_css(u32 cc)
{
return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
return (cc & NVME_CC_CSS_MASK) >> NVME_CC_CSS_SHIFT;
}
static inline u8 nvmet_cc_mps(u32 cc)
{
return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
return (cc & NVME_CC_MPS_MASK) >> NVME_CC_MPS_SHIFT;
}
static inline u8 nvmet_cc_ams(u32 cc)
{
return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
return (cc & NVME_CC_AMS_MASK) >> NVME_CC_AMS_SHIFT;
}
static inline u8 nvmet_cc_shn(u32 cc)
{
return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
return (cc & NVME_CC_SHN_MASK) >> NVME_CC_SHN_SHIFT;
}
static inline u8 nvmet_cc_iosqes(u32 cc)
{
return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
return (cc & NVME_CC_IOSQES_MASK) >> NVME_CC_IOSQES_SHIFT;
}
static inline u8 nvmet_cc_iocqes(u32 cc)
{
return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
return (cc & NVME_CC_IOCQES_MASK) >> NVME_CC_IOCQES_SHIFT;
}
/* Convert a 32-bit number to a 16-bit 0's based number */

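A self-contained sketch of the mask-then-shift form these helpers switch to (the shift and mask values mirror the CC layout; the macro names, sample CC word, and test harness are illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>

#define CC_IOSQES_SHIFT	16			/* IOSQES: bits 19:16 */
#define CC_IOSQES_MASK	(0xfU << CC_IOSQES_SHIFT)

static inline uint8_t cc_iosqes(uint32_t cc)
{
	return (cc & CC_IOSQES_MASK) >> CC_IOSQES_SHIFT;
}

int main(void)
{
	uint32_t cc = 0x00460001;	/* sample: EN=1, IOSQES=6, IOCQES=4 */

	/* The old shift-then-mask form and the new mask-then-shift form
	 * extract the same field value. */
	assert(((cc >> CC_IOSQES_SHIFT) & 0xf) == cc_iosqes(cc));
	assert(cc_iosqes(cc) == 6);
	return 0;
}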

@@ -46,7 +46,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex);
/*
* BAR CC register and SQ polling intervals.
*/
#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(5)
#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(10)
#define NVMET_PCI_EPF_SQ_POLL_INTERVAL msecs_to_jiffies(5)
#define NVMET_PCI_EPF_SQ_POLL_IDLE msecs_to_jiffies(5000)
@@ -1694,6 +1694,7 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work)
struct nvmet_pci_epf_ctrl *ctrl =
container_of(work, struct nvmet_pci_epf_ctrl, poll_sqs.work);
struct nvmet_pci_epf_queue *sq;
unsigned long limit = jiffies;
unsigned long last = 0;
int i, nr_sqs;
@@ -1708,6 +1709,16 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work)
nr_sqs++;
}
/*
* If we have been running for a while, reschedule to let other
* tasks run and to avoid RCU stalls.
*/
if (time_is_before_jiffies(limit + secs_to_jiffies(1))) {
cond_resched();
limit = jiffies;
continue;
}
if (nr_sqs) {
last = jiffies;
continue;
@@ -1822,14 +1833,14 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
if (ctrl->io_sqes < sizeof(struct nvme_command)) {
dev_err(ctrl->dev, "Unsupported I/O SQES %zu (need %zu)\n",
ctrl->io_sqes, sizeof(struct nvme_command));
return -EINVAL;
goto err;
}
ctrl->io_cqes = 1UL << nvmet_cc_iocqes(ctrl->cc);
if (ctrl->io_cqes < sizeof(struct nvme_completion)) {
dev_err(ctrl->dev, "Unsupported I/O CQES %zu (need %zu)\n",
ctrl->io_sqes, sizeof(struct nvme_completion));
return -EINVAL;
goto err;
}
/* Create the admin queue. */
@@ -1844,7 +1855,7 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
qsize, pci_addr, 0);
if (status != NVME_SC_SUCCESS) {
dev_err(ctrl->dev, "Failed to create admin completion queue\n");
return -EINVAL;
goto err;
}
qsize = aqa & 0x00000fff;
@@ -1854,17 +1865,22 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
if (status != NVME_SC_SUCCESS) {
dev_err(ctrl->dev, "Failed to create admin submission queue\n");
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
return -EINVAL;
goto err;
}
ctrl->sq_ab = NVMET_PCI_EPF_SQ_AB;
ctrl->irq_vector_threshold = NVMET_PCI_EPF_IV_THRESHOLD;
ctrl->enabled = true;
ctrl->csts = NVME_CSTS_RDY;
/* Start polling the controller SQs. */
schedule_delayed_work(&ctrl->poll_sqs, 0);
return 0;
err:
ctrl->csts = 0;
return -EINVAL;
}
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
@@ -1889,6 +1905,8 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
/* Delete the admin queue last. */
nvmet_pci_epf_delete_sq(ctrl->tctrl, 0);
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
ctrl->csts &= ~NVME_CSTS_RDY;
}
static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
@@ -1903,19 +1921,19 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
old_cc = ctrl->cc;
new_cc = nvmet_pci_epf_bar_read32(ctrl, NVME_REG_CC);
if (new_cc == old_cc)
goto reschedule_work;
ctrl->cc = new_cc;
if (nvmet_cc_en(new_cc) && !nvmet_cc_en(old_cc)) {
ret = nvmet_pci_epf_enable_ctrl(ctrl);
if (ret)
return;
ctrl->csts |= NVME_CSTS_RDY;
goto reschedule_work;
}
if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) {
if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc))
nvmet_pci_epf_disable_ctrl(ctrl);
ctrl->csts &= ~NVME_CSTS_RDY;
}
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) {
nvmet_pci_epf_disable_ctrl(ctrl);
@@ -1928,6 +1946,7 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
nvmet_update_cc(ctrl->tctrl, ctrl->cc);
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);
reschedule_work:
schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL);
}


@@ -996,6 +996,27 @@ out_err:
nvmet_req_complete(&cmd->req, status);
}
static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue,
struct nvmet_rdma_rsp *rsp)
{
unsigned long flags;
bool ret = true;
spin_lock_irqsave(&queue->state_lock, flags);
/*
* recheck queue state is not live to prevent a race condition
* with RDMA_CM_EVENT_ESTABLISHED handler.
*/
if (queue->state == NVMET_RDMA_Q_LIVE)
ret = false;
else if (queue->state == NVMET_RDMA_Q_CONNECTING)
list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
else
nvmet_rdma_put_rsp(rsp);
spin_unlock_irqrestore(&queue->state_lock, flags);
return ret;
}
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_cmd *cmd =
@@ -1038,17 +1059,9 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
rsp->n_rdma = 0;
rsp->invalidate_rkey = 0;
if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
unsigned long flags;
spin_lock_irqsave(&queue->state_lock, flags);
if (queue->state == NVMET_RDMA_Q_CONNECTING)
list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
else
nvmet_rdma_put_rsp(rsp);
spin_unlock_irqrestore(&queue->state_lock, flags);
if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) &&
nvmet_rdma_recv_not_live(queue, rsp))
return;
}
nvmet_rdma_handle_command(queue, rsp);
}

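The nvmet-rdma change above moves the slow path into a helper that re-checks the queue state after taking state_lock, so a queue that just went LIVE via the RDMA_CM_EVENT_ESTABLISHED handler is not mistakenly parked or dropped. A generic userspace sketch of that recheck-under-lock pattern (pthreads; all names are hypothetical):

#include <pthread.h>
#include <stdbool.h>

enum q_state { Q_CONNECTING, Q_LIVE, Q_DISCONNECTING };

struct queue {
	pthread_mutex_t state_lock;
	enum q_state state;
};

/* Called only after an unlocked "state != Q_LIVE" fast-path check.
 * Returns true if the caller must defer or drop the work. */
static bool queue_not_live(struct queue *q)
{
	bool defer = true;

	pthread_mutex_lock(&q->state_lock);
	/* Re-check under the lock: another thread may have flipped the
	 * queue to LIVE between the unlocked test and this point. */
	if (q->state == Q_LIVE)
		defer = false;
	pthread_mutex_unlock(&q->state_lock);
	return defer;
}

The unlocked check keeps the common LIVE case off the lock; only the rare not-yet-live case pays for it.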

@@ -13,6 +13,8 @@
#define NVME_TCP_ADMIN_CCSZ SZ_8K
#define NVME_TCP_DIGEST_LENGTH 4
#define NVME_TCP_MIN_MAXH2CDATA 4096
#define NVME_TCP_MIN_C2HTERM_PLEN 24
#define NVME_TCP_MAX_C2HTERM_PLEN 152
enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,


@@ -199,28 +199,54 @@ enum {
#define NVME_NVM_IOSQES 6
#define NVME_NVM_IOCQES 4
/*
* Controller Configuration (CC) register (Offset 14h)
*/
enum {
/* Enable (EN): bit 0 */
NVME_CC_ENABLE = 1 << 0,
NVME_CC_EN_SHIFT = 0,
/* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */
/* I/O Command Set Selected (CSS): bits 06:04 */
NVME_CC_CSS_SHIFT = 4,
NVME_CC_MPS_SHIFT = 7,
NVME_CC_AMS_SHIFT = 11,
NVME_CC_SHN_SHIFT = 14,
NVME_CC_IOSQES_SHIFT = 16,
NVME_CC_IOCQES_SHIFT = 20,
NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT,
NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT,
/* Memory Page Size (MPS): bits 10:07 */
NVME_CC_MPS_SHIFT = 7,
NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT,
/* Arbitration Mechanism Selected (AMS): bits 13:11 */
NVME_CC_AMS_SHIFT = 11,
NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
/* Shutdown Notification (SHN): bits 15:14 */
NVME_CC_SHN_SHIFT = 14,
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
/* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */
NVME_CC_IOSQES_SHIFT = 16,
NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT,
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
/* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */
NVME_CC_IOCQES_SHIFT = 20,
NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT,
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
/* Controller Ready Independent of Media Enable (CRIME): bit 24 */
NVME_CC_CRIME = 1 << 24,
/* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */
};
enum {