Merge: ceph: fix client race condition validating r_parent before applying state
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7303

ceph: fix client race condition validating r_parent before applying state

JIRA: https://issues.redhat.com/browse/RHEL-109212
Signed-off-by: Alex Markuze <amarkuze@redhat.com>
Approved-by: Ilya Dryomov <idryomov@redhat.com>
Approved-by: Xiubo Li <xiubli@redhat.com>
Approved-by: Venky Shankar <vshankar@redhat.com>
Approved-by: David Howells <dhowells@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>
Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
This commit is contained in:
commit
bb65ee2ca1
|
@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||||
struct ceph_mds_client *mdsc = fsc->mdsc;
|
struct ceph_mds_client *mdsc = fsc->mdsc;
|
||||||
struct ceph_mds_request *req;
|
struct ceph_mds_request *req;
|
||||||
struct rb_node *rp;
|
struct rb_node *rp;
|
||||||
int pathlen = 0;
|
|
||||||
u64 pathbase;
|
|
||||||
char *path;
|
char *path;
|
||||||
|
|
||||||
mutex_lock(&mdsc->mutex);
|
mutex_lock(&mdsc->mutex);
|
||||||
|
@ -81,8 +79,8 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||||
if (req->r_inode) {
|
if (req->r_inode) {
|
||||||
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
|
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
|
||||||
} else if (req->r_dentry) {
|
} else if (req->r_dentry) {
|
||||||
path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
|
struct ceph_path_info path_info;
|
||||||
&pathbase, 0);
|
path = ceph_mdsc_build_path(req->r_dentry, &path_info, 0);
|
||||||
if (IS_ERR(path))
|
if (IS_ERR(path))
|
||||||
path = NULL;
|
path = NULL;
|
||||||
spin_lock(&req->r_dentry->d_lock);
|
spin_lock(&req->r_dentry->d_lock);
|
||||||
|
@ -91,7 +89,7 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||||
req->r_dentry,
|
req->r_dentry,
|
||||||
path ? path : "");
|
path ? path : "");
|
||||||
spin_unlock(&req->r_dentry->d_lock);
|
spin_unlock(&req->r_dentry->d_lock);
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
ceph_mdsc_free_path_info(&path_info);
|
||||||
} else if (req->r_path1) {
|
} else if (req->r_path1) {
|
||||||
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
|
seq_printf(s, " #%llx/%s", req->r_ino1.ino,
|
||||||
req->r_path1);
|
req->r_path1);
|
||||||
|
@ -100,8 +98,8 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (req->r_old_dentry) {
|
if (req->r_old_dentry) {
|
||||||
path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen,
|
struct ceph_path_info path_info;
|
||||||
&pathbase, 0);
|
path = ceph_mdsc_build_path(req->r_old_dentry, &path_info, 0);
|
||||||
if (IS_ERR(path))
|
if (IS_ERR(path))
|
||||||
path = NULL;
|
path = NULL;
|
||||||
spin_lock(&req->r_old_dentry->d_lock);
|
spin_lock(&req->r_old_dentry->d_lock);
|
||||||
|
@ -111,7 +109,7 @@ static int mdsc_show(struct seq_file *s, void *p)
|
||||||
req->r_old_dentry,
|
req->r_old_dentry,
|
||||||
path ? path : "");
|
path ? path : "");
|
||||||
spin_unlock(&req->r_old_dentry->d_lock);
|
spin_unlock(&req->r_old_dentry->d_lock);
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
ceph_mdsc_free_path_info(&path_info);
|
||||||
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
|
} else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) {
|
||||||
if (req->r_ino2.ino)
|
if (req->r_ino2.ino)
|
||||||
seq_printf(s, " #%llx/%s", req->r_ino2.ino,
|
seq_printf(s, " #%llx/%s", req->r_ino2.ino,
|
||||||
|
|
|
@ -1224,10 +1224,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
|
||||||
|
|
||||||
/* If op failed, mark everyone involved for errors */
|
/* If op failed, mark everyone involved for errors */
|
||||||
if (result) {
|
if (result) {
|
||||||
int pathlen = 0;
|
struct ceph_path_info path_info = {0};
|
||||||
u64 base = 0;
|
char *path = ceph_mdsc_build_path(dentry, &path_info, 0);
|
||||||
char *path = ceph_mdsc_build_path(dentry, &pathlen,
|
|
||||||
&base, 0);
|
|
||||||
|
|
||||||
/* mark error on parent + clear complete */
|
/* mark error on parent + clear complete */
|
||||||
mapping_set_error(req->r_parent->i_mapping, result);
|
mapping_set_error(req->r_parent->i_mapping, result);
|
||||||
|
@ -1241,8 +1239,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
|
||||||
mapping_set_error(req->r_old_inode->i_mapping, result);
|
mapping_set_error(req->r_old_inode->i_mapping, result);
|
||||||
|
|
||||||
pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
|
pr_warn("async unlink failure path=(%llx)%s result=%d!\n",
|
||||||
base, IS_ERR(path) ? "<<bad>>" : path, result);
|
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
ceph_mdsc_free_path_info(&path_info);
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
iput(req->r_old_inode);
|
iput(req->r_old_inode);
|
||||||
|
|
|
@ -576,14 +576,12 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
|
||||||
mapping_set_error(req->r_parent->i_mapping, result);
|
mapping_set_error(req->r_parent->i_mapping, result);
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
int pathlen = 0;
|
struct ceph_path_info path_info = {0};
|
||||||
u64 base = 0;
|
char *path = ceph_mdsc_build_path(req->r_dentry, &path_info, 0);
|
||||||
char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
|
|
||||||
&base, 0);
|
|
||||||
|
|
||||||
pr_warn("async create failure path=(%llx)%s result=%d!\n",
|
pr_warn("async create failure path=(%llx)%s result=%d!\n",
|
||||||
base, IS_ERR(path) ? "<<bad>>" : path, result);
|
path_info.vino.ino, IS_ERR(path) ? "<<bad>>" : path, result);
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
ceph_mdsc_free_path_info(&path_info);
|
||||||
|
|
||||||
ceph_dir_clear_complete(req->r_parent);
|
ceph_dir_clear_complete(req->r_parent);
|
||||||
if (!d_unhashed(dentry))
|
if (!d_unhashed(dentry))
|
||||||
|
|
|
@ -55,6 +55,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if the parent inode matches the vino from directory reply info
|
||||||
|
*/
|
||||||
|
static inline bool ceph_vino_matches_parent(struct inode *parent,
|
||||||
|
struct ceph_vino vino)
|
||||||
|
{
|
||||||
|
return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Validate that the directory inode referenced by @req->r_parent matches the
|
||||||
|
* inode number and snapshot id contained in the reply's directory record. If
|
||||||
|
* they do not match – which can theoretically happen if the parent dentry was
|
||||||
|
* moved between the time the request was issued and the reply arrived – fall
|
||||||
|
* back to looking up the correct inode in the inode cache.
|
||||||
|
*
|
||||||
|
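* Returns @parent itself (no new reference is taken), NULL when there is no parent, or the result of ceph_get_inode() (which may be an ERR_PTR). Callers that receive a different inode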
|
||||||
|
* than the original @parent are responsible for dropping the extra reference
|
||||||
|
* once the reply has been processed.
|
||||||
|
*/
|
||||||
|
static struct inode *ceph_get_reply_dir(struct super_block *sb,
|
||||||
|
struct inode *parent,
|
||||||
|
struct ceph_mds_reply_info_parsed *rinfo)
|
||||||
|
{
|
||||||
|
struct ceph_vino vino;
|
||||||
|
|
||||||
|
if (unlikely(!rinfo->diri.in))
|
||||||
|
return parent; /* nothing to compare against */
|
||||||
|
|
||||||
|
/* If we didn't have a cached parent inode to begin with, just bail out. */
|
||||||
|
if (!parent)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
vino.ino = le64_to_cpu(rinfo->diri.in->ino);
|
||||||
|
vino.snap = le64_to_cpu(rinfo->diri.in->snapid);
|
||||||
|
|
||||||
|
if (likely(ceph_vino_matches_parent(parent, vino)))
|
||||||
|
return parent; /* matches – use the original reference */
|
||||||
|
|
||||||
|
/* Mismatch – this should be rare. Emit a WARN and obtain the correct inode. */
|
||||||
|
WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n",
|
||||||
|
ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap);
|
||||||
|
|
||||||
|
return ceph_get_inode(sb, vino, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ceph_new_inode - allocate a new inode in advance of an expected create
|
* ceph_new_inode - allocate a new inode in advance of an expected create
|
||||||
* @dir: parent directory for new inode
|
* @dir: parent directory for new inode
|
||||||
|
@ -1489,6 +1535,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
struct inode *in = NULL;
|
struct inode *in = NULL;
|
||||||
struct ceph_vino tvino, dvino;
|
struct ceph_vino tvino, dvino;
|
||||||
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
|
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
|
||||||
|
struct inode *parent_dir = NULL;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
dout("fill_trace %p is_dentry %d is_target %d\n", req,
|
dout("fill_trace %p is_dentry %d is_target %d\n", req,
|
||||||
|
@ -1502,10 +1549,18 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rinfo->head->is_dentry) {
|
if (rinfo->head->is_dentry) {
|
||||||
struct inode *dir = req->r_parent;
|
/*
|
||||||
|
* r_parent may be stale, in cases when R_PARENT_LOCKED is not set,
|
||||||
|
* so we need to get the correct inode
|
||||||
|
*/
|
||||||
|
parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo);
|
||||||
|
if (unlikely(IS_ERR(parent_dir))) {
|
||||||
|
err = PTR_ERR(parent_dir);
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
if (dir) {
|
if (parent_dir) {
|
||||||
err = ceph_fill_inode(dir, NULL, &rinfo->diri,
|
err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri,
|
||||||
rinfo->dirfrag, session, -1,
|
rinfo->dirfrag, session, -1,
|
||||||
&req->r_caps_reservation);
|
&req->r_caps_reservation);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
|
@ -1514,14 +1569,14 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
WARN_ON_ONCE(1);
|
WARN_ON_ONCE(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
|
if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
|
||||||
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
|
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
|
||||||
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
|
||||||
bool is_nokey = false;
|
bool is_nokey = false;
|
||||||
struct qstr dname;
|
struct qstr dname;
|
||||||
struct dentry *dn, *parent;
|
struct dentry *dn, *parent;
|
||||||
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
|
struct fscrypt_str oname = FSTR_INIT(NULL, 0);
|
||||||
struct ceph_fname fname = { .dir = dir,
|
struct ceph_fname fname = { .dir = parent_dir,
|
||||||
.name = rinfo->dname,
|
.name = rinfo->dname,
|
||||||
.ctext = rinfo->altname,
|
.ctext = rinfo->altname,
|
||||||
.name_len = rinfo->dname_len,
|
.name_len = rinfo->dname_len,
|
||||||
|
@ -1530,10 +1585,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
BUG_ON(!rinfo->head->is_target);
|
BUG_ON(!rinfo->head->is_target);
|
||||||
BUG_ON(req->r_dentry);
|
BUG_ON(req->r_dentry);
|
||||||
|
|
||||||
parent = d_find_any_alias(dir);
|
parent = d_find_any_alias(parent_dir);
|
||||||
BUG_ON(!parent);
|
BUG_ON(!parent);
|
||||||
|
|
||||||
err = ceph_fname_alloc_buffer(dir, &oname);
|
err = ceph_fname_alloc_buffer(parent_dir, &oname);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
dput(parent);
|
dput(parent);
|
||||||
goto done;
|
goto done;
|
||||||
|
@ -1542,7 +1597,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
|
err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
dput(parent);
|
dput(parent);
|
||||||
ceph_fname_free_buffer(dir, &oname);
|
ceph_fname_free_buffer(parent_dir, &oname);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
dname.name = oname.name;
|
dname.name = oname.name;
|
||||||
|
@ -1550,6 +1605,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
|
||||||
dname.hash = full_name_hash(parent, dname.name, dname.len);
|
dname.hash = full_name_hash(parent, dname.name, dname.len);
|
||||||
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
|
||||||
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
|
||||||
|
|
||||||
retry_lookup:
|
retry_lookup:
|
||||||
dn = d_lookup(parent, &dname);
|
dn = d_lookup(parent, &dname);
|
||||||
dout("d_lookup on parent=%p name=%.*s got %p\n",
|
dout("d_lookup on parent=%p name=%.*s got %p\n",
|
||||||
|
@ -1561,7 +1617,7 @@ retry_lookup:
|
||||||
dname.len, dname.name, dn);
|
dname.len, dname.name, dn);
|
||||||
if (!dn) {
|
if (!dn) {
|
||||||
dput(parent);
|
dput(parent);
|
||||||
ceph_fname_free_buffer(dir, &oname);
|
ceph_fname_free_buffer(parent_dir, &oname);
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
@ -1576,12 +1632,12 @@ retry_lookup:
|
||||||
ceph_snap(d_inode(dn)) != tvino.snap)) {
|
ceph_snap(d_inode(dn)) != tvino.snap)) {
|
||||||
dout(" dn %p points to wrong inode %p\n",
|
dout(" dn %p points to wrong inode %p\n",
|
||||||
dn, d_inode(dn));
|
dn, d_inode(dn));
|
||||||
ceph_dir_clear_ordered(dir);
|
ceph_dir_clear_ordered(parent_dir);
|
||||||
d_delete(dn);
|
d_delete(dn);
|
||||||
dput(dn);
|
dput(dn);
|
||||||
goto retry_lookup;
|
goto retry_lookup;
|
||||||
}
|
}
|
||||||
ceph_fname_free_buffer(dir, &oname);
|
ceph_fname_free_buffer(parent_dir, &oname);
|
||||||
|
|
||||||
req->r_dentry = dn;
|
req->r_dentry = dn;
|
||||||
dput(parent);
|
dput(parent);
|
||||||
|
@ -1763,6 +1819,9 @@ retry_lookup:
|
||||||
&dvino, ptvino);
|
&dvino, ptvino);
|
||||||
}
|
}
|
||||||
done:
|
done:
|
||||||
|
/* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */
|
||||||
|
if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent))
|
||||||
|
iput(parent_dir);
|
||||||
dout("fill_trace done err=%d\n", err);
|
dout("fill_trace done err=%d\n", err);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2631,8 +2631,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
|
||||||
/**
|
/**
|
||||||
* ceph_mdsc_build_path - build a path string to a given dentry
|
* ceph_mdsc_build_path - build a path string to a given dentry
|
||||||
* @dentry: dentry to which path should be built
|
* @dentry: dentry to which path should be built
|
||||||
* @plen: returned length of string
|
* @path_info: output path, length, base ino+snap, and freepath ownership flag
|
||||||
* @pbase: returned base inode number
|
|
||||||
* @for_wire: is this path going to be sent to the MDS?
|
* @for_wire: is this path going to be sent to the MDS?
|
||||||
*
|
*
|
||||||
* Build a string that represents the path to the dentry. This is mostly called
|
* Build a string that represents the path to the dentry. This is mostly called
|
||||||
|
@ -2649,7 +2648,7 @@ static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
|
||||||
* Encode hidden .snap dirs as a double /, i.e.
|
* Encode hidden .snap dirs as a double /, i.e.
|
||||||
* foo/.snap/bar -> foo//bar
|
* foo/.snap/bar -> foo//bar
|
||||||
*/
|
*/
|
||||||
char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
|
char *ceph_mdsc_build_path(struct dentry *dentry, struct ceph_path_info *path_info,
|
||||||
int for_wire)
|
int for_wire)
|
||||||
{
|
{
|
||||||
struct dentry *cur;
|
struct dentry *cur;
|
||||||
|
@ -2761,16 +2760,28 @@ retry:
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
*pbase = base;
|
/* Initialize the output structure */
|
||||||
*plen = PATH_MAX - 1 - pos;
|
memset(path_info, 0, sizeof(*path_info));
|
||||||
|
|
||||||
|
path_info->vino.ino = base;
|
||||||
|
path_info->pathlen = PATH_MAX - 1 - pos;
|
||||||
|
path_info->path = path + pos;
|
||||||
|
path_info->freepath = true;
|
||||||
|
|
||||||
|
/* Set snap from dentry if available */
|
||||||
|
if (d_inode(dentry))
|
||||||
|
path_info->vino.snap = ceph_snap(d_inode(dentry));
|
||||||
|
else
|
||||||
|
path_info->vino.snap = CEPH_NOSNAP;
|
||||||
|
|
||||||
dout("build_path on %p %d built %llx '%.*s'\n",
|
dout("build_path on %p %d built %llx '%.*s'\n",
|
||||||
dentry, d_count(dentry), base, *plen, path + pos);
|
dentry, d_count(dentry), base, PATH_MAX - 1 - pos, path + pos);
|
||||||
return path + pos;
|
return path + pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
|
static int build_dentry_path(struct dentry *dentry, struct inode *dir,
|
||||||
const char **ppath, int *ppathlen, u64 *pino,
|
struct ceph_path_info *path_info,
|
||||||
bool *pfreepath, bool parent_locked)
|
bool parent_locked)
|
||||||
{
|
{
|
||||||
char *path;
|
char *path;
|
||||||
|
|
||||||
|
@ -2779,40 +2790,46 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
|
||||||
dir = d_inode_rcu(dentry->d_parent);
|
dir = d_inode_rcu(dentry->d_parent);
|
||||||
if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
|
if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
|
||||||
!IS_ENCRYPTED(dir)) {
|
!IS_ENCRYPTED(dir)) {
|
||||||
*pino = ceph_ino(dir);
|
path_info->vino.ino = ceph_ino(dir);
|
||||||
|
path_info->vino.snap = ceph_snap(dir);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
*ppath = dentry->d_name.name;
|
path_info->path = dentry->d_name.name;
|
||||||
*ppathlen = dentry->d_name.len;
|
path_info->pathlen = dentry->d_name.len;
|
||||||
|
path_info->freepath = false;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
|
path = ceph_mdsc_build_path(dentry, path_info, 1);
|
||||||
if (IS_ERR(path))
|
if (IS_ERR(path))
|
||||||
return PTR_ERR(path);
|
return PTR_ERR(path);
|
||||||
*ppath = path;
|
/*
|
||||||
*pfreepath = true;
|
* ceph_mdsc_build_path already fills path_info, including snap handling.
|
||||||
|
*/
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int build_inode_path(struct inode *inode,
|
static int build_inode_path(struct inode *inode, struct ceph_path_info *path_info)
|
||||||
const char **ppath, int *ppathlen, u64 *pino,
|
|
||||||
bool *pfreepath)
|
|
||||||
{
|
{
|
||||||
struct dentry *dentry;
|
struct dentry *dentry;
|
||||||
char *path;
|
char *path;
|
||||||
|
|
||||||
if (ceph_snap(inode) == CEPH_NOSNAP) {
|
if (ceph_snap(inode) == CEPH_NOSNAP) {
|
||||||
*pino = ceph_ino(inode);
|
path_info->vino.ino = ceph_ino(inode);
|
||||||
*ppathlen = 0;
|
path_info->vino.snap = ceph_snap(inode);
|
||||||
|
path_info->pathlen = 0;
|
||||||
|
path_info->freepath = false;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
dentry = d_find_alias(inode);
|
dentry = d_find_alias(inode);
|
||||||
path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
|
path = ceph_mdsc_build_path(dentry, path_info, 1);
|
||||||
dput(dentry);
|
dput(dentry);
|
||||||
if (IS_ERR(path))
|
if (IS_ERR(path))
|
||||||
return PTR_ERR(path);
|
return PTR_ERR(path);
|
||||||
*ppath = path;
|
/*
|
||||||
*pfreepath = true;
|
* ceph_mdsc_build_path already fills path_info, including snap from dentry.
|
||||||
|
* Override with inode's snap since that's what this function is for.
|
||||||
|
*/
|
||||||
|
path_info->vino.snap = ceph_snap(inode);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2822,25 +2839,30 @@ static int build_inode_path(struct inode *inode,
|
||||||
*/
|
*/
|
||||||
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
|
static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
|
||||||
struct inode *rdiri, const char *rpath,
|
struct inode *rdiri, const char *rpath,
|
||||||
u64 rino, const char **ppath, int *pathlen,
|
u64 rino, struct ceph_path_info *path_info,
|
||||||
u64 *ino, bool *freepath, bool parent_locked)
|
bool parent_locked)
|
||||||
{
|
{
|
||||||
int r = 0;
|
int r = 0;
|
||||||
|
|
||||||
|
/* Initialize the output structure */
|
||||||
|
memset(path_info, 0, sizeof(*path_info));
|
||||||
|
|
||||||
if (rinode) {
|
if (rinode) {
|
||||||
r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
|
r = build_inode_path(rinode, path_info);
|
||||||
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
|
dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
|
||||||
ceph_snap(rinode));
|
ceph_snap(rinode));
|
||||||
} else if (rdentry) {
|
} else if (rdentry) {
|
||||||
r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
|
r = build_dentry_path(rdentry, rdiri, path_info, parent_locked);
|
||||||
freepath, parent_locked);
|
dout(" dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino,
|
||||||
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
|
path_info->pathlen, path_info->path);
|
||||||
*ppath);
|
|
||||||
} else if (rpath || rino) {
|
} else if (rpath || rino) {
|
||||||
*ino = rino;
|
path_info->vino.ino = rino;
|
||||||
*ppath = rpath;
|
path_info->vino.snap = CEPH_NOSNAP;
|
||||||
*pathlen = rpath ? strlen(rpath) : 0;
|
path_info->path = rpath;
|
||||||
dout(" path %.*s\n", *pathlen, rpath);
|
path_info->pathlen = rpath ? strlen(rpath) : 0;
|
||||||
|
path_info->freepath = false;
|
||||||
|
|
||||||
|
dout(" path %.*s\n", path_info->pathlen, rpath);
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
@ -2893,28 +2915,49 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
struct ceph_mds_client *mdsc = session->s_mdsc;
|
struct ceph_mds_client *mdsc = session->s_mdsc;
|
||||||
struct ceph_msg *msg;
|
struct ceph_msg *msg;
|
||||||
struct ceph_mds_request_head_old *head;
|
struct ceph_mds_request_head_old *head;
|
||||||
const char *path1 = NULL;
|
struct ceph_path_info path_info1 = {0};
|
||||||
const char *path2 = NULL;
|
struct ceph_path_info path_info2 = {0};
|
||||||
u64 ino1 = 0, ino2 = 0;
|
|
||||||
int pathlen1 = 0, pathlen2 = 0;
|
|
||||||
bool freepath1 = false, freepath2 = false;
|
|
||||||
struct dentry *old_dentry = NULL;
|
struct dentry *old_dentry = NULL;
|
||||||
int len;
|
int len;
|
||||||
u16 releases;
|
u16 releases;
|
||||||
void *p, *end;
|
void *p, *end;
|
||||||
int ret;
|
int ret;
|
||||||
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
|
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
|
||||||
|
bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
|
||||||
|
|
||||||
ret = set_request_path_attr(req->r_inode, req->r_dentry,
|
ret = set_request_path_attr(req->r_inode, req->r_dentry,
|
||||||
req->r_parent, req->r_path1, req->r_ino1.ino,
|
req->r_parent, req->r_path1, req->r_ino1.ino,
|
||||||
&path1, &pathlen1, &ino1, &freepath1,
|
&path_info1, parent_locked);
|
||||||
test_bit(CEPH_MDS_R_PARENT_LOCKED,
|
|
||||||
&req->r_req_flags));
|
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
msg = ERR_PTR(ret);
|
msg = ERR_PTR(ret);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When the parent directory's i_rwsem is *not* locked, req->r_parent may
|
||||||
|
* have become stale (e.g. after a concurrent rename) between the time the
|
||||||
|
* dentry was looked up and now. If we detect that the stored r_parent
|
||||||
|
* does not match the inode number we just encoded for the request, switch
|
||||||
|
* to the correct inode so that the MDS receives a valid parent reference.
|
||||||
|
*/
|
||||||
|
if (!parent_locked && req->r_parent && path_info1.vino.ino &&
|
||||||
|
ceph_ino(req->r_parent) != path_info1.vino.ino) {
|
||||||
|
struct inode *old_parent = req->r_parent;
|
||||||
|
struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, path_info1.vino, NULL);
|
||||||
|
if (!IS_ERR(correct_dir)) {
|
||||||
|
WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n",
|
||||||
|
ceph_ino(old_parent), path_info1.vino.ino);
|
||||||
|
/*
|
||||||
|
* Transfer CEPH_CAP_PIN from the old parent to the new one.
|
||||||
|
* The pin was taken earlier in ceph_mdsc_submit_request().
|
||||||
|
*/
|
||||||
|
ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN);
|
||||||
|
iput(old_parent);
|
||||||
|
req->r_parent = correct_dir;
|
||||||
|
ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* If r_old_dentry is set, then assume that its parent is locked */
|
/* If r_old_dentry is set, then assume that its parent is locked */
|
||||||
if (req->r_old_dentry &&
|
if (req->r_old_dentry &&
|
||||||
!(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
|
!(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED))
|
||||||
|
@ -2922,7 +2965,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
ret = set_request_path_attr(NULL, old_dentry,
|
ret = set_request_path_attr(NULL, old_dentry,
|
||||||
req->r_old_dentry_dir,
|
req->r_old_dentry_dir,
|
||||||
req->r_path2, req->r_ino2.ino,
|
req->r_path2, req->r_ino2.ino,
|
||||||
&path2, &pathlen2, &ino2, &freepath2, true);
|
&path_info2, true);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
msg = ERR_PTR(ret);
|
msg = ERR_PTR(ret);
|
||||||
goto out_free1;
|
goto out_free1;
|
||||||
|
@ -2939,7 +2982,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
|
|
||||||
/* filepaths */
|
/* filepaths */
|
||||||
len += 2 * (1 + sizeof(u32) + sizeof(u64));
|
len += 2 * (1 + sizeof(u32) + sizeof(u64));
|
||||||
len += pathlen1 + pathlen2;
|
len += path_info1.pathlen + path_info2.pathlen;
|
||||||
|
|
||||||
/* cap releases */
|
/* cap releases */
|
||||||
len += sizeof(struct ceph_mds_request_release) *
|
len += sizeof(struct ceph_mds_request_release) *
|
||||||
|
@ -2947,9 +2990,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
|
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
|
||||||
|
|
||||||
if (req->r_dentry_drop)
|
if (req->r_dentry_drop)
|
||||||
len += pathlen1;
|
len += path_info1.pathlen;
|
||||||
if (req->r_old_dentry_drop)
|
if (req->r_old_dentry_drop)
|
||||||
len += pathlen2;
|
len += path_info2.pathlen;
|
||||||
|
|
||||||
/* MClientRequest tail */
|
/* MClientRequest tail */
|
||||||
|
|
||||||
|
@ -3008,8 +3051,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
head->ino = cpu_to_le64(req->r_deleg_ino);
|
head->ino = cpu_to_le64(req->r_deleg_ino);
|
||||||
head->args = req->r_args;
|
head->args = req->r_args;
|
||||||
|
|
||||||
ceph_encode_filepath(&p, end, ino1, path1);
|
ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path);
|
||||||
ceph_encode_filepath(&p, end, ino2, path2);
|
ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path);
|
||||||
|
|
||||||
/* make note of release offset, in case we need to replay */
|
/* make note of release offset, in case we need to replay */
|
||||||
req->r_request_release_offset = p - msg->front.iov_base;
|
req->r_request_release_offset = p - msg->front.iov_base;
|
||||||
|
@ -3072,11 +3115,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
|
||||||
msg->hdr.data_off = cpu_to_le16(0);
|
msg->hdr.data_off = cpu_to_le16(0);
|
||||||
|
|
||||||
out_free2:
|
out_free2:
|
||||||
if (freepath2)
|
ceph_mdsc_free_path_info(&path_info2);
|
||||||
ceph_mdsc_free_path((char *)path2, pathlen2);
|
|
||||||
out_free1:
|
out_free1:
|
||||||
if (freepath1)
|
ceph_mdsc_free_path_info(&path_info1);
|
||||||
ceph_mdsc_free_path((char *)path1, pathlen1);
|
|
||||||
out:
|
out:
|
||||||
return msg;
|
return msg;
|
||||||
out_err:
|
out_err:
|
||||||
|
@ -4323,24 +4364,20 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
|
||||||
struct ceph_pagelist *pagelist = recon_state->pagelist;
|
struct ceph_pagelist *pagelist = recon_state->pagelist;
|
||||||
struct dentry *dentry;
|
struct dentry *dentry;
|
||||||
struct ceph_cap *cap;
|
struct ceph_cap *cap;
|
||||||
char *path;
|
struct ceph_path_info path_info = {0};
|
||||||
int pathlen = 0, err;
|
int err;
|
||||||
u64 pathbase;
|
|
||||||
u64 snap_follows;
|
u64 snap_follows;
|
||||||
|
|
||||||
dentry = d_find_primary(inode);
|
dentry = d_find_primary(inode);
|
||||||
if (dentry) {
|
if (dentry) {
|
||||||
/* set pathbase to parent dir when msg_version >= 2 */
|
/* set pathbase to parent dir when msg_version >= 2 */
|
||||||
path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase,
|
char *path = ceph_mdsc_build_path(dentry, &path_info,
|
||||||
recon_state->msg_version >= 2);
|
recon_state->msg_version >= 2);
|
||||||
dput(dentry);
|
dput(dentry);
|
||||||
if (IS_ERR(path)) {
|
if (IS_ERR(path)) {
|
||||||
err = PTR_ERR(path);
|
err = PTR_ERR(path);
|
||||||
goto out_err;
|
goto out_err;
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
path = NULL;
|
|
||||||
pathbase = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock(&ci->i_ceph_lock);
|
spin_lock(&ci->i_ceph_lock);
|
||||||
|
@ -4373,7 +4410,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
|
||||||
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
|
rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
|
||||||
rec.v2.issued = cpu_to_le32(cap->issued);
|
rec.v2.issued = cpu_to_le32(cap->issued);
|
||||||
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||||
rec.v2.pathbase = cpu_to_le64(pathbase);
|
rec.v2.pathbase = cpu_to_le64(path_info.vino.ino);
|
||||||
rec.v2.flock_len = (__force __le32)
|
rec.v2.flock_len = (__force __le32)
|
||||||
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
|
((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
|
||||||
} else {
|
} else {
|
||||||
|
@ -4384,7 +4421,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg)
|
||||||
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
|
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
|
||||||
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
|
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
|
||||||
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
|
||||||
rec.v1.pathbase = cpu_to_le64(pathbase);
|
rec.v1.pathbase = cpu_to_le64(path_info.vino.ino);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (list_empty(&ci->i_cap_snaps)) {
|
if (list_empty(&ci->i_cap_snaps)) {
|
||||||
|
@ -4446,7 +4483,7 @@ encode_again:
|
||||||
sizeof(struct ceph_filelock);
|
sizeof(struct ceph_filelock);
|
||||||
rec.v2.flock_len = cpu_to_le32(struct_len);
|
rec.v2.flock_len = cpu_to_le32(struct_len);
|
||||||
|
|
||||||
struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
|
struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2);
|
||||||
|
|
||||||
if (struct_v >= 2)
|
if (struct_v >= 2)
|
||||||
struct_len += sizeof(u64); /* snap_follows */
|
struct_len += sizeof(u64); /* snap_follows */
|
||||||
|
@ -4470,7 +4507,7 @@ encode_again:
|
||||||
ceph_pagelist_encode_8(pagelist, 1);
|
ceph_pagelist_encode_8(pagelist, 1);
|
||||||
ceph_pagelist_encode_32(pagelist, struct_len);
|
ceph_pagelist_encode_32(pagelist, struct_len);
|
||||||
}
|
}
|
||||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
|
||||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
|
||||||
ceph_locks_to_pagelist(flocks, pagelist,
|
ceph_locks_to_pagelist(flocks, pagelist,
|
||||||
num_fcntl_locks, num_flock_locks);
|
num_fcntl_locks, num_flock_locks);
|
||||||
|
@ -4481,17 +4518,17 @@ out_freeflocks:
|
||||||
} else {
|
} else {
|
||||||
err = ceph_pagelist_reserve(pagelist,
|
err = ceph_pagelist_reserve(pagelist,
|
||||||
sizeof(u64) + sizeof(u32) +
|
sizeof(u64) + sizeof(u32) +
|
||||||
pathlen + sizeof(rec.v1));
|
path_info.pathlen + sizeof(rec.v1));
|
||||||
if (err)
|
if (err)
|
||||||
goto out_err;
|
goto out_err;
|
||||||
|
|
||||||
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
|
ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
|
||||||
ceph_pagelist_encode_string(pagelist, path, pathlen);
|
ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen);
|
||||||
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
|
||||||
}
|
}
|
||||||
|
|
||||||
out_err:
|
out_err:
|
||||||
ceph_mdsc_free_path(path, pathlen);
|
ceph_mdsc_free_path_info(&path_info);
|
||||||
if (!err)
|
if (!err)
|
||||||
recon_state->nr_caps++;
|
recon_state->nr_caps++;
|
||||||
return err;
|
return err;
|
||||||
|
|
|
@ -577,13 +577,23 @@ extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
|
||||||
void *arg);
|
void *arg);
|
||||||
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
|
extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
|
||||||
|
|
||||||
static inline void ceph_mdsc_free_path(char *path, int len)
|
/*
|
||||||
|
* Structure to group path-related output parameters for build_*_path functions
|
||||||
|
*/
|
||||||
|
struct ceph_path_info {
|
||||||
|
const char *path;
|
||||||
|
int pathlen;
|
||||||
|
struct ceph_vino vino;
|
||||||
|
bool freepath;
|
||||||
|
};
|
||||||
|
|
||||||
|
static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info)
|
||||||
{
|
{
|
||||||
if (!IS_ERR_OR_NULL(path))
|
if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path))
|
||||||
__putname(path - (PATH_MAX - 1 - len));
|
__putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen));
|
||||||
}
|
}
|
||||||
|
|
||||||
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
|
extern char *ceph_mdsc_build_path(struct dentry *dentry, struct ceph_path_info *path_info,
|
||||||
int for_wire);
|
int for_wire);
|
||||||
|
|
||||||
extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
|
extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
|
||||||
|
|
Loading…
Reference in New Issue