Centos-kernel-stream-9/fs/ext4/fast_commit.h

188 lines
4.2 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
/*
* Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and
* linux/fs/ext4/fast_commit.h. These files should always be byte identical.
*/
/*
 * Fast commit tags.
 *
 * Where this header defines a value structure for a tag, it is named in the
 * trailing comment.
 */
#define EXT4_FC_TAG_ADD_RANGE		0x0001	/* struct ext4_fc_add_range */
#define EXT4_FC_TAG_DEL_RANGE		0x0002	/* struct ext4_fc_del_range */
#define EXT4_FC_TAG_CREAT		0x0003	/* struct ext4_fc_dentry_info */
#define EXT4_FC_TAG_LINK		0x0004	/* struct ext4_fc_dentry_info */
#define EXT4_FC_TAG_UNLINK		0x0005	/* struct ext4_fc_dentry_info */
#define EXT4_FC_TAG_INODE		0x0006	/* struct ext4_fc_inode */
#define EXT4_FC_TAG_PAD			0x0007
#define EXT4_FC_TAG_TAIL		0x0008	/* struct ext4_fc_tail */
#define EXT4_FC_TAG_HEAD		0x0009	/* struct ext4_fc_head */

/*
 * Mask of supported fast commit features; currently empty.
 * NOTE(review): presumably compared against ext4_fc_head.fc_features - confirm.
 */
#define EXT4_FC_SUPPORTED_FEATURES	0x0
/* On-disk fast commit TLV (tag-length-value) structures */
/*
 * On-disk tag/length structure of a fast commit record.
 * Both fields are stored little-endian.
 */
struct ext4_fc_tl {
	__le16 fc_tag;	/* one of the EXT4_FC_TAG_* values */
	__le16 fc_len;	/* NOTE(review): presumably the length of the value - confirm */
};
/*
 * Value structure carried by the EXT4_FC_TAG_HEAD tag.
 * Both fields are stored little-endian.
 */
struct ext4_fc_head {
	__le32 fc_features;	/* feature bits; see EXT4_FC_SUPPORTED_FEATURES */
	__le32 fc_tid;		/* NOTE(review): presumably a transaction ID - confirm */
};
/* Value structure carried by the EXT4_FC_TAG_ADD_RANGE tag. */
struct ext4_fc_add_range {
	__le32 fc_ino;		/* little-endian inode number */
	/*
	 * 12 opaque bytes.
	 * NOTE(review): presumably an on-disk extent record - confirm.
	 */
	__u8 fc_ex[12];
};
/*
 * Value structure carried by the EXT4_FC_TAG_DEL_RANGE tag.
 * All fields are stored little-endian.
 */
struct ext4_fc_del_range {
	__le32 fc_ino;	/* inode number */
	__le32 fc_lblk;	/* NOTE(review): presumably the first logical block - confirm */
	__le32 fc_len;	/* NOTE(review): presumably the range length in blocks - confirm */
};
/*
* This is the value structure for tags EXT4_FC_TAG_CREAT, EXT4_FC_TAG_LINK
* and EXT4_FC_TAG_UNLINK.
*/
struct ext4_fc_dentry_info {
__le32 fc_parent_ino;
__le32 fc_ino;
treewide: Replace zero-length arrays with flexible-array members Conflicts: drop changes to arch/alpha/include/asm/hwrpb.h arch/ia64/include/asm/sal.h arch/sh/include/asm/thread_info.h arch/sparc/include/asm/vio.h arch/um/include/shared/net_kern.h arch/xtensa/include/asm/bootparam.h - unsupported arches drop drivers/misc/habanalabs/include/common/cpucp_if.h - unsupported config drop drivers/staging/r8188eu/include/rtw_cmd.h fs/ksmbd/ksmbd_netlink.h fs/ksmbd/ntlmssp.h fs/ksmbd/smb2pdu.h fs/ksmbd/transport_rdma.c drivers/scsi/qla2xxx/qla_edif_bsg.h fs/ksmbd/xattr.h include/net/ioam6.h - files not in CS9 fs/xfs/scrub/attr.h arch/x86/include/asm/microcode_amd.h arch/x86/include/asm/microcode_intel.h drivers/net/ethernet/marvell/octeontx2/af/npc.h include/sound/sof/channel_map.h sound/soc/intel/atom/sst-mfld-dsp.h sound/soc/intel/skylake/skl-topology.h drivers/scsi/qla2xxx/qla_edif.c drivers/scsi/qla2xxx/qla_edif_bsg.h drivers/scsi/mpi3mr/mpi3mr.h - This patch already applied. drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c drivers/gpu/drm/nouveau/include/nvfw/hs.h change done under RHEL-only commit 99fc716fc479 ("Merge DRM changes from upstream v5.17..v5.18") JIRA: https://issues.redhat.com/browse/RHEL-1848 commit 5224f79096170bf7b92cc8fe42a12f44b91e5f62 Author: Gustavo A. R. Silva <gustavoars@kernel.org> Date: Mon Feb 14 19:11:44 2022 -0600 treewide: Replace zero-length arrays with flexible-array members There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (next-20220214$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script. cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... 
T1 member; T2 array[ - 0 ]; }; UAPI and wireless changes were intentionally excluded from this patch and will be sent out separately. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-lengt h-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org> Signed-off-by: Chris von Recklinghausen <crecklin@redhat.com>
2023-10-20 10:12:35 +00:00
__u8 fc_dname[];
};
/* Value structure for EXT4_FC_TAG_INODE. */
struct ext4_fc_inode {
__le32 fc_ino;
treewide: Replace zero-length arrays with flexible-array members Conflicts: drop changes to arch/alpha/include/asm/hwrpb.h arch/ia64/include/asm/sal.h arch/sh/include/asm/thread_info.h arch/sparc/include/asm/vio.h arch/um/include/shared/net_kern.h arch/xtensa/include/asm/bootparam.h - unsupported arches drop drivers/misc/habanalabs/include/common/cpucp_if.h - unsupported config drop drivers/staging/r8188eu/include/rtw_cmd.h fs/ksmbd/ksmbd_netlink.h fs/ksmbd/ntlmssp.h fs/ksmbd/smb2pdu.h fs/ksmbd/transport_rdma.c drivers/scsi/qla2xxx/qla_edif_bsg.h fs/ksmbd/xattr.h include/net/ioam6.h - files not in CS9 fs/xfs/scrub/attr.h arch/x86/include/asm/microcode_amd.h arch/x86/include/asm/microcode_intel.h drivers/net/ethernet/marvell/octeontx2/af/npc.h include/sound/sof/channel_map.h sound/soc/intel/atom/sst-mfld-dsp.h sound/soc/intel/skylake/skl-topology.h drivers/scsi/qla2xxx/qla_edif.c drivers/scsi/qla2xxx/qla_edif_bsg.h drivers/scsi/mpi3mr/mpi3mr.h - This patch already applied. drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c drivers/gpu/drm/nouveau/include/nvfw/hs.h change done under RHEL-only commit 99fc716fc479 ("Merge DRM changes from upstream v5.17..v5.18") JIRA: https://issues.redhat.com/browse/RHEL-1848 commit 5224f79096170bf7b92cc8fe42a12f44b91e5f62 Author: Gustavo A. R. Silva <gustavoars@kernel.org> Date: Mon Feb 14 19:11:44 2022 -0600 treewide: Replace zero-length arrays with flexible-array members There is a regular need in the kernel to provide a way to declare having a dynamically sized set of trailing elements in a structure. Kernel code should always use “flexible array members”[1] for these cases. The older style of one-element or zero-length arrays should no longer be used[2]. This code was transformed with the help of Coccinelle: (next-20220214$ spatch --jobs $(getconf _NPROCESSORS_ONLN) --sp-file script. cocci --include-headers --dir . > output.patch) @@ identifier S, member, array; type T1, T2; @@ struct S { ... 
T1 member; T2 array[ - 0 ]; }; UAPI and wireless changes were intentionally excluded from this patch and will be sent out separately. [1] https://en.wikipedia.org/wiki/Flexible_array_member [2] https://www.kernel.org/doc/html/v5.16/process/deprecated.html#zero-lengt h-and-one-element-arrays Link: https://github.com/KSPP/linux/issues/78 Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org> Signed-off-by: Chris von Recklinghausen <crecklin@redhat.com>
2023-10-20 10:12:35 +00:00
__u8 fc_raw_inode[];
};
/*
 * Value structure carried by the EXT4_FC_TAG_TAIL tag.
 * Both fields are stored little-endian.
 */
struct ext4_fc_tail {
	__le32 fc_tid;	/* NOTE(review): presumably a transaction ID - confirm */
	__le32 fc_crc;	/* NOTE(review): presumably a checksum; coverage not shown here */
};
/* Tag base length: the size in bytes of struct ext4_fc_tl (tag + length) */
#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
/*
 * Fast commit status codes.
 */
enum {
	EXT4_FC_STATUS_OK		= 0,
	EXT4_FC_STATUS_INELIGIBLE	= 1,
	EXT4_FC_STATUS_SKIPPED		= 2,
	EXT4_FC_STATUS_FAILED		= 3,
};
/*
 * Fast commit ineligibility reasons.
 *
 * Enumerators are numbered consecutively from zero, so EXT4_FC_REASON_MAX
 * equals the number of reasons; it is used to size the per-reason counter
 * array in struct ext4_fc_stats and must therefore stay last.
 */
enum {
	EXT4_FC_REASON_XATTR = 0,
	EXT4_FC_REASON_CROSS_RENAME,
	EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
	EXT4_FC_REASON_NOMEM,
	EXT4_FC_REASON_SWAP_BOOT,
	EXT4_FC_REASON_RESIZE,
	EXT4_FC_REASON_RENAME_DIR,
	EXT4_FC_REASON_FALLOC_RANGE,
	EXT4_FC_REASON_INODE_JOURNAL_DATA,
	EXT4_FC_REASON_ENCRYPTED_FILENAME,
	EXT4_FC_REASON_MAX	/* count of reasons; keep last */
};
#ifdef __KERNEL__
/*
* In memory list of dentry updates that are performed on the file
* system used by fast commit code.
*/
struct ext4_fc_dentry_update {
int fcd_op; /* Type of update create / unlink / link */
int fcd_parent; /* Parent inode number */
int fcd_ino; /* Inode number */
struct qstr fcd_name; /* Dirent name */
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
struct list_head fcd_list;
ext4: improve fast_commit performance and scalability Bugzilla: https://bugzilla.redhat.com/2079868 Tested: xfstests Upstream Status: upstream commit b3998b3bc658017dc36c69a8224fb11a3d1b1382 Author: Ritesh Harjani <riteshh@linux.ibm.com> Currently ext4_fc_commit_dentry_updates() is of quadratic time complexity, which is causing performance bottlenecks with high threads/file/dir count with fs_mark. This patch makes commit dentry updates (and hence ext4_fc_commit()) path to linear time complexity. Hence improves the performance of workloads which does fsync on multiple threads/open files one-by-one. Absolute numbers in avg file creates per sec (from fs_mark in 1K order) ======================================================================= no. Order without-patch(K) with-patch(K) Diff(%) 1 1 16.90 17.51 +3.60 2 2,2 32.08 31.80 -0.87 3 3,3 53.97 55.01 +1.92 4 4,4 78.94 76.90 -2.58 5 5,5 95.82 95.37 -0.46 6 6,6 87.92 103.38 +17.58 7 6,10 0.73 126.13 +17178.08 8 6,14 2.33 143.19 +6045.49 workload type ============== For e.g. 7th row order of 6,10 (2^6 == 64 && 2^10 == 1024) echo /run/riteshh/mnt/{1..64} |sed -E 's/[[:space:]]+/ -d /g' \ | xargs -I {} bash -c "sudo fs_mark -L 100 -D 1024 -n 1024 -s0 -S5 -d {}" Perf profile (w/o patches) ============================= 87.15% [kernel] [k] ext4_fc_commit --> Heavy contention/bottleneck 1.98% [kernel] [k] perf_event_interrupt 0.96% [kernel] [k] power_pmu_enable 0.91% [kernel] [k] update_sd_lb_stats.constprop.0 0.67% [kernel] [k] ktime_get Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com> Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com> Link: https://lore.kernel.org/r/930f35d4fd5f83e2673c868781d9ebf15e91bf4e.1645426817.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Lukas Czerner <lczerner@redhat.com>
2022-02-21 07:56:15 +00:00
struct list_head fcd_dilist;
};
/*
 * Fast commit statistics counters.
 *
 * NOTE(review): the meaning of the individual counters is inferred from
 * their names only; confirm against the code that updates them.
 */
struct ext4_fc_stats {
	/* One counter per EXT4_FC_REASON_* value. */
	unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
	unsigned long fc_num_commits;
	unsigned long fc_ineligible_commits;
	unsigned long fc_failed_commits;
	unsigned long fc_skipped_commits;
	unsigned long fc_numblks;
	u64 s_fc_avg_commit_time;	/* units are not stated in this header */
};
/*
 * NOTE(review): presumably the number of elements by which the replay-time
 * arrays are grown on reallocation - confirm against the replay code.
 */
#define EXT4_FC_REPLAY_REALLOC_INCREMENT 4
/*
 * A physical block region that fast commit recovery has assigned to an
 * inode. Regions are recorded during the SCAN phase; during the replay
 * phase the allocator excludes them from its allocations, so that it does
 * not accidentally hand out a block that is going to be used by another
 * inode.
 */
struct ext4_fc_alloc_region {
	ext4_lblk_t lblk;	/* logical block; see region_last() */
	ext4_fsblk_t pblk;	/* physical block */
	int ino;		/* inode number */
	int len;		/* region length; see region_last() */
};
/*
 * State tracked while fast commit records are being replayed.
 *
 * NOTE(review): the *_size / *_used / *_valid members presumably describe
 * the capacity and fill level of the array declared just before them -
 * confirm against the replay code.
 */
struct ext4_fc_replay_state {
	int fc_replay_num_tags;
	int fc_replay_expected_off;
	int fc_current_pass;
	int fc_cur_tag;
	int fc_crc;

	/* Array of allocated regions and its bookkeeping. */
	struct ext4_fc_alloc_region *fc_regions;
	int fc_regions_size;
	int fc_regions_used;
	int fc_regions_valid;

	/* Array of modified inode numbers and its bookkeeping. */
	int *fc_modified_inodes;
	int fc_modified_inodes_used;
	int fc_modified_inodes_size;
};
/*
 * Last logical block of a region: lblk + len - 1.
 * The argument is a pointer and is evaluated twice, so it must be free of
 * side effects.
 */
#define region_last(region) ((region)->lblk + (region)->len - 1)
#endif
/*
 * Translate a fast commit tag value into a short printable name.
 *
 * Returns a pointer to a string literal. Any value that is not one of the
 * EXT4_FC_TAG_* constants yields "ERROR".
 */
static inline const char *tag2str(__u16 tag)
{
	const char *name;

	/* Cases are listed in ascending tag order. */
	switch (tag) {
	case EXT4_FC_TAG_ADD_RANGE:
		name = "ADD_RANGE";
		break;
	case EXT4_FC_TAG_DEL_RANGE:
		name = "DEL_RANGE";
		break;
	case EXT4_FC_TAG_CREAT:
		name = "CREAT_DENTRY";
		break;
	case EXT4_FC_TAG_LINK:
		name = "ADD_ENTRY";
		break;
	case EXT4_FC_TAG_UNLINK:
		name = "DEL_ENTRY";
		break;
	case EXT4_FC_TAG_INODE:
		name = "INODE";
		break;
	case EXT4_FC_TAG_PAD:
		name = "PAD";
		break;
	case EXT4_FC_TAG_TAIL:
		name = "TAIL";
		break;
	case EXT4_FC_TAG_HEAD:
		name = "HEAD";
		break;
	default:
		name = "ERROR";
		break;
	}

	return name;
}
#endif /* __FAST_COMMIT_H__ */