diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 9725586f4259..74bac68cecd4 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -17,8 +17,8 @@ #define GMAP_NOTIFY_MPROT 0x1 /* Status bits only for huge segment entries */ -#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ -#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* dirty (migration) */ +#define _SEGMENT_ENTRY_GMAP_IN 0x0800 /* invalidation notify bit */ +#define _SEGMENT_ENTRY_GMAP_UC 0x0002 /* dirty (migration) */ /** * struct gmap_struct - guest address space diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index deb198a61039..cabfa146bc3b 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -10,6 +10,8 @@ #define _ASM_S390_HUGETLB_H #include +#include +#include #include #define hugetlb_free_pgd_range free_pgd_range @@ -91,7 +93,7 @@ static inline int huge_pte_none(pte_t pte) static inline int huge_pte_none_mostly(pte_t pte) { - return huge_pte_none(pte); + return huge_pte_none(pte) || is_pte_marker(pte); } static inline int huge_pte_write(pte_t pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 58a64e23c9ab..2d2d224207bc 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -259,24 +259,34 @@ static inline int is_module_addr(void *addr) #define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) #define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) -#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) +#define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH | \ + _REGION3_ENTRY_PRESENT) #define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_ORIGIN_LARGE ~0x7fffffffUL /* large page address */ #define _REGION3_ENTRY_DIRTY 0x2000 /* SW region dirty bit */ #define _REGION3_ENTRY_YOUNG 0x1000 /* SW region young bit */ +#define _REGION3_ENTRY_COMM 0x0010 /* Common-Region, marks swap entry */ #define _REGION3_ENTRY_LARGE 0x0400 /* RTTE-format control, large page */ -#define _REGION3_ENTRY_READ 0x0002 /* SW region read bit */ -#define _REGION3_ENTRY_WRITE 0x0001 /* SW region write bit */ +#define _REGION3_ENTRY_WRITE 0x8000 /* SW region write bit */ +#define _REGION3_ENTRY_READ 0x4000 /* SW region read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _REGION3_ENTRY_SOFT_DIRTY 0x4000 /* SW region soft dirty bit */ +#define _REGION3_ENTRY_SOFT_DIRTY 0x0002 /* SW region soft dirty bit */ #else #define _REGION3_ENTRY_SOFT_DIRTY 0x0000 /* SW region soft dirty bit */ #endif #define _REGION_ENTRY_BITS 0xfffffffffffff22fUL +/* + * SW region present bit. For non-leaf region-third-table entries, bits 62-63 + * indicate the TABLE LENGTH and both must be set to 1. But such entries + * would always be considered as present, so it is safe to use bit 63 as + * PRESENT bit for PUD. + */ +#define _REGION3_ENTRY_PRESENT 0x0001 + /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL #define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL @@ -288,21 +298,29 @@ static inline int is_module_addr(void *addr) #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */ -#define _SEGMENT_ENTRY (0) +#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PRESENT) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */ #define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */ + +#define _SEGMENT_ENTRY_COMM 0x0010 /* Common-Segment, marks swap entry */ #define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */ -#define _SEGMENT_ENTRY_WRITE 0x0002 /* SW segment write bit */ -#define _SEGMENT_ENTRY_READ 0x0001 /* SW segment read bit */ +#define _SEGMENT_ENTRY_WRITE 0x8000 /* SW segment write bit */ +#define _SEGMENT_ENTRY_READ 0x4000 /* SW segment read bit */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _SEGMENT_ENTRY_SOFT_DIRTY 0x4000 /* SW segment soft dirty bit */ +#define _SEGMENT_ENTRY_SOFT_DIRTY 0x0002 /* SW segment soft dirty bit */ #else #define _SEGMENT_ENTRY_SOFT_DIRTY 0x0000 /* SW segment soft dirty bit */ #endif +#define _SEGMENT_ENTRY_PRESENT 0x0001 /* SW segment present bit */ + +/* Common bits in region and segment table entries, for swap entries */ +#define _RST_ENTRY_COMM 0x0010 /* Common-Region/Segment, marks swap entry */ +#define _RST_ENTRY_INVALID 0x0020 /* invalid region/segment table entry */ + #define _CRST_ENTRIES 2048 /* number of region/segment table entries */ #define _PAGE_ENTRIES 256 /* number of page table entries */ @@ -434,17 +452,22 @@ static inline int is_module_addr(void *addr) /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PROTECT | \ +#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_READ | \ +#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ + _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) #define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ @@ -471,6 +494,7 @@ static inline int is_module_addr(void *addr) */ #define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -478,12 +502,14 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) #define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ _REGION3_ENTRY_LARGE | \ _REGION3_ENTRY_READ | \ _REGION3_ENTRY_WRITE | \ @@ -705,7 +731,7 @@ static inline int pud_present(pud_t pud) { if (pud_folded(pud)) return 1; - return (pud_val(pud) & _REGION_ENTRY_ORIGIN) != 0UL; + return (pud_val(pud) & _REGION3_ENTRY_PRESENT) != 0; } static inline int pud_none(pud_t pud) @@ -720,13 +746,18 @@ static inline bool pud_leaf(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3) return 0; - return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); + return (pud_present(pud) && (pud_val(pud) & _REGION3_ENTRY_LARGE) != 0); +} + +static inline int pmd_present(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_PRESENT) != 0; } #define pmd_leaf pmd_leaf static inline bool pmd_leaf(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; + return (pmd_present(pmd) && (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0); } static inline int pmd_bad(pmd_t pmd) @@ -758,11 +789,6 @@ static inline int p4d_bad(p4d_t p4d) return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0; } -static inline int pmd_present(pmd_t pmd) -{ - return pmd_val(pmd) != _SEGMENT_ENTRY_EMPTY; -} - static inline int pmd_none(pmd_t pmd) { return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; @@ -1807,7 +1833,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, static inline int pmd_trans_huge(pmd_t pmd) { - return pmd_val(pmd) & _SEGMENT_ENTRY_LARGE; + return pmd_leaf(pmd); } #define has_transparent_hugepage has_transparent_hugepage @@ -1867,6 +1893,53 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +/* + * 64 bit swap entry format for REGION3 and SEGMENT table entries (RSTE) + * Bits 59 and 63 are used to indicate the swap entry. Bit 58 marks the rste + * as invalid. + * A swap entry is indicated by bit pattern (rste & 0x011) == 0x010 + * | offset |Xtype |11TT|S0| + * |0000000000111111111122222222223333333333444444444455|555555|5566|66| + * |0123456789012345678901234567890123456789012345678901|234567|8901|23| + * + * Bits 0-51 store the offset. + * Bits 53-57 store the type. + * Bit 62 (S) is used for softdirty tracking. + * Bits 60-61 (TT) indicate the table type: 0x01 for REGION3 and 0x00 for SEGMENT. + * Bit 52 (X) is unused. + */ + +#define __SWP_OFFSET_MASK_RSTE ((1UL << 52) - 1) +#define __SWP_OFFSET_SHIFT_RSTE 12 +#define __SWP_TYPE_MASK_RSTE ((1UL << 5) - 1) +#define __SWP_TYPE_SHIFT_RSTE 6 + +/* + * TT bits set to 0x00 == SEGMENT. For REGION3 entries, caller must add R3 + * bits 0x01. See also __set_huge_pte_at(). + */ +static inline unsigned long mk_swap_rste(unsigned long type, unsigned long offset) +{ + unsigned long rste; + + rste = _RST_ENTRY_INVALID | _RST_ENTRY_COMM; + rste |= (offset & __SWP_OFFSET_MASK_RSTE) << __SWP_OFFSET_SHIFT_RSTE; + rste |= (type & __SWP_TYPE_MASK_RSTE) << __SWP_TYPE_SHIFT_RSTE; + return rste; +} + +static inline unsigned long __swp_type_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_TYPE_SHIFT_RSTE) & __SWP_TYPE_MASK_RSTE; +} + +static inline unsigned long __swp_offset_rste(swp_entry_t entry) +{ + return (entry.val >> __SWP_OFFSET_SHIFT_RSTE) & __SWP_OFFSET_MASK_RSTE; +} + +#define __rste_to_swp_entry(rste) ((swp_entry_t) { rste }) + extern int vmem_add_mapping(unsigned long start, unsigned long size); extern void vmem_remove_mapping(unsigned long start, unsigned long size); extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 05c908051e93..268fb53713f0 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -307,13 +307,15 @@ static void do_fault_error(struct pt_regs *regs, vm_fault_t fault) do_no_context(regs); else do_sigsegv(regs, SEGV_MAPERR); - } else if (fault & VM_FAULT_SIGBUS) { + } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | + VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) do_no_context(regs); else do_sigbus(regs); } else { + pr_emerg("Unexpected fault flags: %08x\n", fault); BUG(); } break; diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index b72facdb49f6..5124024957a2 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -616,7 +616,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS_LARGE) - | _SEGMENT_ENTRY_GMAP_UC; + | _SEGMENT_ENTRY_GMAP_UC + | _SEGMENT_ENTRY; } else *table = pmd_val(*pmd) & _SEGMENT_ENTRY_HARDWARE_BITS; @@ -2345,7 +2346,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (purge) __pmdp_csp(pmdp); set_pmd(pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); @@ -2399,7 +2401,8 @@ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); @@ -2434,7 +2437,8 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) gaddr = __gmap_segment_gaddr(entry); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC)); + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index c2e8242bd15d..e5c504449244 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -24,6 +24,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) { + swp_entry_t arch_entry; unsigned long rste; /* @@ -48,6 +49,7 @@ static inline unsigned long __pte_to_rste(pte_t pte) */ if (pte_present(pte)) { rste = pte_val(pte) & PAGE_MASK; + rste |= _SEGMENT_ENTRY_PRESENT; rste |= move_set_bit(pte_val(pte), _PAGE_READ, _SEGMENT_ENTRY_READ); rste |= move_set_bit(pte_val(pte), _PAGE_WRITE, @@ -66,6 +68,10 @@ static inline unsigned long __pte_to_rste(pte_t pte) #endif rste |= move_set_bit(pte_val(pte), _PAGE_NOEXEC, _SEGMENT_ENTRY_NOEXEC); + } else if (!pte_none(pte)) { + /* swap pte */ + arch_entry = __pte_to_swp_entry(pte); + rste = mk_swap_rste(__swp_type(arch_entry), __swp_offset(arch_entry)); } else rste = _SEGMENT_ENTRY_EMPTY; return rste; @@ -73,13 +79,18 @@ static inline unsigned long __pte_to_rste(pte_t pte) static inline pte_t __rste_to_pte(unsigned long rste) { + swp_entry_t arch_entry; unsigned long pteval; - int present; + int present, none; + pte_t pte; - if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) + if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { present = pud_present(__pud(rste)); - else + none = pud_none(__pud(rste)); + } else { present = pmd_present(__pmd(rste)); + none = pmd_none(__pmd(rste)); + } /* * Convert encoding pmd / pud bits pte bits @@ -114,6 +125,11 @@ static inline pte_t __rste_to_pte(unsigned long rste) pteval |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY, _PAGE_SOFT_DIRTY); #endif pteval |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC, _PAGE_NOEXEC); + } else if (!none) { + /* swap rste */ + arch_entry = __rste_to_swp_entry(rste); + pte = mk_swap_pte(__swp_type_rste(arch_entry), __swp_offset_rste(arch_entry)); + pteval = pte_val(pte); } else pteval = _PAGE_INVALID; return __pte(pteval); @@ -148,8 +164,6 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, unsigned long rste; rste = __pte_to_rste(pte); - if (!MACHINE_HAS_NX) - rste &= ~_SEGMENT_ENTRY_NOEXEC; /* Set correct table type for 2G hugepages */ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) { @@ -223,11 +237,10 @@ pte_t *huge_pte_offset(struct mm_struct *mm, p4dp = p4d_offset(pgdp, addr); if (p4d_present(*p4dp)) { pudp = pud_offset(p4dp, addr); - if (pud_present(*pudp)) { - if (pud_leaf(*pudp)) - return (pte_t *) pudp; + if (sz == PUD_SIZE) + return (pte_t *)pudp; + if (pud_present(*pudp)) pmdp = pmd_offset(pudp, addr); - } } } return (pte_t *) pmdp;