futex: Use a folio instead of a page
JIRA: https://issues.redhat.com/browse/RHEL-28616

commit e35a6cf1cc343d720ad235f678f1cd2a9876b777
Author: Matthew Wilcox (Oracle) <willy@infradead.org>
Date:   Mon, 21 Aug 2023 15:22:07 +0100

    futex: Use a folio instead of a page

    The futex code already handles compound pages correctly, but using a
    folio tells the compiler that there is already a reference to the head
    page and it doesn't need to call compound_head() again.

    Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
    Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
    Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Link: https://lore.kernel.org/r/20230821142207.2537124-1-willy@infradead.org

Signed-off-by: Waiman Long <longman@redhat.com>
This commit is contained in:
parent
c965fac1e0
commit
ec2d33fd18
|
@ -222,7 +222,8 @@ int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
|
||||||
{
|
{
|
||||||
unsigned long address = (unsigned long)uaddr;
|
unsigned long address = (unsigned long)uaddr;
|
||||||
struct mm_struct *mm = current->mm;
|
struct mm_struct *mm = current->mm;
|
||||||
struct page *page, *tail;
|
struct page *page;
|
||||||
|
struct folio *folio;
|
||||||
struct address_space *mapping;
|
struct address_space *mapping;
|
||||||
int err, ro = 0;
|
int err, ro = 0;
|
||||||
|
|
||||||
|
@ -283,54 +284,52 @@ again:
|
||||||
err = 0;
|
err = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The treatment of mapping from this point on is critical. The page
|
* The treatment of mapping from this point on is critical. The folio
|
||||||
* lock protects many things but in this context the page lock
|
* lock protects many things but in this context the folio lock
|
||||||
* stabilizes mapping, prevents inode freeing in the shared
|
* stabilizes mapping, prevents inode freeing in the shared
|
||||||
* file-backed region case and guards against movement to swap cache.
|
* file-backed region case and guards against movement to swap cache.
|
||||||
*
|
*
|
||||||
* Strictly speaking the page lock is not needed in all cases being
|
* Strictly speaking the folio lock is not needed in all cases being
|
||||||
* considered here and page lock forces unnecessarily serialization
|
* considered here and folio lock forces unnecessarily serialization.
|
||||||
* From this point on, mapping will be re-verified if necessary and
|
* From this point on, mapping will be re-verified if necessary and
|
||||||
* page lock will be acquired only if it is unavoidable
|
* folio lock will be acquired only if it is unavoidable
|
||||||
*
|
*
|
||||||
* Mapping checks require the head page for any compound page so the
|
* Mapping checks require the folio so it is looked up now. For
|
||||||
* head page and mapping is looked up now. For anonymous pages, it
|
* anonymous pages, it does not matter if the folio is split
|
||||||
* does not matter if the page splits in the future as the key is
|
* in the future as the key is based on the address. For
|
||||||
* based on the address. For filesystem-backed pages, the tail is
|
* filesystem-backed pages, the precise page is required as the
|
||||||
* required as the index of the page determines the key. For
|
* index of the page determines the key.
|
||||||
* base pages, there is no tail page and tail == page.
|
|
||||||
*/
|
*/
|
||||||
tail = page;
|
folio = page_folio(page);
|
||||||
page = compound_head(page);
|
mapping = READ_ONCE(folio->mapping);
|
||||||
mapping = READ_ONCE(page->mapping);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If page->mapping is NULL, then it cannot be a PageAnon
|
* If folio->mapping is NULL, then it cannot be an anonymous
|
||||||
* page; but it might be the ZERO_PAGE or in the gate area or
|
* page; but it might be the ZERO_PAGE or in the gate area or
|
||||||
* in a special mapping (all cases which we are happy to fail);
|
* in a special mapping (all cases which we are happy to fail);
|
||||||
* or it may have been a good file page when get_user_pages_fast
|
* or it may have been a good file page when get_user_pages_fast
|
||||||
* found it, but truncated or holepunched or subjected to
|
* found it, but truncated or holepunched or subjected to
|
||||||
* invalidate_complete_page2 before we got the page lock (also
|
* invalidate_complete_page2 before we got the folio lock (also
|
||||||
* cases which we are happy to fail). And we hold a reference,
|
* cases which we are happy to fail). And we hold a reference,
|
||||||
* so refcount care in invalidate_inode_page's remove_mapping
|
* so refcount care in invalidate_inode_page's remove_mapping
|
||||||
* prevents drop_caches from setting mapping to NULL beneath us.
|
* prevents drop_caches from setting mapping to NULL beneath us.
|
||||||
*
|
*
|
||||||
* The case we do have to guard against is when memory pressure made
|
* The case we do have to guard against is when memory pressure made
|
||||||
* shmem_writepage move it from filecache to swapcache beneath us:
|
* shmem_writepage move it from filecache to swapcache beneath us:
|
||||||
* an unlikely race, but we do need to retry for page->mapping.
|
* an unlikely race, but we do need to retry for folio->mapping.
|
||||||
*/
|
*/
|
||||||
if (unlikely(!mapping)) {
|
if (unlikely(!mapping)) {
|
||||||
int shmem_swizzled;
|
int shmem_swizzled;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Page lock is required to identify which special case above
|
* Folio lock is required to identify which special case above
|
||||||
* applies. If this is really a shmem page then the page lock
|
* applies. If this is really a shmem page then the folio lock
|
||||||
* will prevent unexpected transitions.
|
* will prevent unexpected transitions.
|
||||||
*/
|
*/
|
||||||
lock_page(page);
|
folio_lock(folio);
|
||||||
shmem_swizzled = PageSwapCache(page) || page->mapping;
|
shmem_swizzled = folio_test_swapcache(folio) || folio->mapping;
|
||||||
unlock_page(page);
|
folio_unlock(folio);
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
|
|
||||||
if (shmem_swizzled)
|
if (shmem_swizzled)
|
||||||
goto again;
|
goto again;
|
||||||
|
@ -341,14 +340,14 @@ again:
|
||||||
/*
|
/*
|
||||||
* Private mappings are handled in a simple way.
|
* Private mappings are handled in a simple way.
|
||||||
*
|
*
|
||||||
* If the futex key is stored on an anonymous page, then the associated
|
* If the futex key is stored in anonymous memory, then the associated
|
||||||
* object is the mm which is implicitly pinned by the calling process.
|
* object is the mm which is implicitly pinned by the calling process.
|
||||||
*
|
*
|
||||||
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
|
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
|
||||||
* it's a read-only handle, it's expected that futexes attach to
|
* it's a read-only handle, it's expected that futexes attach to
|
||||||
* the object not the particular process.
|
* the object not the particular process.
|
||||||
*/
|
*/
|
||||||
if (PageAnon(page)) {
|
if (folio_test_anon(folio)) {
|
||||||
/*
|
/*
|
||||||
* A RO anonymous page will never change and thus doesn't make
|
* A RO anonymous page will never change and thus doesn't make
|
||||||
* sense for futex operations.
|
* sense for futex operations.
|
||||||
|
@ -367,10 +366,10 @@ again:
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The associated futex object in this case is the inode and
|
* The associated futex object in this case is the inode and
|
||||||
* the page->mapping must be traversed. Ordinarily this should
|
* the folio->mapping must be traversed. Ordinarily this should
|
||||||
* be stabilised under page lock but it's not strictly
|
* be stabilised under folio lock but it's not strictly
|
||||||
* necessary in this case as we just want to pin the inode, not
|
* necessary in this case as we just want to pin the inode, not
|
||||||
* update the radix tree or anything like that.
|
* update i_pages or anything like that.
|
||||||
*
|
*
|
||||||
* The RCU read lock is taken as the inode is finally freed
|
* The RCU read lock is taken as the inode is finally freed
|
||||||
* under RCU. If the mapping still matches expectations then the
|
* under RCU. If the mapping still matches expectations then the
|
||||||
|
@ -378,9 +377,9 @@ again:
|
||||||
*/
|
*/
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
|
|
||||||
if (READ_ONCE(page->mapping) != mapping) {
|
if (READ_ONCE(folio->mapping) != mapping) {
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
|
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
|
@ -388,19 +387,19 @@ again:
|
||||||
inode = READ_ONCE(mapping->host);
|
inode = READ_ONCE(mapping->host);
|
||||||
if (!inode) {
|
if (!inode) {
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
|
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
|
|
||||||
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
|
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
|
||||||
key->shared.i_seq = get_inode_sequence_number(inode);
|
key->shared.i_seq = get_inode_sequence_number(inode);
|
||||||
key->shared.pgoff = page_to_pgoff(tail);
|
key->shared.pgoff = folio->index + folio_page_idx(folio, page);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
put_page(page);
|
folio_put(folio);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue