Lines Matching +full:atomic +full:- +full:threshold +full:- +full:us

1 // SPDX-License-Identifier: GPL-2.0
29 * Dax memory reclaim threshold in percentage of total ranges. When the
30 * number of free ranges drops below this threshold, reclaim can trigger
40 /* Will connect in fcd->free_ranges to keep track of free memory */
46 /* Will connect in fcd->busy_ranges to keep track of busy memory */
55 /* Is this mapping read-only or read-write */
62 /* Per-inode dax map */
113 /* If the number of free ranges is below threshold, start reclaim */ in __kick_dmap_free_worker()
114 free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, in __kick_dmap_free_worker()
116 if (fcd->nr_free_ranges < free_threshold) in __kick_dmap_free_worker()
117 queue_delayed_work(system_long_wq, &fcd->free_work, in __kick_dmap_free_worker()
124 spin_lock(&fcd->lock); in kick_dmap_free_worker()
126 spin_unlock(&fcd->lock); in kick_dmap_free_worker()
133 spin_lock(&fcd->lock); in alloc_dax_mapping()
134 dmap = list_first_entry_or_null(&fcd->free_ranges, in alloc_dax_mapping()
137 list_del_init(&dmap->list); in alloc_dax_mapping()
138 WARN_ON(fcd->nr_free_ranges <= 0); in alloc_dax_mapping()
139 fcd->nr_free_ranges--; in alloc_dax_mapping()
142 spin_unlock(&fcd->lock); in alloc_dax_mapping()
147 /* This assumes fcd->lock is held */
151 list_del_init(&dmap->busy_list); in __dmap_remove_busy_list()
152 WARN_ON(fcd->nr_busy_ranges == 0); in __dmap_remove_busy_list()
153 fcd->nr_busy_ranges--; in __dmap_remove_busy_list()
159 spin_lock(&fcd->lock); in dmap_remove_busy_list()
161 spin_unlock(&fcd->lock); in dmap_remove_busy_list()
164 /* This assumes fcd->lock is held */
168 list_add_tail(&dmap->list, &fcd->free_ranges); in __dmap_add_to_free_pool()
169 fcd->nr_free_ranges++; in __dmap_add_to_free_pool()
170 wake_up(&fcd->range_waitq); in __dmap_add_to_free_pool()
177 spin_lock(&fcd->lock); in dmap_add_to_free_pool()
179 spin_unlock(&fcd->lock); in dmap_add_to_free_pool()
187 struct fuse_conn_dax *fcd = fm->fc->dax; in fuse_setup_one_mapping()
194 WARN_ON(fcd->nr_free_ranges < 0); in fuse_setup_one_mapping()
199 inarg.fh = -1; in fuse_setup_one_mapping()
200 inarg.moffset = dmap->window_offset; in fuse_setup_one_mapping()
206 args.nodeid = fi->nodeid; in fuse_setup_one_mapping()
213 dmap->writable = writable; in fuse_setup_one_mapping()
220 dmap->inode = inode; in fuse_setup_one_mapping()
221 dmap->itn.start = dmap->itn.last = start_idx; in fuse_setup_one_mapping()
222 /* Protected by fi->dax->sem */ in fuse_setup_one_mapping()
223 interval_tree_insert(&dmap->itn, &fi->dax->tree); in fuse_setup_one_mapping()
224 fi->dax->nr++; in fuse_setup_one_mapping()
225 spin_lock(&fcd->lock); in fuse_setup_one_mapping()
226 list_add_tail(&dmap->busy_list, &fcd->busy_ranges); in fuse_setup_one_mapping()
227 fcd->nr_busy_ranges++; in fuse_setup_one_mapping()
228 spin_unlock(&fcd->lock); in fuse_setup_one_mapping()
242 args.nodeid = fi->nodeid; in fuse_send_removemapping()
246 args.in_args[1].size = inargp->count * sizeof(*remove_one); in fuse_send_removemapping()
262 return -ENOMEM; in dmap_removemapping_list()
266 ptr->moffset = dmap->window_offset; in dmap_removemapping_list()
267 ptr->len = dmap->length; in dmap_removemapping_list()
270 num--; in dmap_removemapping_list()
289 * fcd->lock held.
295 dmap->itn.start, dmap->itn.last, dmap->window_offset, in dmap_reinit_add_to_free_pool()
296 dmap->length); in dmap_reinit_add_to_free_pool()
298 dmap->inode = NULL; in dmap_reinit_add_to_free_pool()
299 dmap->itn.start = dmap->itn.last = 0; in dmap_reinit_add_to_free_pool()
322 node = interval_tree_iter_first(&fi->dax->tree, start_idx, in inode_reclaim_dmap_range()
328 WARN_ON(refcount_read(&dmap->refcnt) > 1); in inode_reclaim_dmap_range()
329 interval_tree_remove(&dmap->itn, &fi->dax->tree); in inode_reclaim_dmap_range()
331 list_add(&dmap->list, &to_remove); in inode_reclaim_dmap_range()
338 WARN_ON(fi->dax->nr < num); in inode_reclaim_dmap_range()
339 fi->dax->nr -= num; in inode_reclaim_dmap_range()
341 if (err && err != -ENOTCONN) { in inode_reclaim_dmap_range()
345 spin_lock(&fcd->lock); in inode_reclaim_dmap_range()
347 list_del_init(&dmap->list); in inode_reclaim_dmap_range()
350 spin_unlock(&fcd->lock); in inode_reclaim_dmap_range()
362 forget_one.moffset = dmap->window_offset; in dmap_removemapping_one()
363 forget_one.len = dmap->length; in dmap_removemapping_one()
370 * this function does not take any locks like fi->dax->sem for traversing
384 inode_reclaim_dmap_range(fc->dax, inode, 0, -1); in fuse_dax_inode_cleanup()
385 WARN_ON(fi->dax->nr); in fuse_dax_inode_cleanup()
390 iomap->addr = IOMAP_NULL_ADDR; in fuse_fill_iomap_hole()
391 iomap->length = length; in fuse_fill_iomap_hole()
392 iomap->type = IOMAP_HOLE; in fuse_fill_iomap_hole()
402 offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); in fuse_fill_iomap()
403 len = min(length, dmap->length - offset); in fuse_fill_iomap()
407 len = i_size - pos; in fuse_fill_iomap()
410 iomap->addr = dmap->window_offset + offset; in fuse_fill_iomap()
411 iomap->length = len; in fuse_fill_iomap()
413 iomap->length = ALIGN(len, PAGE_SIZE); in fuse_fill_iomap()
414 iomap->type = IOMAP_MAPPED; in fuse_fill_iomap()
417 * use. This assumes fi->dax->sem mutex is held either in fuse_fill_iomap()
420 refcount_inc(&dmap->refcnt); in fuse_fill_iomap()
422 /* iomap->private should be NULL */ in fuse_fill_iomap()
423 WARN_ON_ONCE(iomap->private); in fuse_fill_iomap()
424 iomap->private = dmap; in fuse_fill_iomap()
437 struct fuse_conn_dax *fcd = fc->dax; in fuse_setup_new_dax_mapping()
447 * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it. in fuse_setup_new_dax_mapping()
448 * In fault path we enter with mapping->invalidate_lock held and can't in fuse_setup_new_dax_mapping()
449 * drop it. Also in fault path we hold mapping->invalidate_lock shared in fuse_setup_new_dax_mapping()
451 * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault() in fuse_setup_new_dax_mapping()
457 return -EAGAIN; in fuse_setup_new_dax_mapping()
466 return -EIO; in fuse_setup_new_dax_mapping()
472 down_write(&fi->dax->sem); in fuse_setup_new_dax_mapping()
477 node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); in fuse_setup_new_dax_mapping()
482 up_write(&fi->dax->sem); in fuse_setup_new_dax_mapping()
491 up_write(&fi->dax->sem); in fuse_setup_new_dax_mapping()
495 up_write(&fi->dax->sem); in fuse_setup_new_dax_mapping()
513 down_write(&fi->dax->sem); in fuse_upgrade_dax_mapping()
514 node = interval_tree_iter_first(&fi->dax->tree, idx, idx); in fuse_upgrade_dax_mapping()
520 * re-acquired the fi->dax->sem lock. in fuse_upgrade_dax_mapping()
522 ret = -EIO; in fuse_upgrade_dax_mapping()
529 * Now we hold fi->dax->sem lock and that reference is not needed in fuse_upgrade_dax_mapping()
532 if (refcount_dec_and_test(&dmap->refcnt)) { in fuse_upgrade_dax_mapping()
542 if (dmap->writable) { in fuse_upgrade_dax_mapping()
554 up_write(&fi->dax->sem); in fuse_upgrade_dax_mapping()
574 return -EIO; in fuse_iomap_begin()
576 iomap->offset = pos; in fuse_iomap_begin()
577 iomap->flags = 0; in fuse_iomap_begin()
578 iomap->bdev = NULL; in fuse_iomap_begin()
579 iomap->dax_dev = fc->dax->dev; in fuse_iomap_begin()
588 down_read(&fi->dax->sem); in fuse_iomap_begin()
589 node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); in fuse_iomap_begin()
592 if (writable && !dmap->writable) { in fuse_iomap_begin()
593 /* Upgrade read-only mapping to read-write. This will in fuse_iomap_begin()
594 * require exclusive fi->dax->sem lock as we don't want in fuse_iomap_begin()
599 * Before dropping fi->dax->sem lock, take reference in fuse_iomap_begin()
602 refcount_inc(&dmap->refcnt); in fuse_iomap_begin()
603 up_read(&fi->dax->sem); in fuse_iomap_begin()
610 up_read(&fi->dax->sem); in fuse_iomap_begin()
614 up_read(&fi->dax->sem); in fuse_iomap_begin()
631 __func__, pos, length, iomap->length); in fuse_iomap_begin()
639 struct fuse_dax_mapping *dmap = iomap->private; in fuse_iomap_end()
642 if (refcount_dec_and_test(&dmap->refcnt)) { in fuse_iomap_end()
650 /* DAX writes beyond end-of-file aren't handled using iomap, so the in fuse_iomap_end()
663 filemap_invalidate_unlock(inode->i_mapping); in fuse_wait_dax_page()
665 filemap_invalidate_lock(inode->i_mapping); in fuse_wait_dax_page()
668 /* Should be called with mapping->invalidate_lock held exclusively */
674 page = dax_layout_busy_page_range(inode->i_mapping, start, end); in __fuse_dax_break_layouts()
679 return ___wait_var_event(&page->_refcount, in __fuse_dax_break_layouts()
680 atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, in __fuse_dax_break_layouts()
702 struct inode *inode = file_inode(iocb->ki_filp); in fuse_dax_read_iter()
705 if (iocb->ki_flags & IOCB_NOWAIT) { in fuse_dax_read_iter()
707 return -EAGAIN; in fuse_dax_read_iter()
715 /* TODO file_accessed(iocb->ki_filp) */ in fuse_dax_read_iter()
721 struct inode *inode = file_inode(iocb->ki_filp); in file_extending_write()
724 ((iocb->ki_pos) >= i_size_read(inode) || in file_extending_write()
725 (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); in file_extending_write()
730 struct inode *inode = file_inode(iocb->ki_filp); in fuse_dax_direct_write()
734 ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); in fuse_dax_direct_write()
736 fuse_write_update_attr(inode, iocb->ki_pos, ret); in fuse_dax_direct_write()
742 struct inode *inode = file_inode(iocb->ki_filp); in fuse_dax_write_iter()
745 if (iocb->ki_flags & IOCB_NOWAIT) { in fuse_dax_write_iter()
747 return -EAGAIN; in fuse_dax_write_iter()
756 ret = file_remove_privs(iocb->ki_filp); in fuse_dax_write_iter()
762 * disk i_size increase are not atomic otherwise. in fuse_dax_write_iter()
781 struct inode *inode = mapping->host; in fuse_dax_writepages()
784 return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc); in fuse_dax_writepages()
791 struct inode *inode = file_inode(vmf->vma->vm_file); in __fuse_dax_fault()
792 struct super_block *sb = inode->i_sb; in __fuse_dax_fault()
796 struct fuse_conn_dax *fcd = fc->dax; in __fuse_dax_fault()
802 if (retry && !(fcd->nr_free_ranges > 0)) in __fuse_dax_fault()
803 wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); in __fuse_dax_fault()
811 filemap_invalidate_lock_shared(inode->i_mapping); in __fuse_dax_fault()
813 if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { in __fuse_dax_fault()
816 filemap_invalidate_unlock_shared(inode->i_mapping); in __fuse_dax_fault()
822 filemap_invalidate_unlock_shared(inode->i_mapping); in __fuse_dax_fault()
832 return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE); in fuse_dax_fault()
837 return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE); in fuse_dax_huge_fault()
860 vma->vm_ops = &fuse_dax_vm_ops; in fuse_dax_mmap()
869 loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; in dmap_writeback_invalidate()
870 loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); in dmap_writeback_invalidate()
872 ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); in dmap_writeback_invalidate()
879 ret = invalidate_inode_pages2_range(inode->i_mapping, in dmap_writeback_invalidate()
896 * igrab() was done to make sure inode won't go under us, and this in reclaim_one_dmap_locked()
904 interval_tree_remove(&dmap->itn, &fi->dax->tree); in reclaim_one_dmap_locked()
905 fi->dax->nr--; in reclaim_one_dmap_locked()
912 if (ret && ret != -ENOTCONN) { in reclaim_one_dmap_locked()
914 dmap->window_offset, dmap->length, ret); in reclaim_one_dmap_locked()
920 * to hold fi->dax->sem lock either shared or exclusive.
928 for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; in inode_lookup_first_dmap()
929 node = interval_tree_iter_next(node, 0, -1)) { in inode_lookup_first_dmap()
932 if (refcount_read(&dmap->refcnt) > 1) in inode_lookup_first_dmap()
956 filemap_invalidate_lock(inode->i_mapping); in inode_inline_reclaim_one_dmap()
959 down_read(&fi->dax->sem); in inode_inline_reclaim_one_dmap()
962 start_idx = dmap->itn.start; in inode_inline_reclaim_one_dmap()
964 dmap_end = dmap_start + FUSE_DAX_SZ - 1; in inode_inline_reclaim_one_dmap()
966 up_read(&fi->dax->sem); in inode_inline_reclaim_one_dmap()
982 down_write(&fi->dax->sem); in inode_inline_reclaim_one_dmap()
983 node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); in inode_inline_reclaim_one_dmap()
993 if (refcount_read(&dmap->refcnt) > 1) { in inode_inline_reclaim_one_dmap()
1008 dmap->inode = NULL; in inode_inline_reclaim_one_dmap()
1009 dmap->itn.start = dmap->itn.last = 0; in inode_inline_reclaim_one_dmap()
1012 __func__, inode, dmap->window_offset, dmap->length); in inode_inline_reclaim_one_dmap()
1015 up_write(&fi->dax->sem); in inode_inline_reclaim_one_dmap()
1017 filemap_invalidate_unlock(inode->i_mapping); in inode_inline_reclaim_one_dmap()
1047 * mapping->invalidate_lock held and worker to free memory in alloc_dax_mapping_reclaim()
1049 * mapping->invalidate_lock. So sleep only if fi->dax->nr=0 in alloc_dax_mapping_reclaim()
1055 * We are not holding fi->dax->sem. So it is possible in alloc_dax_mapping_reclaim()
1057 * mapping->invalidate_lock, worker should still be able to in alloc_dax_mapping_reclaim()
1058 * free up a range and wake us up. in alloc_dax_mapping_reclaim()
1060 if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { in alloc_dax_mapping_reclaim()
1061 if (wait_event_killable_exclusive(fcd->range_waitq, in alloc_dax_mapping_reclaim()
1062 (fcd->nr_free_ranges > 0))) { in alloc_dax_mapping_reclaim()
1063 return ERR_PTR(-EINTR); in alloc_dax_mapping_reclaim()
1079 node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); in lookup_and_reclaim_dmap_locked()
1087 if (refcount_read(&dmap->refcnt) > 1) in lookup_and_reclaim_dmap_locked()
1095 spin_lock(&fcd->lock); in lookup_and_reclaim_dmap_locked()
1097 spin_unlock(&fcd->lock); in lookup_and_reclaim_dmap_locked()
1104 * 1. Take mapping->invalidate_lock to block dax faults.
1105 * 2. Take fi->dax->sem to protect interval tree and also to make sure
1116 loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; in lookup_and_reclaim_dmap()
1118 filemap_invalidate_lock(inode->i_mapping); in lookup_and_reclaim_dmap()
1126 down_write(&fi->dax->sem); in lookup_and_reclaim_dmap()
1128 up_write(&fi->dax->sem); in lookup_and_reclaim_dmap()
1130 filemap_invalidate_unlock(inode->i_mapping); in lookup_and_reclaim_dmap()
1148 spin_lock(&fcd->lock); in try_to_free_dmap_chunks()
1150 if (!fcd->nr_busy_ranges) { in try_to_free_dmap_chunks()
1151 spin_unlock(&fcd->lock); in try_to_free_dmap_chunks()
1155 list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, in try_to_free_dmap_chunks()
1158 if (refcount_read(&pos->refcnt) > 1) in try_to_free_dmap_chunks()
1161 inode = igrab(pos->inode); in try_to_free_dmap_chunks()
1175 list_move_tail(&dmap->busy_list, &fcd->busy_ranges); in try_to_free_dmap_chunks()
1176 start_idx = end_idx = dmap->itn.start; in try_to_free_dmap_chunks()
1179 spin_unlock(&fcd->lock); in try_to_free_dmap_chunks()
1203 /* If the number of free ranges is still below threshold, requeue */ in fuse_dax_free_mem_worker()
1213 list_del(&range->list); in fuse_free_dax_mem_ranges()
1214 if (!list_empty(&range->busy_list)) in fuse_free_dax_mem_ranges()
1215 list_del(&range->busy_list); in fuse_free_dax_mem_ranges()
1222 if (fc->dax) { in fuse_dax_conn_free()
1223 fuse_free_dax_mem_ranges(&fc->dax->free_ranges); in fuse_dax_conn_free()
1224 kfree(fc->dax); in fuse_dax_conn_free()
1225 fc->dax = NULL; in fuse_dax_conn_free()
1234 size_t dax_size = -1; in fuse_dax_mem_range_init()
1237 init_waitqueue_head(&fcd->range_waitq); in fuse_dax_mem_range_init()
1238 INIT_LIST_HEAD(&fcd->free_ranges); in fuse_dax_mem_range_init()
1239 INIT_LIST_HEAD(&fcd->busy_ranges); in fuse_dax_mem_range_init()
1240 INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); in fuse_dax_mem_range_init()
1243 nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), in fuse_dax_mem_range_init()
1257 ret = -ENOMEM; in fuse_dax_mem_range_init()
1261 /* TODO: This offset only works if virtio-fs driver is not in fuse_dax_mem_range_init()
1265 range->window_offset = i * FUSE_DAX_SZ; in fuse_dax_mem_range_init()
1266 range->length = FUSE_DAX_SZ; in fuse_dax_mem_range_init()
1267 INIT_LIST_HEAD(&range->busy_list); in fuse_dax_mem_range_init()
1268 refcount_set(&range->refcnt, 1); in fuse_dax_mem_range_init()
1269 list_add_tail(&range->list, &fcd->free_ranges); in fuse_dax_mem_range_init()
1272 fcd->nr_free_ranges = nr_ranges; in fuse_dax_mem_range_init()
1273 fcd->nr_ranges = nr_ranges; in fuse_dax_mem_range_init()
1277 fuse_free_dax_mem_ranges(&fcd->free_ranges); in fuse_dax_mem_range_init()
1287 fc->dax_mode = dax_mode; in fuse_dax_conn_alloc()
1294 return -ENOMEM; in fuse_dax_conn_alloc()
1296 spin_lock_init(&fcd->lock); in fuse_dax_conn_alloc()
1297 fcd->dev = dax_dev; in fuse_dax_conn_alloc()
1304 fc->dax = fcd; in fuse_dax_conn_alloc()
1312 fi->dax = NULL; in fuse_dax_inode_alloc()
1313 if (fc->dax) { in fuse_dax_inode_alloc()
1314 fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); in fuse_dax_inode_alloc()
1315 if (!fi->dax) in fuse_dax_inode_alloc()
1318 init_rwsem(&fi->dax->sem); in fuse_dax_inode_alloc()
1319 fi->dax->tree = RB_ROOT_CACHED; in fuse_dax_inode_alloc()
1334 enum fuse_dax_mode dax_mode = fc->dax_mode; in fuse_should_enable_dax()
1340 * fc->dax may be NULL in 'inode' mode when filesystem device doesn't in fuse_should_enable_dax()
1343 if (!fc->dax) in fuse_should_enable_dax()
1350 return fc->inode_dax && (flags & FUSE_ATTR_DAX); in fuse_should_enable_dax()
1358 inode->i_flags |= S_DAX; in fuse_dax_inode_init()
1359 inode->i_data.a_ops = &fuse_dax_file_aops; in fuse_dax_inode_init()
1366 if (fuse_is_inode_dax_mode(fc->dax_mode) && in fuse_dax_dontcache()
1373 if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { in fuse_dax_check_alignment()
1383 struct fuse_conn_dax *fcd = fc->dax; in fuse_dax_cancel_work()
1386 cancel_delayed_work_sync(&fcd->free_work); in fuse_dax_cancel_work()