Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 10 additions & 44 deletions kmod/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ static int file_open(struct inode* inode, struct file* filp) {
struct ternfs_inode* enode = TERNFS_I(inode);

ternfs_debug("enode=%p status=%d owner=%p", enode, enode->file.status, current->group_leader);
int err = 0;

if ((filp->f_mode&FMODE_WRITE) && (enode->file.status == TERNFS_FILE_STATUS_WRITING)) {
// this is the "common" writing case, we've just created a file to write it.
Expand All @@ -90,51 +89,11 @@ static int file_open(struct inode* inode, struct file* filp) {
// to files) are attempted. the reason is that some workflows (such as open write +
// setattr) _will_ work.
enode->file.status = TERNFS_FILE_STATUS_READING;
// also, set atime, if requested
if (!(filp->f_flags&O_NOATIME)) {
u64 atime_ns = ktime_get_real_ns();
struct timespec64 atime_ts = ns_to_timespec64(atime_ns);
u64 diff = atime_ts.tv_sec - min(inode_get_atime_sec(&enode->inode), atime_ts.tv_sec);
if (diff < ternfs_atime_update_interval_sec) {
// we don't think we should update
goto out;
}

// https://internal-repo/issues/292
// we might have cached data and another client updated atime.
// ternfs_do_getattr is orders of magnitude cheaper than ternfs_shard_set_time,
// so we might as well refresh and re-check
int err = ternfs_do_getattr(enode, ATTR_CACHE_NO_TIMEOUT);
if (err) {
goto out;
}
diff = atime_ts.tv_sec - min(inode_get_atime_sec(&enode->inode), atime_ts.tv_sec);
if (diff < ternfs_atime_update_interval_sec) {
// our local time changed and we see we don't need to update
goto out;
}

if ((inode_get_atime_sec(&enode->inode) > atime_ts.tv_sec) ||
(inode_get_atime_sec(&enode->inode) == atime_ts.tv_sec &&
inode_get_atime_nsec(&enode->inode) == atime_ts.tv_nsec
)
) {
// we don't want atime to go into the past, so don't update
goto out;
}
u64 atime = atime_ns | (1ull<<63);
err = ternfs_shard_set_time((struct ternfs_fs_info*)enode->inode.i_sb->s_fs_info, inode->i_ino, 0, atime);
if (err) {
goto out;
}
// we updated time. we don't need to refresh it now but allow refresh on next stat by getattr_expiry
smp_store_release(&enode->getattr_expiry, 0);
}
}
out:

inode_unlock(inode);
trace_eggsfs_inode_lock(inode, TERNFS_INODE_UNLOCK, "file_open");
return err;
return 0;
}

static void init_transient_span(void* p) {
Expand Down Expand Up @@ -1177,13 +1136,20 @@ static int file_fsync(struct file* f, loff_t start, loff_t end, int datasync) {
return 0;
}

// mmap handler for the file operations table: record the access so atime gets a
// chance to update, then hand the mapping off to the generic read-only mmap
// implementation. The intent is that page faults served by filemap_fault() do
// not go through file_accessed(), so mmap time is where the bump happens.
//
// Returns whatever generic_file_readonly_mmap() returns.
//
// NOTE(review): mainline generic_file_mmap() appears to call file_accessed()
// itself -- confirm whether the explicit call here is redundant on the kernels
// we support.
static int file_mmap(struct file* file, struct vm_area_struct* vma) {
    int ret;

    file_accessed(file);
    ret = generic_file_readonly_mmap(file, vma);
    return ret;
}

// File operations table wiring up the handlers defined in this file.
// Each member is designated exactly once: a duplicated designator is silently
// overridden by the later one (and trips -Woverride-init), so the stale
// generic_file_readonly_mmap entry is dropped in favour of file_mmap.
const struct file_operations ternfs_file_operations = {
    .open = file_open,
    .read_iter = file_read_iter,
    .write_iter = file_write_iter,
    .flush = file_flush_internal,
    .llseek = file_lseek,
    // file_mmap records the access, then delegates to generic_file_readonly_mmap
    .mmap = file_mmap,
    .fsync = file_fsync,
};

Expand Down
68 changes: 68 additions & 0 deletions kmod/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -626,6 +626,73 @@ static const char* ternfs_get_link(struct dentry* dentry, struct inode* inode, s
return buf;
}

// Called by the VFS (via touch_atime -> inode_update_time) on actual data access
// of a regular file: generic_file_read_iter hits it on read/pread/readv, and
// file_mmap() in file.c hits it for mmap. atime_needs_update() in fs/inode.c
// already honours O_NOATIME, MNT_NOATIME, and relatime before reaching us, so
// this function only needs to apply the ternfs_atime_update_interval_sec
// throttle on top and push the update to the shard.
//
// Only wired into ternfs_file_inode_ops: directory atime is TernFS-internal
// state (dentry cache invalidation) and must not be touched by VFS access
// paths -- SB_NODIRATIME keeps the VFS out.
//
// `flags` is the S_* mask of timestamps to update. Any non-atime bits are
// forwarded to ternfs_generic_update_time(); S_ATIME is handled here.
//
// Always returns 0: getattr / shard failures are logged and swallowed so that an
// atime problem never fails the access that triggered it.
//
// On kernels before 6.6 the VFS also passes a timestamp. It is not used (we take
// our own reading with ktime_get_real_ns()), but the parameter must still be
// named: omitting a parameter name in a function *definition* is only valid from
// C23 on and is rejected by GCC releases older than 11.
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
static int ternfs_update_time(struct inode* inode, struct timespec64* now, int flags) {
#else
static int ternfs_update_time(struct inode* inode, int flags) {
#endif
    struct ternfs_inode* enode = TERNFS_I(inode);

    // let the generic helper deal with everything that is not atime
    int non_atime = flags & ~S_ATIME;
    if (non_atime) {
        ternfs_generic_update_time(inode, non_atime);
    }
    if (!(flags & S_ATIME)) { return 0; }

    u64 now_ns = ktime_get_real_ns();
    struct timespec64 now_ts = ns_to_timespec64(now_ns);

    // seconds elapsed since the cached atime, clamped to 0 if the cached atime
    // is in the future
    time64_t cur_sec = inode_get_atime_sec(inode);
    u64 diff = now_ts.tv_sec - min(cur_sec, now_ts.tv_sec);
    if (diff < ternfs_atime_update_interval_sec) {
        // within throttle window: bump in-memory atime only
        // NOTE(review): this moves the base the throttle is measured against;
        // confirm that steady access cannot postpone the shard update
        // indefinitely between getattr refreshes.
        inode_set_atime_to_ts(inode, now_ts);
        return 0;
    }

    // https://internal-repo/issues/292
    // a peer client may have already bumped atime. ternfs_do_getattr is orders
    // of magnitude cheaper than ternfs_shard_set_time, so refresh before RPC.
    int err = ternfs_do_getattr(enode, ATTR_CACHE_NO_TIMEOUT);
    if (err) {
        // non-fatal: atime errors shouldn't break reads
        ternfs_warn("file=%016lx update_time getattr failed err=%d", inode->i_ino, err);
        return 0;
    }
    // re-evaluate the throttle against the freshly fetched atime
    cur_sec = inode_get_atime_sec(inode);
    diff = now_ts.tv_sec - min(cur_sec, now_ts.tv_sec);
    if (diff < ternfs_atime_update_interval_sec) { return 0; }

    if ((cur_sec > now_ts.tv_sec) ||
        (cur_sec == now_ts.tv_sec &&
         inode_get_atime_nsec(inode) >= now_ts.tv_nsec)) {
        // don't let atime go backwards
        return 0;
    }

    // top bit set on the timestamp, 0 passed for the other time argument --
    // presumably "set this field" / "leave unset" markers in the shard
    // protocol; confirm against ternfs_shard_set_time
    u64 atime = now_ns | (1ull << 63);
    err = ternfs_shard_set_time(
        (struct ternfs_fs_info*)inode->i_sb->s_fs_info,
        inode->i_ino, 0, atime);
    if (err) {
        ternfs_warn("file=%016lx update_time shard_set_time failed err=%d", inode->i_ino, err);
        return 0;
    }
    inode_set_atime_to_ts(inode, now_ts);
    // zero the expiry so the next getattr refreshes the cached attributes
    smp_store_release(&enode->getattr_expiry, 0);
    return 0;
}

static const struct inode_operations ternfs_dir_inode_ops = {
.create = ternfs_create,
.lookup = ternfs_lookup,
Expand All @@ -640,6 +707,7 @@ static const struct inode_operations ternfs_dir_inode_ops = {
// Inode operations for regular files. Besides getattr/setattr, update_time is
// hooked so that VFS timestamp updates go through ternfs_update_time, which
// throttles atime updates and pushes them to the shard.
static const struct inode_operations ternfs_file_inode_ops = {
.getattr = ternfs_getattr,
.setattr = ternfs_setattr,
.update_time = ternfs_update_time,
};

static const struct inode_operations ternfs_symlink_inode_ops = {
Expand Down
16 changes: 16 additions & 0 deletions kmod/inode_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,20 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,

#endif

// Version-independent wrapper around generic_update_time().
//
// Before mainline 6.6 the helper (like inode_operations->update_time) took an
// explicit `struct timespec64 *now`; from 6.6 on the timestamp is computed
// internally. Callers of this wrapper always pass just (inode, flags); on older
// kernels the timestamp is taken from current_time(inode).
//
// Returns the result of generic_update_time().
static inline int ternfs_generic_update_time(struct inode *inode, int flags)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 6, 0)
    struct timespec64 ts = current_time(inode);

    return generic_update_time(inode, &ts, flags);
#else
    return generic_update_time(inode, flags);
#endif
}

#endif /* _TERNFS_INODE_COMPAT_H */
4 changes: 3 additions & 1 deletion kmod/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,9 @@ static struct dentry* ternfs_mount(struct file_system_type* fs_type, int flags,

sb->s_fs_info = info;

sb->s_flags = SB_NOSUID | SB_NODEV | SB_NOEXEC | SB_NOATIME | SB_NODIRATIME;
// SB_NODIRATIME needs to be set: directory atime is TernFS-internal state used
// for dentry-cache invalidation and must not be touched by the VFS.
sb->s_flags = SB_NOSUID | SB_NODEV | SB_NOEXEC | SB_NODIRATIME;
sb->s_iflags = SB_I_NOEXEC | SB_I_NODEV;

sb->s_op = &ternfs_super_ops;
Expand Down
Loading