From 883ad7b29f4f26c4d1d9cc2461fc52571be9ecd6 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Mon, 2 Mar 2015 09:04:10 -0800 Subject: android: base-cfg: turn off /dev/mem and /dev/kmem Signed-off-by: Mark Salyzyn Bug: 19549480 Change-Id: Icd43da8712efab46aa70590311c0fb170a9e124c --- android/configs/android-base.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/android/configs/android-base.cfg b/android/configs/android-base.cfg index d8503e450957..6c08830ae6ed 100644 --- a/android/configs/android-base.cfg +++ b/android/configs/android-base.cfg @@ -1,4 +1,6 @@ # KEEP ALPHABETICALLY SORTED +# CONFIG_DEVKMEM is not set +# CONFIG_DEVMEM is not set # CONFIG_INET_LRO is not set # CONFIG_MODULES is not set # CONFIG_OABI_COMPAT is not set -- cgit v1.2.3 From 02de954e6c1842beabcd85e7824526dd1bd45c0e Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 15 May 2014 16:38:41 -0700 Subject: net: ipv6: make "ip -6 route get mark xyz" work. Currently, "ip -6 route get mark xyz" ignores the mark passed in by userspace. Make it honour the mark, just like IPv4 does. [net-next commit 2e47b291953c35afa4e20a65475954c1a1b9afe1] Change-Id: Ief6cd1e1b7e43dc0d008b7e692be62cadc5cc7ca Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv6/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bad36468dcd7..28a664d98745 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2587,6 +2587,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (tb[RTA_UID]) fl6.flowi6_uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); -- cgit v1.2.3 From dfca41467fa4570189f76e0fb9e0f911bc88ceb8 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 19 Jul 2013 11:31:36 +0200 Subject: um: siginfo cleanup Currently we use both struct siginfo and siginfo_t. Let's use struct siginfo internally to avoid ongoing compiler warning. We are allowed to do so because struct siginfo and siginfo_t are equivalent. [cherry-pick of upstream 9a8c1359571c5d5e2fbc43cf457a6486b70a70cb] Change-Id: I564775c9ed515c39ffff7d7d600a85d50291f31d Signed-off-by: Richard Weinberger Signed-off-by: Lorenzo Colitti --- arch/um/include/shared/frame_kern.h | 8 ++++---- arch/um/kernel/signal.c | 4 ++-- arch/um/os-Linux/signal.c | 8 ++++---- arch/um/os-Linux/skas/process.c | 10 +++++----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h index e584e40ee832..f2ca5702a4e2 100644 --- a/arch/um/include/shared/frame_kern.h +++ b/arch/um/include/shared/frame_kern.h @@ -6,13 +6,13 @@ #ifndef __FRAME_KERN_H_ #define __FRAME_KERN_H_ -extern int setup_signal_stack_sc(unsigned long stack_top, int sig, +extern int setup_signal_stack_sc(unsigned long stack_top, int sig, struct k_sigaction *ka, - struct pt_regs *regs, + struct pt_regs *regs, sigset_t *mask); -extern int setup_signal_stack_si(unsigned long stack_top, int sig, +extern int setup_signal_stack_si(unsigned long stack_top, int sig, struct k_sigaction *ka, - struct pt_regs *regs, siginfo_t *info, + struct pt_regs *regs, struct siginfo *info, sigset_t *mask); #endif diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c index 3e831b3fd07b..f57e02e7910f 100644 --- a/arch/um/kernel/signal.c +++ b/arch/um/kernel/signal.c @@ -19,7 +19,7 @@ EXPORT_SYMBOL(unblock_signals); * OK, we're invoking a handler */ static void handle_signal(struct pt_regs *regs, unsigned long signr, - struct k_sigaction *ka, siginfo_t *info) + struct k_sigaction *ka, struct siginfo *info) { sigset_t *oldset = sigmask_to_save(); int singlestep = 0; @@ -71,7 +71,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr, static int kern_do_signal(struct pt_regs *regs) { struct k_sigaction ka_copy; - siginfo_t info; + struct siginfo info; int sig, handled_sig = 0; while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) { diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index 9d9f1b4bf826..905924b773d3 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -25,7 +25,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { [SIGIO] = sigio_handler, [SIGVTALRM] = timer_handler }; -static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc) +static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { struct uml_pt_regs r; int save_errno = errno; @@ -61,7 +61,7 @@ static void sig_handler_common(int sig, siginfo_t *si, mcontext_t *mc) static int signals_enabled; static unsigned int signals_pending; -void sig_handler(int sig, siginfo_t *si, mcontext_t *mc) +void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) { int enabled; @@ -120,7 +120,7 @@ void set_sigstack(void *sig_stack, int size) panic("enabling signal stack failed, errno = %d\n", errno); } -static void (*handlers[_NSIG])(int sig, siginfo_t *si, mcontext_t *mc) = { +static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGSEGV] = sig_handler, [SIGBUS] = sig_handler, [SIGILL] = sig_handler, @@ -162,7 +162,7 @@ static void hard_handler(int sig, siginfo_t *si, void *p) while ((sig = ffs(pending)) != 0){ sig--; pending &= ~(1 << sig); - (*handlers[sig])(sig, si, mc); + (*handlers[sig])(sig, (struct siginfo *)si, mc); } /* diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 4625949bf1e4..908579f2b0ab 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -409,7 +409,7 @@ void userspace(struct uml_pt_regs *regs) if (WIFSTOPPED(status)) { int sig = WSTOPSIG(status); - ptrace(PTRACE_GETSIGINFO, pid, 0, &si); + ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); switch (sig) { case SIGSEGV: @@ -417,7 +417,7 @@ void userspace(struct uml_pt_regs *regs) !ptrace_faultinfo) { get_skas_faultinfo(pid, ®s->faultinfo); - (*sig_info[SIGSEGV])(SIGSEGV, &si, + (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, regs); } else handle_segv(pid, regs); @@ -426,14 +426,14 @@ void userspace(struct uml_pt_regs *regs) handle_trap(pid, regs, local_using_sysemu); break; case SIGTRAP: - relay_signal(SIGTRAP, &si, regs); + relay_signal(SIGTRAP, (struct siginfo *)&si, regs); break; case SIGVTALRM: now = os_nsecs(); if (now < nsecs) break; block_signals(); - (*sig_info[sig])(sig, &si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); unblock_signals(); nsecs = timer.it_value.tv_sec * UM_NSEC_PER_SEC + @@ -447,7 +447,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals(); - (*sig_info[sig])(sig, &si, regs); + (*sig_info[sig])(sig, (struct siginfo *)&si, regs); unblock_signals(); break; default: -- cgit v1.2.3 From 0f708442d160846f3e0b0f390c0534e801f7b5f5 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 19 Jul 2013 11:35:32 +0200 Subject: um: remove dead code "me" is not used. [cherry-pick of upstream 9e82d450531c79b18ab18c9b9645cdd9db31ee98] Change-Id: Ifc3550184931dddf8feebd6c3137e60b97f6a0f1 Signed-off-by: Richard Weinberger Signed-off-by: Lorenzo Colitti --- arch/x86/um/signal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index ae7319db18ee..5e04a1c899fa 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -508,7 +508,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, { struct rt_sigframe __user *frame; int err = 0; - struct task_struct *me = current; frame = (struct rt_sigframe __user *) round_down(stack_top - sizeof(struct rt_sigframe), 16); -- cgit v1.2.3 From 9c086b4cf266e9ac1afabb86ff9ef54407b344e2 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 15 Apr 2014 16:25:34 -0700 Subject: ipv4, fib: pass LOOPBACK_IFINDEX instead of 0 to flowi4_iif MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As suggested by Julian: Simply, flowi4_iif must not contain 0, it does not look logical to ignore all ip rules with specified iif. because in fib_rule_match() we do: if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) goto out; flowi4_iif should be LOOPBACK_IFINDEX by default. We need to move LOOPBACK_IFINDEX to include/net/flow.h: 1) It is mostly used by flowi_iif 2) Fix the following compile error if we use it in flow.h by the patches latter: In file included from include/linux/netfilter.h:277:0, from include/net/netns/netfilter.h:5, from include/net/net_namespace.h:21, from include/linux/netdevice.h:43, from include/linux/icmpv6.h:12, from include/linux/ipv6.h:61, from include/net/ipv6.h:16, from include/linux/sunrpc/clnt.h:27, from include/linux/nfs_fs.h:30, from init/do_mounts.c:32: include/net/flow.h: In function ‘flowi4_init_output’: include/net/flow.h:84:32: error: ‘LOOPBACK_IFINDEX’ undeclared (first use in this function) [Backport of net-next 6a662719c9868b3d6c7d26b3a085f0cd3cc15e64] Change-Id: Ib7a0a08d78c03800488afa1b2c170cb70e34cfd9 Cc: Eric Biederman Cc: Julian Anastasov Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Lorenzo Colitti --- include/net/flow.h | 10 +++++++++- include/net/net_namespace.h | 9 +-------- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 1 + net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 5 +---- net/ipv6/ip6mr.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index c91e2aae3fb1..1426681f7cf3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -12,6 +12,14 @@ #include #include +/* + * ifindex generation is per-net namespace, and loopback is + * always the 1st device in ns (see net_dev_init), thus any + * loopback device should get ifindex 1 + */ + +#define LOOPBACK_IFINDEX 1 + struct flowi_common { int flowic_oif; int flowic_iif; @@ -85,7 +93,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, kuid_t uid) { fl4->flowi4_oif = oif; - fl4->flowi4_iif = 0; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_mark = mark; fl4->flowi4_tos = tos; fl4->flowi4_scope = scope; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b17697827482..b064d6dd14fb 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -120,14 +121,6 @@ struct net { atomic_t rt_genid; }; -/* - * ifindex generation is per-net namespace, and loopback is - * always the 1st device in ns (see net_dev_init), thus any - * loopback device should get ifindex 1 - */ - -#define LOOPBACK_IFINDEX 1 - #include /* Init's network namespace */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ffffeb448ec4..5ae6d9449847 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -250,7 +250,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = oif; + fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; fl4.saddr = dst; fl4.flowi4_tos = tos; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd6..567285bb83d2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -629,6 +629,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, .daddr = nh->nh_gw, .flowi4_scope = cfg->fc_scope + 1, .flowi4_oif = nh->nh_oif, + .flowi4_iif = LOOPBACK_IFINDEX, }; /* It is not necessary, but requires a bit of thinking */ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..2c538ec7d3ea 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -451,7 +451,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) struct mr_table *mrt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_iif = skb->skb_iif, + .flowi4_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi4_mark = skb->mark, }; int err; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c49dcd0284a0..4bfaedf9b34e 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,11 +89,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ipv4_is_multicast(iph->daddr)) { if (ipv4_is_zeronet(iph->saddr)) return ipv4_is_local_multicast(iph->daddr) ^ invert; - flow.flowi4_iif = 0; - } else { - flow.flowi4_iif = LOOPBACK_IFINDEX; } - + flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_oif = 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..7a3bd3b26c38 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -693,7 +693,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; -- cgit v1.2.3 From 78bae7615d4d1c8114a2259262648afceb9ef221 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 16 May 2013 01:17:58 -0400 Subject: [readdir] convert squashfs Signed-off-by: Al Viro --- fs/squashfs/dir.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 57dc70ebbb19..f7f527bf8c10 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -100,7 +100,7 @@ static int get_dir_index_using_offset(struct super_block *sb, } -static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) +static int squashfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; @@ -127,11 +127,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) * It also means that the external f_pos is offset by 3 from the * on-disk directory f_pos. */ - while (file->f_pos < 3) { + while (ctx->pos < 3) { char *name; int i_ino; - if (file->f_pos == 0) { + if (ctx->pos == 0) { name = "."; size = 1; i_ino = inode->i_ino; @@ -141,24 +141,18 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) i_ino = squashfs_i(inode)->parent; } - TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n", - dirent, name, size, file->f_pos, i_ino, - squashfs_filetype_table[1]); - - if (filldir(dirent, name, size, file->f_pos, i_ino, - squashfs_filetype_table[1]) < 0) { - TRACE("Filldir returned less than 0\n"); + if (!dir_emit(ctx, name, size, i_ino, + squashfs_filetype_table[1])) goto finish; - } - file->f_pos += size; + ctx->pos += size; } length = get_dir_index_using_offset(inode->i_sb, &block, &offset, squashfs_i(inode)->dir_idx_start, squashfs_i(inode)->dir_idx_offset, squashfs_i(inode)->dir_idx_cnt, - file->f_pos); + ctx->pos); while (length < i_size_read(inode)) { /* @@ -198,7 +192,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) length += sizeof(*dire) + size; - if (file->f_pos >= length) + if (ctx->pos >= length) continue; dire->name[size] = '\0'; @@ -206,22 +200,12 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) ((short) le16_to_cpu(dire->inode_number)); type = le16_to_cpu(dire->type); - TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)" - "\n", dirent, dire->name, size, - file->f_pos, - le32_to_cpu(dirh.start_block), - le16_to_cpu(dire->offset), - inode_number, - squashfs_filetype_table[type]); - - if (filldir(dirent, dire->name, size, file->f_pos, + if (!dir_emit(ctx, dire->name, size, inode_number, - squashfs_filetype_table[type]) < 0) { - TRACE("Filldir returned less than 0\n"); + squashfs_filetype_table[type])) goto finish; - } - file->f_pos = length; + ctx->pos = length; } } @@ -238,6 +222,6 @@ failed_read: const struct file_operations squashfs_dir_ops = { .read = generic_read_dir, - .readdir = squashfs_readdir, + .iterate = squashfs_readdir, .llseek = default_llseek, }; -- cgit v1.2.3 From c301a0e047e401d41b26db1009d08e088ae2365a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 13:52:59 -0400 Subject: [readdir] introduce iterate_dir() and dir_context iterate_dir(): new helper, replacing vfs_readdir(). struct dir_context: contains the readdir callback (and will get more stuff in it), embedded into whatever data that callback wants to deal with; eventually, we'll be passing it to ->readdir() replacement instead of (data,filldir) pair. Signed-off-by: Al Viro --- Documentation/filesystems/porting | 3 +++ arch/alpha/kernel/osf_sys.c | 4 +++- arch/parisc/hpux/fs.c | 4 +++- fs/compat.c | 12 +++++++++--- fs/ecryptfs/file.c | 4 +++- fs/exportfs/expfs.c | 4 +++- fs/nfsd/nfs4recover.c | 13 +++++++++---- fs/nfsd/vfs.c | 4 +++- fs/readdir.c | 21 +++++++++++++-------- include/linux/fs.h | 4 ++++ 10 files changed, 53 insertions(+), 20 deletions(-) diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 4db22f6491e0..85a4a033bae7 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -445,3 +445,6 @@ object doesn't exist. It's remote/distributed ones that might care... [mandatory] FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate() in your dentry operations instead. +-- +[mandatory] + vfs_readdir() is gone; switch to iterate_dir() instead diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index b9e37ad6fa19..ac19c7299d8e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -96,6 +96,7 @@ struct osf_dirent { }; struct osf_dirent_callback { + struct dir_context ctx; struct osf_dirent __user *dirent; long __user *basep; unsigned int count; @@ -155,8 +156,9 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, buf.basep = basep; buf.count = count; buf.error = 0; + buf.ctx.actor = osf_filldir; - error = vfs_readdir(arg.file, osf_filldir, &buf); + error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; if (count != buf.count) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 838b479a42c4..fc2cbee86e34 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -60,6 +60,7 @@ struct hpux_dirent { }; struct getdents_callback { + struct dir_context ctx; struct hpux_dirent __user *current_dir; struct hpux_dirent __user *previous; int count; @@ -121,8 +122,9 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir; - error = vfs_readdir(arg.file, filldir, &buf); + error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/fs/compat.c b/fs/compat.c index fc3b55dce184..2279b59e81f2 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -832,6 +832,7 @@ struct compat_old_linux_dirent { }; struct compat_readdir_callback { + struct dir_context ctx; struct compat_old_linux_dirent __user *dirent; int result; }; @@ -880,8 +881,9 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, buf.result = 0; buf.dirent = dirent; + buf.ctx.actor = compat_fillonedir; - error = vfs_readdir(f.file, compat_fillonedir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -897,6 +899,7 @@ struct compat_linux_dirent { }; struct compat_getdents_callback { + struct dir_context ctx; struct compat_linux_dirent __user *current_dir; struct compat_linux_dirent __user *previous; int count; @@ -965,8 +968,9 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = compat_filldir; - error = vfs_readdir(f.file, compat_filldir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; @@ -983,6 +987,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 struct compat_getdents_callback64 { + struct dir_context ctx; struct linux_dirent64 __user *current_dir; struct linux_dirent64 __user *previous; int count; @@ -1050,8 +1055,9 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = compat_filldir64; - error = vfs_readdir(f.file, compat_filldir64, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a7abbea2c096..041379a646b3 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -68,6 +68,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, } struct ecryptfs_getdents_callback { + struct dir_context ctx; void *dirent; struct dentry *dentry; filldir_t filldir; @@ -126,7 +127,8 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) buf.filldir = filldir; buf.filldir_called = 0; buf.entries_written = 0; - rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); + buf.ctx.actor = ecryptfs_filldir; + rc = iterate_dir(lower_file, &buf.ctx); file->f_pos = lower_file->f_pos; if (rc < 0) goto out; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 262fc9940982..7cb190426cec 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -212,6 +212,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) } struct getdents_callback { + struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ unsigned long ino; /* the inum we are looking for */ @@ -278,10 +279,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) buffer.ino = child->d_inode->i_ino; buffer.found = 0; buffer.sequence = 0; + buffer.ctx.actor = filldir_one; while (1) { int old_seq = buffer.sequence; - error = vfs_readdir(file, filldir_one, &buffer); + error = iterate_dir(file, &buffer.ctx); if (buffer.found) { error = 0; break; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4e9a21db867a..4f8cc6ba7c28 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -263,7 +263,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - LIST_HEAD(names); + struct { + struct dir_context ctx; + struct list_head names; + } ctx; int status; status = nfs4_save_creds(&original_cred); @@ -276,11 +279,13 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) return status; } - status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); + INIT_LIST_HEAD(&ctx.names); + ctx.ctx.actor = nfsd4_build_namelist; + status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); - while (!list_empty(&names)) { + while (!list_empty(&ctx.names)) { struct name_list *entry; - entry = list_entry(names.next, struct name_list, list); + entry = list_entry(ctx.names.next, struct name_list, list); if (!status) { struct dentry *dentry; dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..f939ba9bf8e8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1912,6 +1912,7 @@ struct buffered_dirent { }; struct readdir_data { + struct dir_context ctx; char *dirent; size_t used; int full; @@ -1949,6 +1950,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, int size; loff_t offset; + buf.ctx.actor = nfsd_buffered_filldir; buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); @@ -1963,7 +1965,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, buf.used = 0; buf.full = 0; - host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + host_err = iterate_dir(file, &buf.ctx); if (buf.full) host_err = 0; diff --git a/fs/readdir.c b/fs/readdir.c index fee38e04fae4..5b620a2b45e6 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -20,7 +20,7 @@ #include -int vfs_readdir(struct file *file, filldir_t filler, void *buf) +int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; @@ -37,15 +37,14 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) res = -ENOENT; if (!IS_DEADDIR(inode)) { - res = file->f_op->readdir(file, buf, filler); + res = file->f_op->readdir(file, ctx, ctx->actor); file_accessed(file); } mutex_unlock(&inode->i_mutex); out: return res; } - -EXPORT_SYMBOL(vfs_readdir); +EXPORT_SYMBOL(iterate_dir); /* * Traditional linux readdir() handling.. @@ -66,6 +65,7 @@ struct old_linux_dirent { }; struct readdir_callback { + struct dir_context ctx; struct old_linux_dirent __user * dirent; int result; }; @@ -73,7 +73,7 @@ struct readdir_callback { static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct readdir_callback * buf = (struct readdir_callback *) __buf; + struct readdir_callback *buf = (struct readdir_callback *) __buf; struct old_linux_dirent __user * dirent; unsigned long d_ino; @@ -112,10 +112,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (!f.file) return -EBADF; + buf.ctx.actor = fillonedir; buf.result = 0; buf.dirent = dirent; - error = vfs_readdir(f.file, fillonedir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -137,6 +138,7 @@ struct linux_dirent { }; struct getdents_callback { + struct dir_context ctx; struct linux_dirent __user * current_dir; struct linux_dirent __user * previous; int count; @@ -205,8 +207,9 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir; - error = vfs_readdir(f.file, filldir, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; @@ -221,6 +224,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, } struct getdents_callback64 { + struct dir_context ctx; struct linux_dirent64 __user * current_dir; struct linux_dirent64 __user * previous; int count; @@ -285,8 +289,9 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, buf.previous = NULL; buf.count = count; buf.error = 0; + buf.ctx.actor = filldir64; - error = vfs_readdir(f.file, filldir64, &buf); + error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; lastdirent = buf.previous; diff --git a/include/linux/fs.h b/include/linux/fs.h index 65c2be22b601..643e5b6cbaf5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1506,6 +1506,9 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); * to have different dirent layouts depending on the binary type. */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); +struct dir_context { + filldir_t actor; +}; struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -2494,6 +2497,7 @@ loff_t inode_get_bytes(struct inode *inode); void inode_set_bytes(struct inode *inode, loff_t bytes); extern int vfs_readdir(struct file *, filldir_t, void *); +extern int iterate_dir(struct file *, struct dir_context *); extern int vfs_stat(const char __user *, struct kstat *); extern int vfs_lstat(const char __user *, struct kstat *); -- cgit v1.2.3 From 83fd542759010949ac7d9638b615fac1bb9744e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 15 May 2013 18:49:12 -0400 Subject: [readdir] introduce ->iterate(), ctx->pos, dir_emit() New method - ->iterate(file, ctx). That's the replacement for ->readdir(); it takes callback from ctx->actor, uses ctx->pos instead of file->f_pos and calls dir_emit(ctx, ...) instead of filldir(data, ...). It does *not* update file->f_pos (or look at it, for that matter); iterate_dir() does the update. Note that dir_emit() takes the offset from ctx->pos (and eventually filldir_t will lose that argument). Signed-off-by: Al Viro --- arch/parisc/hpux/fs.c | 2 +- fs/coda/dir.c | 19 +++++++++++++++---- fs/compat.c | 4 ++-- fs/exportfs/expfs.c | 2 +- fs/nfsd/nfs4recover.c | 14 ++++++++------ fs/readdir.c | 15 +++++++++++---- include/linux/fs.h | 9 +++++++++ 7 files changed, 47 insertions(+), 18 deletions(-) diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index fc2cbee86e34..eca8230267cc 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -129,7 +129,7 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(arg.file->f_pos, &lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index b7d3a05c062c..fc66861b3598 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -391,8 +391,7 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) if (!host_file->f_op) return -ENOTDIR; - if (host_file->f_op->readdir) - { + if (host_file->f_op->readdir) { /* potemkin case: we were handed a directory inode. * We can't use vfs_readdir because we have to keep the file * position in sync between the coda_file and the host_file. @@ -410,8 +409,20 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) coda_file->f_pos = host_file->f_pos; mutex_unlock(&host_inode->i_mutex); - } - else /* Venus: we must read Venus dirents from a file */ + } else if (host_file->f_op->iterate) { + struct inode *host_inode = file_inode(host_file); + struct dir_context *ctx = buf; + + mutex_lock(&host_inode->i_mutex); + ret = -ENOENT; + if (!IS_DEADDIR(host_inode)) { + ret = host_file->f_op->iterate(host_file, ctx); + file_accessed(host_file); + } + mutex_unlock(&host_inode->i_mutex); + + coda_file->f_pos = ctx->pos; + } else /* Venus: we must read Venus dirents from a file */ ret = coda_venus_readdir(coda_file, buf, filldir); return ret; diff --git a/fs/compat.c b/fs/compat.c index 2279b59e81f2..69ca1e301766 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -975,7 +975,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(f.file->f_pos, &lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; @@ -1062,7 +1062,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = f.file->f_pos; + typeof(lastdirent->d_off) d_off = buf.ctx.pos; if (__put_user_unaligned(d_off, &lastdirent->d_off)) error = -EFAULT; else diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 7cb190426cec..6c8ef1dd4bdf 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -272,7 +272,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) goto out; error = -EINVAL; - if (!file->f_op->readdir) + if (!file->f_op->readdir && !file->f_op->iterate) goto out_close; buffer.name = name; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4f8cc6ba7c28..2fa2e2eb190b 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -240,11 +240,16 @@ struct name_list { struct list_head list; }; +struct nfs4_dir_ctx { + struct dir_context ctx; + struct list_head names; +}; + static int nfsd4_build_namelist(void *arg, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct list_head *names = arg; + struct nfs4_dir_ctx *ctx = arg; struct name_list *entry; if (namlen != HEXDIR_LEN - 1) @@ -254,7 +259,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, return -ENOMEM; memcpy(entry->name, name, HEXDIR_LEN - 1); entry->name[HEXDIR_LEN - 1] = '\0'; - list_add(&entry->list, names); + list_add(&entry->list, &ctx->names); return 0; } @@ -263,10 +268,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - struct { - struct dir_context ctx; - struct list_head names; - } ctx; + struct nfs4_dir_ctx ctx; int status; status = nfs4_save_creds(&original_cred); diff --git a/fs/readdir.c b/fs/readdir.c index 5b620a2b45e6..5d6578affbbf 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,7 +24,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); int res = -ENOTDIR; - if (!file->f_op || !file->f_op->readdir) + if (!file->f_op || (!file->f_op->readdir && !file->f_op->iterate)) goto out; res = security_file_permission(file, MAY_READ); @@ -37,7 +37,14 @@ int iterate_dir(struct file *file, struct dir_context *ctx) res = -ENOENT; if (!IS_DEADDIR(inode)) { - res = file->f_op->readdir(file, ctx, ctx->actor); + if (file->f_op->iterate) { + ctx->pos = file->f_pos; + res = file->f_op->iterate(file, ctx); + file->f_pos = ctx->pos; + } else { + res = file->f_op->readdir(file, ctx, ctx->actor); + ctx->pos = file->f_pos; + } file_accessed(file); } mutex_unlock(&inode->i_mutex); @@ -214,7 +221,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - if (put_user(f.file->f_pos, &lastdirent->d_off)) + if (put_user(buf.ctx.pos, &lastdirent->d_off)) error = -EFAULT; else error = count - buf.count; @@ -296,7 +303,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, error = buf.error; lastdirent = buf.previous; if (lastdirent) { - typeof(lastdirent->d_off) d_off = f.file->f_pos; + typeof(lastdirent->d_off) d_off = buf.ctx.pos; if (__put_user(d_off, &lastdirent->d_off)) error = -EFAULT; else diff --git a/include/linux/fs.h b/include/linux/fs.h index 643e5b6cbaf5..b9641ae68da8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1508,7 +1508,15 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { filldir_t actor; + loff_t pos; }; + +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} struct block_device_operations; /* These macros are for out of kernel modules to test that @@ -1525,6 +1533,7 @@ struct file_operations { ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); + int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); -- cgit v1.2.3 From b2497fc3057ae27db9aa29579f16ae5afb6d6d08 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 22 May 2013 22:22:04 -0400 Subject: [readdir] constify ->actor Signed-off-by: Al Viro --- arch/alpha/kernel/osf_sys.c | 13 ++++++------- arch/parisc/hpux/fs.c | 12 +++++------- fs/compat.c | 33 ++++++++++++++------------------- fs/exportfs/expfs.c | 10 +++++----- fs/gfs2/export.c | 6 ++++-- fs/nfsd/nfs4recover.c | 7 ++++--- fs/nfsd/vfs.c | 7 ++++--- fs/readdir.c | 33 ++++++++++++++------------------- include/linux/fs.h | 2 +- 9 files changed, 57 insertions(+), 66 deletions(-) diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index ac19c7299d8e..1402fcc11c2c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -147,17 +147,16 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, { int error; struct fd arg = fdget(fd); - struct osf_dirent_callback buf; + struct osf_dirent_callback buf = { + .ctx.actor = osf_filldir, + .dirent = dirent, + .basep = basep, + .count = count + }; if (!arg.file) return -EBADF; - buf.dirent = dirent; - buf.basep = basep; - buf.count = count; - buf.error = 0; - buf.ctx.actor = osf_filldir; - error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index eca8230267cc..88d0962de65a 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -111,19 +111,17 @@ int hpux_getdents(unsigned int fd, struct hpux_dirent __user *dirent, unsigned i { struct fd arg; struct hpux_dirent __user * lastdirent; - struct getdents_callback buf; + struct getdents_callback buf = { + .ctx.actor = filldir, + .current_dir = dirent, + .count = count + }; int error; arg = fdget(fd); if (!arg.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir; - error = iterate_dir(arg.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/fs/compat.c b/fs/compat.c index 69ca1e301766..6af20de2c1a3 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -874,15 +874,14 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, { int error; struct fd f = fdget(fd); - struct compat_readdir_callback buf; + struct compat_readdir_callback buf = { + .ctx.actor = compat_fillonedir, + .dirent = dirent + }; if (!f.file) return -EBADF; - buf.result = 0; - buf.dirent = dirent; - buf.ctx.actor = compat_fillonedir; - error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -954,7 +953,11 @@ asmlinkage long compat_sys_getdents(unsigned int fd, { struct fd f; struct compat_linux_dirent __user * lastdirent; - struct compat_getdents_callback buf; + struct compat_getdents_callback buf = { + .ctx.actor = compat_filldir, + .current_dir = dirent, + .count = count + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -964,12 +967,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = compat_filldir; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; @@ -1041,7 +1038,11 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, { struct fd f; struct linux_dirent64 __user * lastdirent; - struct compat_getdents_callback64 buf; + struct compat_getdents_callback64 buf = { + .ctx.actor = compat_filldir64, + .current_dir = dirent, + .count = count + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -1051,12 +1052,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = compat_filldir64; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 6c8ef1dd4bdf..43b448ddc3dc 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -255,7 +255,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) struct inode *dir = path->dentry->d_inode; int error; struct file *file; - struct getdents_callback buffer; + struct getdents_callback buffer = { + .ctx.actor = filldir_one, + .name = name, + .ino = child->d_inode->i_ino + }; error = -ENOTDIR; if (!dir || !S_ISDIR(dir->i_mode)) @@ -275,11 +279,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) if (!file->f_op->readdir && !file->f_op->iterate) goto out_close; - buffer.name = name; - buffer.ino = child->d_inode->i_ino; - buffer.found = 0; buffer.sequence = 0; - buffer.ctx.actor = filldir_one; while (1) { int old_seq = buffer.sequence; diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9973df4ff565..7102c7a5af4d 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -88,7 +88,10 @@ static int gfs2_get_name(struct dentry *parent, char *name, struct inode *dir = parent->d_inode; struct inode *inode = child->d_inode; struct gfs2_inode *dip, *ip; - struct get_name_filldir gnfd; + struct get_name_filldir gnfd = { + .ctx.actor = get_name_filldir, + .name = name + }; struct gfs2_holder gh; u64 offset = 0; int error; @@ -106,7 +109,6 @@ static int gfs2_get_name(struct dentry *parent, char *name, *name = 0; gnfd.inum.no_addr = ip->i_no_addr; gnfd.inum.no_formal_ino = ip->i_no_formal_ino; - gnfd.name = name; error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); if (error) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 2fa2e2eb190b..105a3b080d12 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -268,7 +268,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) { const struct cred *original_cred; struct dentry *dir = nn->rec_file->f_path.dentry; - struct nfs4_dir_ctx ctx; + struct nfs4_dir_ctx ctx = { + .ctx.actor = nfsd4_build_namelist, + .names = LIST_HEAD_INIT(ctx.names) + }; int status; status = nfs4_save_creds(&original_cred); @@ -281,8 +284,6 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) return status; } - INIT_LIST_HEAD(&ctx.names); - ctx.ctx.actor = nfsd4_build_namelist; status = iterate_dir(nn->rec_file, &ctx.ctx); mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); while (!list_empty(&ctx.names)) { diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f939ba9bf8e8..a6bc8a7423db 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1944,14 +1944,15 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { - struct readdir_data buf; struct buffered_dirent *de; int host_err; int size; loff_t offset; + struct readdir_data buf = { + .ctx.actor = nfsd_buffered_filldir, + .dirent = (void *)__get_free_page(GFP_KERNEL) + }; - buf.ctx.actor = nfsd_buffered_filldir; - buf.dirent = (void *)__get_free_page(GFP_KERNEL); if (!buf.dirent) return nfserrno(-ENOMEM); diff --git a/fs/readdir.c b/fs/readdir.c index 5d6578affbbf..d46eca8567a4 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -114,15 +114,14 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, { int error; struct fd f = fdget(fd); - struct readdir_callback buf; + struct readdir_callback buf = { + .ctx.actor = fillonedir, + .dirent = dirent + }; if (!f.file) return -EBADF; - buf.ctx.actor = fillonedir; - buf.result = 0; - buf.dirent = dirent; - error = iterate_dir(f.file, &buf.ctx); if (buf.result) error = buf.result; @@ -200,7 +199,11 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, { struct fd f; struct linux_dirent __user * lastdirent; - struct getdents_callback buf; + struct getdents_callback buf = { + .ctx.actor = filldir, + .count = count, + .current_dir = dirent + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -210,12 +213,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; @@ -282,7 +279,11 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, { struct fd f; struct linux_dirent64 __user * lastdirent; - struct getdents_callback64 buf; + struct getdents_callback64 buf = { + .ctx.actor = filldir64, + .count = count, + .current_dir = dirent + }; int error; if (!access_ok(VERIFY_WRITE, dirent, count)) @@ -292,12 +293,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (!f.file) return -EBADF; - buf.current_dir = dirent; - buf.previous = NULL; - buf.count = count; - buf.error = 0; - buf.ctx.actor = filldir64; - error = iterate_dir(f.file, &buf.ctx); if (error >= 0) error = buf.error; diff --git a/include/linux/fs.h b/include/linux/fs.h index b9641ae68da8..9257703686d4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1507,7 +1507,7 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); */ typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); struct dir_context { - filldir_t actor; + const filldir_t actor; loff_t pos; }; -- cgit v1.2.3 From 639839d89f7b7fd15160389f57edf06b93205b2d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Jul 2013 15:20:25 +0300 Subject: Squashfs: sanity check information from disk We read the size of the name from the disk, but a larger name than expected would cause memory corruption. Signed-off-by: Dan Carpenter Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 7834a517f7f4..f866d42a8b6f 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb, int len) { struct squashfs_sb_info *msblk = sb->s_fs_info; - int i, size, length = 0, err; + int i, length = 0, err; + unsigned int size; struct squashfs_dir_index *index; char *str; @@ -103,6 +104,10 @@ static int get_dir_index_using_name(struct super_block *sb, size = le32_to_cpu(index->size) + 1; + if (size > SQUASHFS_NAME_LEN) { + err = -EINVAL; + break; + } err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); -- cgit v1.2.3 From 706a7943a802fedc861d39598a4399999209e341 Mon Sep 17 00:00:00 2001 From: Manish Sharma Date: Wed, 4 Sep 2013 22:31:23 +0530 Subject: Squashfs: Optimized uncompressed buffer loop Merged the two for loops. We might get a little gain by overlapping wait_on_bh and the memcpy operations. Signed-off-by: Manish Sharma Signed-off-by: Phillip Lougher --- fs/squashfs/block.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index fb50652e4e11..41d108ecc9be 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -167,17 +167,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, /* * Block is uncompressed. */ - int i, in, pg_offset = 0; - - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - } + int in, pg_offset = 0; for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { page++; -- cgit v1.2.3 From edd2496ccf4a46cfb4c2d20ff9b93ab29b055e1b Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:02:53 +0100 Subject: Squashfs: fix corruption check in get_dir_index_using_name() Patch "Squashfs: sanity check information from disk" from Dan Carpenter adds a missing check for corruption in the "size" field while reading the directory index from disk. It, however, sets err to -EINVAL, this value is not used later, and so setting it is completely redundant. So remove it. Errors in reading the index are deliberately non-fatal. If we get an error in reading the index we just return the part of the index we have managed to read - the index isn't essential, just quicker. Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index f866d42a8b6f..342a5aa5a0e4 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -104,10 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb, size = le32_to_cpu(index->size) + 1; - if (size > SQUASHFS_NAME_LEN) { - err = -EINVAL; + if (size > SQUASHFS_NAME_LEN) break; - } err = squashfs_read_metadata(sb, index->name, &index_start, &index_offset, size); -- cgit v1.2.3 From 98bdbcf71ac52afdbf832489c6077790689c9018 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:21:52 +0100 Subject: Squashfs: fix corruption checks in squashfs_lookup() The dir_count and size fields when read from disk are sanity checked for correctness. However, the sanity checks only check the values are not greater than expected. As dir_count and size were incorrectly defined as signed ints, this can lead to corrupted values appearing as negative which are not trapped. Signed-off-by: Phillip Lougher --- fs/squashfs/namei.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index 342a5aa5a0e4..67cad77fefb4 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -147,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, struct squashfs_dir_entry *dire; u64 block = squashfs_i(dir)->start + msblk->directory_table; int offset = squashfs_i(dir)->offset; - int err, length, dir_count, size; + int err, length; + unsigned int dir_count, size; TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); -- cgit v1.2.3 From 4ff8d316da73165acdda9682ac0413b2a141c6d6 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:38:43 +0100 Subject: Squashfs: fix corruption checks in squashfs_readdir() The dir_count and size fields when read from disk are sanity checked for correctness. However, the sanity checks only check the values are not greater than expected. As dir_count and size were incorrectly defined as signed ints, this can lead to corrupted values appearing as negative which are not trapped. Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index f7f527bf8c10..119208422260 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -105,9 +105,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; - int offset = squashfs_i(inode)->offset, length, dir_count, size, - type, err; - unsigned int inode_number; + int offset = squashfs_i(inode)->offset, length, type, err; + unsigned int inode_number, dir_count, size; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; -- cgit v1.2.3 From 4fed250b48dc5033a0fcada7aa2ca77f507c7ddc Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Tue, 3 Sep 2013 04:52:52 +0100 Subject: Squashfs: add corruption check in get_dir_index_using_offset() We read the size (of the name) field from disk. This value should be sanity checked for correctness to avoid blindly reading huge amounts of unnecessary data from disk on corruption. Note, here we're not actually reading the name into a buffer, but skipping it, and so corruption doesn't cause buffer overflow, merely lots of unnecessary amounts of data to be read. Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 119208422260..bd7155b198a9 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb, { struct squashfs_sb_info *msblk = sb->s_fs_info; int err, i, index, length = 0; + unsigned int size; struct squashfs_dir_index dir_index; TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n", @@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb, */ break; + size = le32_to_cpu(dir_index.size) + 1; + + /* size should never be larger than SQUASHFS_NAME_LEN */ + if (size > SQUASHFS_NAME_LEN) + break; + err = squashfs_read_metadata(sb, NULL, &index_start, - &index_offset, le32_to_cpu(dir_index.size) + 1); + &index_offset, size); if (err < 0) break; -- cgit v1.2.3 From bb5919cc205072075aab3639a9c62302db148d25 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 4 Sep 2013 02:58:12 +0100 Subject: Squashfs: add corruption check for type in squashfs_readdir() We read the type field from disk. This value should be sanity checked for correctness to avoid an out of bounds access when reading the squashfs_filetype_table array. Signed-off-by: Phillip Lougher --- fs/squashfs/dir.c | 7 +++++-- fs/squashfs/squashfs_fs.h | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index bd7155b198a9..d8c2d747be28 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -112,8 +112,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; u64 block = squashfs_i(inode)->start + msblk->directory_table; - int offset = squashfs_i(inode)->offset, length, type, err; - unsigned int inode_number, dir_count, size; + int offset = squashfs_i(inode)->offset, length, err; + unsigned int inode_number, dir_count, size, type; struct squashfs_dir_header dirh; struct squashfs_dir_entry *dire; @@ -206,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx) ((short) le16_to_cpu(dire->inode_number)); type = le16_to_cpu(dire->type); + if (type > SQUASHFS_MAX_DIR_TYPE) + goto failed_read; + if (!dir_emit(ctx, dire->name, size, inode_number, squashfs_filetype_table[type])) diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 9e2349d07cb1..4b2beda49498 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -87,7 +87,7 @@ #define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \ SQUASHFS_COMP_OPT) -/* Max number of types and file types */ +/* Inode types including extended types */ #define SQUASHFS_DIR_TYPE 1 #define SQUASHFS_REG_TYPE 2 #define SQUASHFS_SYMLINK_TYPE 3 @@ -103,6 +103,9 @@ #define SQUASHFS_LFIFO_TYPE 13 #define SQUASHFS_LSOCKET_TYPE 14 +/* Max type value stored in directory entry */ +#define SQUASHFS_MAX_DIR_TYPE 7 + /* Xattr types */ #define SQUASHFS_XATTR_USER 0 #define SQUASHFS_XATTR_TRUSTED 1 -- cgit v1.2.3 From 06e4ac0b0a206086bff4cb34c07f5a87e87aef4c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 Nov 2013 02:56:26 +0000 Subject: Squashfs: Refactor decompressor interface and code The decompressor interface and code was written from the point of view of single-threaded operation. In doing so it mixed a lot of single-threaded implementation specific aspects into the decompressor code and elsewhere which makes it difficult to seamlessly support multiple different decompressor implementations. This patch does the following: 1. It removes compressor_options parsing from the decompressor init() function. This allows the decompressor init() function to be dynamically called to instantiate multiple decompressors, without the compressor options needing to be read and parsed each time. 2. It moves threading and all sleeping operations out of the decompressors. In doing so, it makes the decompressors non-blocking wrappers which only deal with interfacing with the decompressor implementation. 3. It splits decompressor.[ch] into decompressor generic functions in decompressor.[ch], and moves the single threaded decompressor implementation into decompressor_single.c. The result of this patch is Squashfs should now be able to support multiple decompressors by adding new decompressor_xxx.c files with specialised implementations of the functions in decompressor_single.c Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Makefile | 2 +- fs/squashfs/block.c | 11 +++-- fs/squashfs/decompressor.c | 47 ++++++++++++++------- fs/squashfs/decompressor.h | 21 +++------ fs/squashfs/decompressor_single.c | 86 +++++++++++++++++++++++++++++++++++++ fs/squashfs/lzo_wrapper.c | 24 +++-------- fs/squashfs/squashfs.h | 9 +++- fs/squashfs/squashfs_fs_sb.h | 3 +- fs/squashfs/super.c | 10 ++--- fs/squashfs/xz_wrapper.c | 89 +++++++++++++++++++++------------------ fs/squashfs/zlib_wrapper.c | 50 +++++++--------------- 11 files changed, 216 insertions(+), 136 deletions(-) create mode 100644 fs/squashfs/decompressor_single.c diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b4..c223c8439c21 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o decompressor_single.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9be..4dd402597f22 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -93,7 +93,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail; + int bytes, compressed, b = 0, k = 0, page = 0, avail, i; bh = kcalloc(((srclength + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); @@ -158,6 +158,12 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, ll_rw_block(READ, b - 1, bh + 1); } + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + if (compressed) { length = squashfs_decompress(msblk, buffer, bh, b, offset, length, srclength, pages); @@ -172,9 +178,6 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto block_release; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { page++; diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86abc..234291f79ba5 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -37,29 +37,29 @@ */ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { - NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 + NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { - NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 + NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 }; #endif #ifndef CONFIG_SQUASHFS_XZ static const struct squashfs_decompressor squashfs_xz_comp_ops = { - NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 + NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 }; #endif #ifndef CONFIG_SQUASHFS_ZLIB static const struct squashfs_decompressor squashfs_zlib_comp_ops = { - NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 + NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 }; #endif static const struct squashfs_decompressor squashfs_unknown_comp_ops = { - NULL, NULL, NULL, 0, "unknown", 0 + NULL, NULL, NULL, NULL, 0, "unknown", 0 }; static const struct squashfs_decompressor *decompressor[] = { @@ -83,10 +83,10 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) } -void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) +static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; - void *strm, *buffer = NULL; + void *buffer = NULL, *comp_opts; int length = 0; /* @@ -94,23 +94,40 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) */ if (SQUASHFS_COMP_OPTS(flags)) { buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); - if (buffer == NULL) - return ERR_PTR(-ENOMEM); + if (buffer == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } length = squashfs_read_data(sb, &buffer, sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + PAGE_CACHE_SIZE, 1); if (length < 0) { - strm = ERR_PTR(length); - goto finished; + comp_opts = ERR_PTR(length); + goto out; } } - strm = msblk->decompressor->init(msblk, buffer, length); + comp_opts = squashfs_comp_opts(msblk, buffer, length); -finished: +out: kfree(buffer); + return comp_opts; +} + + +void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + void *stream, *comp_opts = get_comp_opts(sb, flags); + + if (IS_ERR(comp_opts)) + return comp_opts; + + stream = squashfs_decompressor_create(msblk, comp_opts); + if (IS_ERR(stream)) + kfree(comp_opts); - return strm; + return stream; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e29029..6cdb20a3878a 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -24,28 +24,21 @@ */ struct squashfs_decompressor { - void *(*init)(struct squashfs_sb_info *, void *, int); + void *(*init)(struct squashfs_sb_info *, void *); + void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void **, + int (*decompress)(struct squashfs_sb_info *, void *, void **, struct buffer_head **, int, int, int, int, int); int id; char *name; int supported; }; -static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, - void *s) +static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int length) { - if (msblk->decompressor) - msblk->decompressor->free(s); -} - -static inline int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) -{ - return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + return msblk->decompressor->comp_opts ? + msblk->decompressor->comp_opts(msblk, buff, length) : NULL; } #ifdef CONFIG_SQUASHFS_XZ diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 000000000000..f857cf6f22d4 --- /dev/null +++ b/fs/squashfs/decompressor_single.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements single-threaded decompression in the + * decompressor framework + */ + +struct squashfs_stream { + void *stream; + struct mutex mutex; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + int err = -ENOMEM; + + stream = kmalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto out; + + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + + kfree(comp_opts); + mutex_init(&stream->mutex); + return stream; + +out: + kfree(stream); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + + if (stream) { + msblk->decompressor->free(stream->stream); + kfree(stream); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + + mutex_lock(&stream->mutex); + res = msblk->decompressor->decompress(msblk, stream->stream, buffer, + bh, b, offset, length, srclength, pages); + mutex_unlock(&stream->mutex); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return 1; +} diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f088..75c3b5779172 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -37,7 +37,7 @@ struct squashfs_lzo { void *output; }; -static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) +static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) { int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); @@ -74,22 +74,16 @@ static void lzo_free(void *strm) } -static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { - struct squashfs_lzo *stream = msblk->stream; + struct squashfs_lzo *stream = strm; void *buff = stream->input; int avail, i, bytes = length, res; size_t out_len = srclength; - mutex_lock(&msblk->read_data_mutex); - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - avail = min(bytes, msblk->devblksize - offset); memcpy(buff, bh[i]->b_data + offset, avail); buff += avail; @@ -111,17 +105,9 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, bytes -= avail; } - mutex_unlock(&msblk->read_data_mutex); return res; -block_release: - for (; i < b; i++) - put_bh(bh[i]); - failed: - mutex_unlock(&msblk->read_data_mutex); - - ERROR("lzo decompression failed, data probably corrupt\n"); return -EIO; } diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed08..2e2751df8452 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); /* decompressor.c */ extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); -extern void *squashfs_decompressor_init(struct super_block *, unsigned short); +extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); + +/* decompressor_xxx.c */ +extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); +extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); +extern int squashfs_decompress(struct squashfs_sb_info *, void **, + struct buffer_head **, int, int, int, int, int); +extern int squashfs_max_decompressors(void); /* export.c */ extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f296..9cdcf4150d59 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -63,10 +63,9 @@ struct squashfs_sb_info { __le64 *id_table; __le64 *fragment_index; __le64 *xattr_id_table; - struct mutex read_data_mutex; struct mutex meta_index_mutex; struct meta_index *meta_index; - void *stream; + struct squashfs_stream *stream; __le64 *inode_lookup_table; u64 inode_table; u64 directory_table; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053ca..202df6312d4e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); msblk->devblksize_log2 = ffz(~msblk->devblksize); - mutex_init(&msblk->read_data_mutex); mutex_init(&msblk->meta_index_mutex); /* @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; /* Allocate read_page block */ - msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + msblk->read_page = squashfs_cache_init("data", + squashfs_max_decompressors(), msblk->block_size); if (msblk->read_page == NULL) { ERROR("Failed to allocate read_page block\n"); goto failed_mount; } - msblk->stream = squashfs_decompressor_init(sb, flags); + msblk->stream = squashfs_decompressor_setup(sb, flags); if (IS_ERR(msblk->stream)) { err = PTR_ERR(msblk->stream); msblk->stream = NULL; @@ -336,7 +336,7 @@ failed_mount: squashfs_cache_delete(msblk->block_cache); squashfs_cache_delete(msblk->fragment_cache); squashfs_cache_delete(msblk->read_page); - squashfs_decompressor_free(msblk, msblk->stream); + squashfs_decompressor_destroy(msblk); kfree(msblk->inode_lookup_table); kfree(msblk->fragment_index); kfree(msblk->id_table); @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) squashfs_cache_delete(sbi->block_cache); squashfs_cache_delete(sbi->fragment_cache); squashfs_cache_delete(sbi->read_page); - squashfs_decompressor_free(sbi, sbi->stream); + squashfs_decompressor_destroy(sbi); kfree(sbi->id_table); kfree(sbi->fragment_index); kfree(sbi->meta_index); diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f6..5d1d07cca6b4 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -38,38 +38,63 @@ struct squashfs_xz { struct xz_buf buf; }; -struct comp_opts { +struct disk_comp_opts { __le32 dictionary_size; __le32 flags; }; -static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, - int len) +struct comp_opts { + int dict_size; +}; + +static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) { - struct comp_opts *comp_opts = buff; - struct squashfs_xz *stream; - int dict_size = msblk->block_size; - int err, n; + struct disk_comp_opts *comp_opts = buff; + struct comp_opts *opts; + int err = 0, n; + + opts = kmalloc(sizeof(*opts), GFP_KERNEL); + if (opts == NULL) { + err = -ENOMEM; + goto out2; + } if (comp_opts) { /* check compressor options are the expected length */ if (len < sizeof(*comp_opts)) { err = -EIO; - goto failed; + goto out; } - dict_size = le32_to_cpu(comp_opts->dictionary_size); + opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); /* the dictionary size should be 2^n or 2^n+2^(n+1) */ - n = ffs(dict_size) - 1; - if (dict_size != (1 << n) && dict_size != (1 << n) + + n = ffs(opts->dict_size) - 1; + if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + (1 << (n + 1))) { err = -EIO; - goto failed; + goto out; } - } + } else + /* use defaults */ + opts->dict_size = max_t(int, msblk->block_size, + SQUASHFS_METADATA_SIZE); - dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); + return opts; + +out: + kfree(opts); +out2: + return ERR_PTR(err); +} + + +static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) +{ + struct comp_opts *comp_opts = buff; + struct squashfs_xz *stream; + int err; stream = kmalloc(sizeof(*stream), GFP_KERNEL); if (stream == NULL) { @@ -77,7 +102,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, goto failed; } - stream->state = xz_dec_init(XZ_PREALLOC, dict_size); + stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); if (stream->state == NULL) { kfree(stream); err = -ENOMEM; @@ -103,15 +128,13 @@ static void squashfs_xz_free(void *strm) } -static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { enum xz_ret xz_err; int avail, total = 0, k = 0, page = 0; - struct squashfs_xz *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); stream->buf.in_pos = 0; @@ -124,10 +147,6 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (stream->buf.in_pos == stream->buf.in_size && k < b) { avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->buf.in = bh[k]->b_data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; @@ -147,23 +166,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (xz_err == XZ_OK); - if (xz_err != XZ_STREAM_END) { - ERROR("xz_dec_run error, data probably corrupt\n"); - goto release_mutex; - } - - if (k < b) { - ERROR("xz_uncompress error, input remaining\n"); - goto release_mutex; - } - - total += stream->buf.out_pos; - mutex_unlock(&msblk->read_data_mutex); - return total; + if (xz_err != XZ_STREAM_END || k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return total + stream->buf.out_pos; +out: for (; k < b; k++) put_bh(bh[k]); @@ -172,6 +180,7 @@ release_mutex: const struct squashfs_decompressor squashfs_xz_comp_ops = { .init = squashfs_xz_init, + .comp_opts = squashfs_xz_comp_opts, .free = squashfs_xz_free, .decompress = squashfs_xz_uncompress, .id = XZ_COMPRESSION, diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d86..bb049027d15c 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -33,7 +33,7 @@ #include "squashfs.h" #include "decompressor.h" -static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) +static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); if (stream == NULL) @@ -61,15 +61,13 @@ static void zlib_free(void *strm) } -static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, - struct buffer_head **bh, int b, int offset, int length, int srclength, - int pages) +static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) { int zlib_err, zlib_init = 0; int k = 0, page = 0; - z_stream *stream = msblk->stream; - - mutex_lock(&msblk->read_data_mutex); + z_stream *stream = strm; stream->avail_out = 0; stream->avail_in = 0; @@ -78,10 +76,6 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (stream->avail_in == 0 && k < b) { int avail = min(length, msblk->devblksize - offset); length -= avail; - wait_on_buffer(bh[k]); - if (!buffer_uptodate(bh[k])) - goto release_mutex; - stream->next_in = bh[k]->b_data + offset; stream->avail_in = avail; offset = 0; @@ -94,12 +88,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, if (!zlib_init) { zlib_err = zlib_inflateInit(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflateInit returned unexpected " - "result 0x%x, srclength %d\n", - zlib_err, srclength); - goto release_mutex; - } + if (zlib_err != Z_OK) + goto out; zlib_init = 1; } @@ -109,29 +99,19 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, put_bh(bh[k++]); } while (zlib_err == Z_OK); - if (zlib_err != Z_STREAM_END) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } + if (zlib_err != Z_STREAM_END) + goto out; zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) { - ERROR("zlib_inflate error, data probably corrupt\n"); - goto release_mutex; - } - - if (k < b) { - ERROR("zlib_uncompress error, data remaining\n"); - goto release_mutex; - } + if (zlib_err != Z_OK) + goto out; - length = stream->total_out; - mutex_unlock(&msblk->read_data_mutex); - return length; + if (k < b) + goto out; -release_mutex: - mutex_unlock(&msblk->read_data_mutex); + return stream->total_out; +out: for (; k < b; k++) put_bh(bh[k]); -- cgit v1.2.3 From c0f8b08ad95ca0724ac465c391aef3fc456f5aa9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Oct 2013 14:26:30 +0900 Subject: squashfs: Enhance parallel I/O Now squashfs have used for only one stream buffer for decompression so it hurts parallel read performance so this patch supports multiple decompressor to enhance performance parallel I/O. Four 1G file dd read on KVM machine which has 2 CPU and 4G memory. dd if=test/test1.dat of=/dev/null & dd if=test/test2.dat of=/dev/null & dd if=test/test3.dat of=/dev/null & dd if=test/test4.dat of=/dev/null & old : 1m39s -> new : 9s * From v1 * Change comp_strm with decomp_strm - Phillip * Change/add comments - Phillip Signed-off-by: Minchan Kim Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 13 +++ fs/squashfs/Makefile | 9 +- fs/squashfs/decompressor_multi.c | 200 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 fs/squashfs/decompressor_multi.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd4..1c6d340fc61f 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -63,6 +63,19 @@ config SQUASHFS_LZO If unsure, say N. +config SQUASHFS_MULTI_DECOMPRESSOR + bool "Use multiple decompressors for handling parallel I/O" + depends on SQUASHFS + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + If unsure, say N. + config SQUASHFS_XZ bool "Include support for XZ compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index c223c8439c21..dfebc3b12d61 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,8 +4,15 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o decompressor_single.o +squashfs-y += namei.o super.o symlink.o decompressor.o + squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o + +ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR + squashfs-y += decompressor_multi.o +else + squashfs-y += decompressor_single.o +endif diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 000000000000..462731db5130 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2013 + * Minchan Kim + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression in the + * decompressor framework + */ + + +/* + * The reason that multiply two is that a CPU can request new I/O + * while it is waiting previous request. + */ +#define MAX_DECOMPRESSOR (num_online_cpus() * 2) + + +int squashfs_max_decompressors(void) +{ + return MAX_DECOMPRESSOR; +} + + +struct squashfs_stream { + void *comp_opts; + struct list_head strm_list; + struct mutex mutex; + int avail_decomp; + wait_queue_head_t wait; +}; + + +struct decomp_stream { + void *stream; + struct list_head list; +}; + + +static void put_decomp_stream(struct decomp_stream *decomp_strm, + struct squashfs_stream *stream) +{ + mutex_lock(&stream->mutex); + list_add(&decomp_strm->list, &stream->strm_list); + mutex_unlock(&stream->mutex); + wake_up(&stream->wait); +} + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct decomp_stream *decomp_strm = NULL; + int err = -ENOMEM; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (!stream) + goto out; + + stream->comp_opts = comp_opts; + mutex_init(&stream->mutex); + INIT_LIST_HEAD(&stream->strm_list); + init_waitqueue_head(&stream->wait); + + /* + * We should have a decompressor at least as default + * so if we fail to allocate new decompressor dynamically, + * we could always fall back to default decompressor and + * file system works. + */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto out; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + err = PTR_ERR(decomp_strm->stream); + goto out; + } + + list_add(&decomp_strm->list, &stream->strm_list); + stream->avail_decomp = 1; + return stream; + +out: + kfree(decomp_strm); + kfree(stream); + return ERR_PTR(err); +} + + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream *stream = msblk->stream; + if (stream) { + struct decomp_stream *decomp_strm; + + while (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + msblk->decompressor->free(decomp_strm->stream); + kfree(decomp_strm); + stream->avail_decomp--; + } + } + + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); +} + + +static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, + struct squashfs_stream *stream) +{ + struct decomp_stream *decomp_strm; + + while (1) { + mutex_lock(&stream->mutex); + + /* There is available decomp_stream */ + if (!list_empty(&stream->strm_list)) { + decomp_strm = list_entry(stream->strm_list.prev, + struct decomp_stream, list); + list_del(&decomp_strm->list); + mutex_unlock(&stream->mutex); + break; + } + + /* + * If there is no available decomp and already full, + * let's wait for releasing decomp from other users. + */ + if (stream->avail_decomp >= MAX_DECOMPRESSOR) + goto wait; + + /* Let's allocate new decomp */ + decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); + if (!decomp_strm) + goto wait; + + decomp_strm->stream = msblk->decompressor->init(msblk, + stream->comp_opts); + if (IS_ERR(decomp_strm->stream)) { + kfree(decomp_strm); + goto wait; + } + + stream->avail_decomp++; + WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); + + mutex_unlock(&stream->mutex); + break; +wait: + /* + * If system memory is tough, let's for other's + * releasing instead of hurting VM because it could + * make page cache thrashing. + */ + mutex_unlock(&stream->mutex); + wait_event(stream->wait, + !list_empty(&stream->strm_list)); + } + + return decomp_strm; +} + + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + int res; + struct squashfs_stream *stream = msblk->stream; + struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); + res = msblk->decompressor->decompress(msblk, decomp_stream->stream, + buffer, bh, b, offset, length, srclength, pages); + put_decomp_stream(decomp_stream, stream); + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + return res; +} -- cgit v1.2.3 From 887bd836dc38e36525154e7dc592e5bc5e7e322c Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 18 Nov 2013 02:31:36 +0000 Subject: Squashfs: add multi-threaded decompression using percpu variable Add a multi-threaded decompression implementation which uses percpu variables. Using percpu variables has advantages and disadvantages over implementations which do not use percpu variables. Advantages: * the nature of percpu variables ensures decompression is load-balanced across the multiple cores. * simplicity. Disadvantages: it limits decompression to one thread per core. Signed-off-by: Phillip Lougher --- fs/squashfs/Kconfig | 57 ++++++++++++++----- fs/squashfs/Makefile | 10 +--- fs/squashfs/decompressor_multi_percpu.c | 98 +++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 20 deletions(-) create mode 100644 fs/squashfs/decompressor_multi_percpu.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 1c6d340fc61f..159bd6676dc2 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,50 @@ config SQUASHFS If unsure, say N. +choice + prompt "Decompressor parallelisation options" + depends on SQUASHFS + help + Squashfs now supports three parallelisation options for + decompression. Each one exhibits various trade-offs between + decompression performance and CPU and memory usage. + + If in doubt, select "Single threaded compression" + +config SQUASHFS_DECOMP_SINGLE + bool "Single threaded compression" + help + Traditionally Squashfs has used single-threaded decompression. + Only one block (data or metadata) can be decompressed at any + one time. This limits CPU and memory usage to a minimum. + +config SQUASHFS_DECOMP_MULTI + bool "Use multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + If you have a parallel I/O workload and your system has enough memory, + using this option may improve overall I/O performance. + + This decompressor implementation uses up to two parallel + decompressors per core. It dynamically allocates decompressors + on a demand basis. + +config SQUASHFS_DECOMP_MULTI_PERCPU + bool "Use percpu multiple decompressors for parallel I/O" + help + By default Squashfs uses a single decompressor but it gives + poor performance on parallel I/O workloads when using multiple CPU + machines due to waiting on decompressor availability. + + This decompressor implementation uses a maximum of one + decompressor per core. It uses percpu variables to ensure + decompression is load-balanced across the cores. + +endchoice + config SQUASHFS_XATTR bool "Squashfs XATTR support" depends on SQUASHFS @@ -63,19 +107,6 @@ config SQUASHFS_LZO If unsure, say N. -config SQUASHFS_MULTI_DECOMPRESSOR - bool "Use multiple decompressors for handling parallel I/O" - depends on SQUASHFS - help - By default Squashfs uses a single decompressor but it gives - poor performance on parallel I/O workloads when using multiple CPU - machines due to waiting on decompressor availability. - - If you have a parallel I/O workload and your system has enough memory, - using this option may improve overall I/O performance. - - If unsure, say N. - config SQUASHFS_XZ bool "Include support for XZ compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index dfebc3b12d61..5833b96ee69c 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -5,14 +5,10 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o squashfs-y += namei.o super.o symlink.o decompressor.o - +squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o +squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o - -ifdef CONFIG_SQUASHFS_MULTI_DECOMPRESSOR - squashfs-y += decompressor_multi.o -else - squashfs-y += decompressor_single.o -endif diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 000000000000..0e7b679bc4ad --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "decompressor.h" +#include "squashfs.h" + +/* + * This file implements multi-threaded decompression using percpu + * variables, one thread per cpu core. + */ + +struct squashfs_stream { + void *stream; +}; + +void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, + void *comp_opts) +{ + struct squashfs_stream *stream; + struct squashfs_stream __percpu *percpu; + int err, cpu; + + percpu = alloc_percpu(struct squashfs_stream); + if (percpu == NULL) + return ERR_PTR(-ENOMEM); + + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + stream->stream = msblk->decompressor->init(msblk, comp_opts); + if (IS_ERR(stream->stream)) { + err = PTR_ERR(stream->stream); + goto out; + } + } + + kfree(comp_opts); + return (__force void *) percpu; + +out: + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + if (!IS_ERR_OR_NULL(stream->stream)) + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + return ERR_PTR(err); +} + +void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream; + int cpu; + + if (msblk->stream) { + for_each_possible_cpu(cpu) { + stream = per_cpu_ptr(percpu, cpu); + msblk->decompressor->free(stream->stream); + } + free_percpu(percpu); + } +} + +int squashfs_decompress(struct squashfs_sb_info *msblk, + void **buffer, struct buffer_head **bh, int b, int offset, int length, + int srclength, int pages) +{ + struct squashfs_stream __percpu *percpu = + (struct squashfs_stream __percpu *) msblk->stream; + struct squashfs_stream *stream = get_cpu_ptr(percpu); + int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, + bh, b, offset, length, srclength, pages); + put_cpu_ptr(stream); + + if (res < 0) + ERROR("%s decompression failed, data probably corrupt\n", + msblk->decompressor->name); + + return res; +} + +int squashfs_max_decompressors(void) +{ + return num_possible_cpus(); +} -- cgit v1.2.3 From dad13d68547bf13dde9a4ef7e24659e10250b7b1 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 18 Nov 2013 02:59:12 +0000 Subject: Squashfs: Generalise paging handling in the decompressors Further generalise the decompressors by adding a page handler abstraction. This adds helpers to allow the decompressors to access and process the output buffers in an implementation independant manner. This allows different types of output buffer to be passed to the decompressors, with the implementation specific aspects handled at decompression time, but without the knowledge being held in the decompressor wrapper code. This will allow the decompressors to handle Squashfs cache buffers, and page cache pages. This patch adds the abstraction and an implementation for the caches. Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/block.c | 27 ++++++++++-------- fs/squashfs/cache.c | 28 +++++++++++++++---- fs/squashfs/decompressor.c | 14 ++++++++-- fs/squashfs/decompressor.h | 5 ++-- fs/squashfs/decompressor_multi.c | 7 ++--- fs/squashfs/decompressor_multi_percpu.c | 9 +++--- fs/squashfs/decompressor_single.c | 9 +++--- fs/squashfs/lzo_wrapper.c | 27 ++++++++++++------ fs/squashfs/page_actor.h | 49 +++++++++++++++++++++++++++++++++ fs/squashfs/squashfs.h | 8 +++--- fs/squashfs/squashfs_fs_sb.h | 1 + fs/squashfs/xz_wrapper.c | 22 +++++++++------ fs/squashfs/zlib_wrapper.c | 24 ++++++++++------ 13 files changed, 163 insertions(+), 67 deletions(-) create mode 100644 fs/squashfs/page_actor.h diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 4dd402597f22..0cea9b9236d0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -36,6 +36,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" /* * Read the metadata block length, this is stored in the first two @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, * generated a larger block - this does occasionally happen with compression * algorithms). */ -int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, - int length, u64 *next_index, int srclength, int pages) +int squashfs_read_data(struct super_block *sb, u64 index, int length, + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; struct buffer_head **bh; int offset = index & ((1 << msblk->devblksize_log2) - 1); u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, page = 0, avail, i; + int bytes, compressed, b = 0, k = 0, avail, i; - bh = kcalloc(((srclength + msblk->devblksize - 1) + bh = kcalloc(((output->length + msblk->devblksize - 1) >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); if (bh == NULL) return -ENOMEM; @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, *next_index = index + length; TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", - index, compressed ? "" : "un", length, srclength); + index, compressed ? "" : "un", length, output->length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto read_failure; @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, TRACE("Block @ 0x%llx, %scompressed size %d\n", index, compressed ? "" : "un", length); - if (length < 0 || length > srclength || + if (length < 0 || length > output->length || (index + length) > msblk->bytes_used) goto block_release; @@ -165,8 +166,8 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, } if (compressed) { - length = squashfs_decompress(msblk, buffer, bh, b, offset, - length, srclength, pages); + length = squashfs_decompress(msblk, bh, b, offset, length, + output); if (length < 0) goto read_failure; } else { @@ -174,19 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, * Block is uncompressed. */ int in, pg_offset = 0; + void *data = squashfs_first_page(output); for (bytes = length; k < b; k++) { in = min(bytes, msblk->devblksize - offset); bytes -= in; while (in) { if (pg_offset == PAGE_CACHE_SIZE) { - page++; + data = squashfs_next_page(output); pg_offset = 0; } avail = min_t(int, in, PAGE_CACHE_SIZE - pg_offset); - memcpy(buffer[page] + pg_offset, - bh[k]->b_data + offset, avail); + memcpy(data + pg_offset, bh[k]->b_data + offset, + avail); in -= avail; pg_offset += avail; offset += avail; @@ -194,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, offset = 0; put_bh(bh[k]); } + squashfs_finish_page(output); } kfree(bh); diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b73802592..1cb70a0b2168 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c @@ -56,6 +56,7 @@ #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" +#include "page_actor.h" /* * Look-up block in cache, and increment usage count. If not in cache, read @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, entry->error = 0; spin_unlock(&cache->lock); - entry->length = squashfs_read_data(sb, entry->data, - block, length, &entry->next_index, - cache->block_size, cache->pages); + entry->length = squashfs_read_data(sb, block, length, + &entry->next_index, entry->actor); spin_lock(&cache->lock); @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) kfree(cache->entry[i].data[j]); kfree(cache->entry[i].data); } + kfree(cache->entry[i].actor); } kfree(cache->entry); @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, goto cleanup; } } + + entry->actor = squashfs_page_actor_init(entry->data, + cache->pages, 0); + if (entry->actor == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } } return cache; @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int i, res; void *table, *buffer, **data; + struct squashfs_page_actor *actor; table = buffer = kmalloc(length, GFP_KERNEL); if (table == NULL) @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) goto failed; } + actor = squashfs_page_actor_init(data, pages, length); + if (actor == NULL) { + res = -ENOMEM; + goto failed2; + } + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) data[i] = buffer; - res = squashfs_read_data(sb, data, block, length | - SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); + res = squashfs_read_data(sb, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); kfree(data); + kfree(actor); if (res < 0) goto failed; return table; +failed2: + kfree(data); failed: kfree(table); return ERR_PTR(res); diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 234291f79ba5..ac22fe73b0ad 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -30,6 +30,7 @@ #include "squashfs_fs_sb.h" #include "decompressor.h" #include "squashfs.h" +#include "page_actor.h" /* * This file (and decompressor.h) implements a decompressor framework for @@ -87,6 +88,7 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) { struct squashfs_sb_info *msblk = sb->s_fs_info; void *buffer = NULL, *comp_opts; + struct squashfs_page_actor *actor = NULL; int length = 0; /* @@ -99,9 +101,14 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) goto out; } - length = squashfs_read_data(sb, &buffer, - sizeof(struct squashfs_super_block), 0, NULL, - PAGE_CACHE_SIZE, 1); + actor = squashfs_page_actor_init(&buffer, 1, 0); + if (actor == NULL) { + comp_opts = ERR_PTR(-ENOMEM); + goto out; + } + + length = squashfs_read_data(sb, + sizeof(struct squashfs_super_block), 0, NULL, actor); if (length < 0) { comp_opts = ERR_PTR(length); @@ -112,6 +119,7 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags) comp_opts = squashfs_comp_opts(msblk, buffer, length); out: + kfree(actor); kfree(buffer); return comp_opts; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 6cdb20a3878a..af0985321808 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -27,8 +27,9 @@ struct squashfs_decompressor { void *(*init)(struct squashfs_sb_info *, void *); void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); - int (*decompress)(struct squashfs_sb_info *, void *, void **, - struct buffer_head **, int, int, int, int, int); + int (*decompress)(struct squashfs_sb_info *, void *, + struct buffer_head **, int, int, int, + struct squashfs_page_actor *); int id; char *name; int supported; diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index 462731db5130..ae54675a3526 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -183,15 +183,14 @@ wait: } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); res = msblk->decompressor->decompress(msblk, decomp_stream->stream, - buffer, bh, b, offset, length, srclength, pages); + bh, b, offset, length, output); put_decomp_stream(decomp_stream, stream); if (res < 0) ERROR("%s decompression failed, data probably corrupt\n", diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index 0e7b679bc4ad..23a9c28ad8ea 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -74,15 +74,14 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { struct squashfs_stream __percpu *percpu = (struct squashfs_stream __percpu *) msblk->stream; struct squashfs_stream *stream = get_cpu_ptr(percpu); - int res = msblk->decompressor->decompress(msblk, stream->stream, buffer, - bh, b, offset, length, srclength, pages); + int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); put_cpu_ptr(stream); if (res < 0) diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index f857cf6f22d4..a6c75929a00e 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -61,16 +61,15 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, + int b, int offset, int length, struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; mutex_lock(&stream->mutex); - res = msblk->decompressor->decompress(msblk, stream->stream, buffer, - bh, b, offset, length, srclength, pages); + res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + offset, length, output); mutex_unlock(&stream->mutex); if (res < 0) diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 75c3b5779172..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -31,6 +31,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_lzo { void *input; @@ -75,13 +76,13 @@ static void lzo_free(void *strm) static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { struct squashfs_lzo *stream = strm; - void *buff = stream->input; + void *buff = stream->input, *data; int avail, i, bytes = length, res; - size_t out_len = srclength; + size_t out_len = output->length; for (i = 0; i < b; i++) { avail = min(bytes, msblk->devblksize - offset); @@ -98,12 +99,20 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, goto failed; res = bytes = (int)out_len; - for (i = 0, buff = stream->output; bytes && i < pages; i++) { - avail = min_t(int, bytes, PAGE_CACHE_SIZE); - memcpy(buffer[i], buff, avail); - buff += avail; - bytes -= avail; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } else { + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } } + squashfs_finish_page(output); return res; diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 000000000000..5b0ba5a7133a --- /dev/null +++ b/fs/squashfs/page_actor.h @@ -0,0 +1,49 @@ +#ifndef PAGE_ACTOR_H +#define PAGE_ACTOR_H +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +struct squashfs_page_actor { + void **page; + int pages; + int length; + int next_page; +}; + +static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + return actor; +} + +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->page[0]; +} + +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->next_page == actor->pages ? NULL : + actor->page[actor->next_page++]; +} + +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} +#endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 2e2751df8452..6a97e63ca173 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -28,8 +28,8 @@ #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) /* block.c */ -extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, - int, int); +extern int squashfs_read_data(struct super_block *, u64, int, u64 *, + struct squashfs_page_actor *); /* cache.c */ extern struct squashfs_cache *squashfs_cache_init(char *, int, int); @@ -53,8 +53,8 @@ extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); /* decompressor_xxx.c */ extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); -extern int squashfs_decompress(struct squashfs_sb_info *, void **, - struct buffer_head **, int, int, int, int, int); +extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, + int, int, int, struct squashfs_page_actor *); extern int squashfs_max_decompressors(void); /* export.c */ diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 9cdcf4150d59..1da565cb50c3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -50,6 +50,7 @@ struct squashfs_cache_entry { wait_queue_head_t wait_queue; struct squashfs_cache *cache; void **data; + struct squashfs_page_actor *actor; }; struct squashfs_sb_info { diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 5d1d07cca6b4..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -32,6 +32,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" struct squashfs_xz { struct xz_dec *state; @@ -129,11 +130,11 @@ static void squashfs_xz_free(void *strm) static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { enum xz_ret xz_err; - int avail, total = 0, k = 0, page = 0; + int avail, total = 0, k = 0; struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); @@ -141,7 +142,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.in_size = 0; stream->buf.out_pos = 0; stream->buf.out_size = PAGE_CACHE_SIZE; - stream->buf.out = buffer[page++]; + stream->buf.out = squashfs_first_page(output); do { if (stream->buf.in_pos == stream->buf.in_size && k < b) { @@ -153,11 +154,12 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, offset = 0; } - if (stream->buf.out_pos == stream->buf.out_size - && page < pages) { - stream->buf.out = buffer[page++]; - stream->buf.out_pos = 0; - total += PAGE_CACHE_SIZE; + if (stream->buf.out_pos == stream->buf.out_size) { + stream->buf.out = squashfs_next_page(output); + if (stream->buf.out != NULL) { + stream->buf.out_pos = 0; + total += PAGE_CACHE_SIZE; + } } xz_err = xz_dec_run(stream->state, &stream->buf); @@ -166,6 +168,8 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (xz_err == XZ_OK); + squashfs_finish_page(output); + if (xz_err != XZ_STREAM_END || k < b) goto out; diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index bb049027d15c..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -32,6 +32,7 @@ #include "squashfs_fs_sb.h" #include "squashfs.h" #include "decompressor.h" +#include "page_actor.h" static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) { @@ -62,14 +63,14 @@ static void zlib_free(void *strm) static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, - void **buffer, struct buffer_head **bh, int b, int offset, int length, - int srclength, int pages) + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0; - int k = 0, page = 0; + int zlib_err, zlib_init = 0, k = 0; z_stream *stream = strm; - stream->avail_out = 0; + stream->avail_out = PAGE_CACHE_SIZE; + stream->next_out = squashfs_first_page(output); stream->avail_in = 0; do { @@ -81,15 +82,18 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, offset = 0; } - if (stream->avail_out == 0 && page < pages) { - stream->next_out = buffer[page++]; - stream->avail_out = PAGE_CACHE_SIZE; + if (stream->avail_out == 0) { + stream->next_out = squashfs_next_page(output); + if (stream->next_out != NULL) + stream->avail_out = PAGE_CACHE_SIZE; } if (!zlib_init) { zlib_err = zlib_inflateInit(stream); - if (zlib_err != Z_OK) + if (zlib_err != Z_OK) { + squashfs_finish_page(output); goto out; + } zlib_init = 1; } @@ -99,6 +103,8 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, put_bh(bh[k++]); } while (zlib_err == Z_OK); + squashfs_finish_page(output); + if (zlib_err != Z_STREAM_END) goto out; -- cgit v1.2.3 From 4ff4dc4d1508e64cb642dc1d1380085af47d1d6e Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 31 Oct 2013 19:24:27 +0000 Subject: Squashfs: Restructure squashfs_readpage() Restructure squashfs_readpage() splitting it into separate functions for datablocks, fragments and sparse blocks. Move the memcpying (from squashfs cache entry) implementation of squashfs_readpage_block into file_cache.c This allows different implementations to be supported. Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Makefile | 2 +- fs/squashfs/file.c | 142 ++++++++++++++++++++++++----------------------- fs/squashfs/file_cache.c | 38 +++++++++++++ fs/squashfs/squashfs.h | 7 +++ 4 files changed, 118 insertions(+), 71 deletions(-) create mode 100644 fs/squashfs/file_cache.c diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 5833b96ee69c..e01ba1126c89 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o file_cache.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe12..e5c9689062ba 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) return le32_to_cpu(size); } - -static int squashfs_readpage(struct file *file, struct page *page) +/* Copy data into page cache */ +void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, + int bytes, int offset) { struct inode *inode = page->mapping->host; struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; - int bytes, i, offset = 0, sparse = 0; - struct squashfs_cache_entry *buffer = NULL; void *pageaddr; - - int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; - int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); - int start_index = page->index & ~mask; - int end_index = start_index | mask; - int file_end = i_size_read(inode) >> msblk->block_log; - - TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", - page->index, squashfs_i(inode)->start); - - if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT)) - goto out; - - if (index < file_end || squashfs_i(inode)->fragment_block == - SQUASHFS_INVALID_BLK) { - /* - * Reading a datablock from disk. Need to read block list - * to get location and block size. - */ - u64 block = 0; - int bsize = read_blocklist(inode, index, &block); - if (bsize < 0) - goto error_out; - - if (bsize == 0) { /* hole */ - bytes = index == file_end ? - (i_size_read(inode) & (msblk->block_size - 1)) : - msblk->block_size; - sparse = 1; - } else { - /* - * Read and decompress datablock. - */ - buffer = squashfs_get_datablock(inode->i_sb, - block, bsize); - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x" - "\n", block, bsize); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = buffer->length; - } - } else { - /* - * Datablock is stored inside a fragment (tail-end packed - * block). - */ - buffer = squashfs_get_fragment(inode->i_sb, - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - - if (buffer->error) { - ERROR("Unable to read page, block %llx, size %x\n", - squashfs_i(inode)->fragment_block, - squashfs_i(inode)->fragment_size); - squashfs_cache_put(buffer); - goto error_out; - } - bytes = i_size_read(inode) & (msblk->block_size - 1); - offset = squashfs_i(inode)->fragment_offset; - } + int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = page->index & ~mask, end_index = start_index | mask; /* * Loop copying datablock into pages. As the datablock likely covers @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) for (i = start_index; i <= end_index && bytes > 0; i++, bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { struct page *push_page; - int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); @@ -475,11 +413,75 @@ skip_page: if (i != page->index) page_cache_release(push_page); } +} + +/* Read datablock stored packed inside a fragment (tail-end packed block) */ +static int squashfs_readpage_fragment(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + else + squashfs_copy_cache(page, buffer, i_size_read(inode) & + (msblk->block_size - 1), + squashfs_i(inode)->fragment_offset); + + squashfs_cache_put(buffer); + return res; +} - if (!sparse) - squashfs_cache_put(buffer); +static int squashfs_readpage_sparse(struct page *page, int index, int file_end) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + squashfs_copy_cache(page, NULL, bytes, 0); return 0; +} + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int file_end = i_size_read(inode) >> msblk->block_log; + int res; + void *pageaddr; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) + res = squashfs_readpage_sparse(page, index, file_end); + else + res = squashfs_readpage_block(page, block, bsize); + } else + res = squashfs_readpage_fragment(page); + + if (!res) + return 0; error_out: SetPageError(page); diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* Read separately compressed datablock and memcopy into page cache */ +int squashfs_readpage_block(struct page *page, u64 block, int bsize) +{ + struct inode *i = page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int res = buffer->error; + + if (res) + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + else + squashfs_copy_cache(page, buffer, buffer->length, 0); + + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 6a97e63ca173..9e1bb79f7e6f 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -66,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); extern __le64 *squashfs_read_fragment_index_table(struct super_block *, u64, u64, unsigned int); +/* file.c */ +void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, + int); + +/* file_xxx.c */ +extern int squashfs_readpage_block(struct page *, u64, int); + /* id.c */ extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, -- cgit v1.2.3 From a7b16c939e7759d65246b5363aeca1fac5999b62 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Wed, 13 Nov 2013 02:04:19 +0000 Subject: Squashfs: Directly decompress into the page cache for file data This introduces an implementation of squashfs_readpage_block() that directly decompresses into the page cache. This uses the previously added page handler abstraction to push down the necessary kmap_atomic/kunmap_atomic operations on the page cache buffers into the decompressors. This enables direct copying into the page cache without using the slow kmap/kunmap calls. The code detects when multiple threads are racing in squashfs_readpage() to decompress the same block, and avoids this regression by falling back to using an intermediate buffer. This patch enhances the performance of Squashfs significantly when multiple processes are accessing the filesystem simultaneously because it not only reduces memcopying, but it more importantly eliminates the lock contention on the intermediate buffer. Using single-thread decompression. dd if=file1 of=/dev/null bs=4096 & dd if=file2 of=/dev/null bs=4096 & dd if=file3 of=/dev/null bs=4096 & dd if=file4 of=/dev/null bs=4096 Before: 629145600 bytes (629 MB) copied, 45.8046 s, 13.7 MB/s After: 629145600 bytes (629 MB) copied, 9.29414 s, 67.7 MB/s Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/Kconfig | 28 ++++++++ fs/squashfs/Makefile | 4 +- fs/squashfs/file_direct.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++ fs/squashfs/page_actor.c | 100 +++++++++++++++++++++++++++ fs/squashfs/page_actor.h | 32 +++++++++ 5 files changed, 336 insertions(+), 1 deletion(-) create mode 100644 fs/squashfs/file_direct.c create mode 100644 fs/squashfs/page_actor.c diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 159bd6676dc2..b6fa8657dcbc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -25,6 +25,34 @@ config SQUASHFS If unsure, say N. +choice + prompt "File decompression options" + depends on SQUASHFS + help + Squashfs now supports two options for decompressing file + data. Traditionally Squashfs has decompressed into an + intermediate buffer and then memcopied it into the page cache. + Squashfs now supports the ability to decompress directly into + the page cache. + + If unsure, select "Decompress file data into an intermediate buffer" + +config SQUASHFS_FILE_CACHE + bool "Decompress file data into an intermediate buffer" + help + Decompress file data into an intermediate buffer and then + memcopy it into the page cache. + +config SQUASHFS_FILE_DIRECT + bool "Decompress files directly into the page cache" + help + Directly decompress file data into the page cache. + Doing so can significantly improve performance because + it eliminates a memcpy and it also removes the lock contention + on the single buffer. + +endchoice + choice prompt "Decompressor parallelisation options" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index e01ba1126c89..4132520b4ff2 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,9 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o decompressor.o file_cache.o +squashfs-y += namei.o super.o symlink.o decompressor.o +squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o +squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 000000000000..2943b2bfae48 --- /dev/null +++ b/fs/squashfs/file_direct.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" +#include "page_actor.h" + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page); + +/* Read separately compressed datablock directly into page cache */ +int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) + +{ + struct inode *inode = target_page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + + int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int start_index = target_page->index & ~mask; + int end_index = start_index | mask; + int i, n, pages, missing_pages, bytes, res = -ENOMEM; + struct page **page; + struct squashfs_page_actor *actor; + void *pageaddr; + + if (end_index > file_end) + end_index = file_end; + + pages = end_index - start_index + 1; + + page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + if (page == NULL) + return res; + + /* + * Create a "page actor" which will kmap and kunmap the + * page cache pages appropriately within the decompressor + */ + actor = squashfs_page_actor_init_special(page, pages, 0); + if (actor == NULL) + goto out; + + /* Try to grab all the pages covered by the Squashfs block */ + for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { + page[i] = (n == target_page->index) ? target_page : + grab_cache_page_nowait(target_page->mapping, n); + + if (page[i] == NULL) { + missing_pages++; + continue; + } + + if (PageUptodate(page[i])) { + unlock_page(page[i]); + page_cache_release(page[i]); + page[i] = NULL; + missing_pages++; + } + } + + if (missing_pages) { + /* + * Couldn't get one or more pages, this page has either + * been VM reclaimed, but others are still in the page cache + * and uptodate, or we're racing with another thread in + * squashfs_readpage also trying to grab them. Fall back to + * using an intermediate buffer. + */ + res = squashfs_read_cache(target_page, block, bsize, pages, + page); + goto out; + } + + /* Decompress directly into the page cache buffers */ + res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); + if (res < 0) + goto mark_errored; + + /* Last page may have trailing bytes not filled */ + bytes = res % PAGE_CACHE_SIZE; + if (bytes) { + pageaddr = kmap_atomic(page[pages - 1]); + memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + kunmap_atomic(pageaddr); + } + + /* Mark pages as uptodate, unlock and release */ + for (i = 0; i < pages; i++) { + flush_dcache_page(page[i]); + SetPageUptodate(page[i]); + unlock_page(page[i]); + if (page[i] != target_page) + page_cache_release(page[i]); + } + + kfree(actor); + kfree(page); + + return 0; + +mark_errored: + /* Decompression failed, mark pages as errored. Target_page is + * dealt with by the caller + */ + for (i = 0; i < pages; i++) { + if (page[i] == target_page) + continue; + flush_dcache_page(page[i]); + SetPageError(page[i]); + unlock_page(page[i]); + page_cache_release(page[i]); + } + +out: + kfree(actor); + kfree(page); + return res; +} + + +static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, + int pages, struct page **page) +{ + struct inode *i = target_page->mapping->host; + struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, + block, bsize); + int bytes = buffer->length, res = buffer->error, n, offset = 0; + void *pageaddr; + + if (res) { + ERROR("Unable to read page, block %llx, size %x\n", block, + bsize); + goto out; + } + + for (n = 0; n < pages && bytes > 0; n++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + int avail = min_t(int, bytes, PAGE_CACHE_SIZE); + + if (page[n] == NULL) + continue; + + pageaddr = kmap_atomic(page[n]); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr); + flush_dcache_page(page[n]); + SetPageUptodate(page[n]); + unlock_page(page[n]); + if (page[n] != target_page) + page_cache_release(page[n]); + } + +out: + squashfs_cache_put(buffer); + return res; +} diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 000000000000..5a1c11f56441 --- /dev/null +++ b/fs/squashfs/page_actor.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2013 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include "page_actor.h" + +/* + * This file contains implementations of page_actor for decompressing into + * an intermediate buffer, and for decompressing directly into the + * page cache. + * + * Calling code should avoid sleeping between calls to squashfs_first_page() + * and squashfs_finish_page(). + */ + +/* Implementation of page_actor for decompressing into intermediate buffer */ +static void *cache_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->buffer[0]; +} + +static void *cache_next_page(struct squashfs_page_actor *actor) +{ + if (actor->next_page == actor->pages) + return NULL; + + return actor->buffer[actor->next_page++]; +} + +static void cache_finish_page(struct squashfs_page_actor *actor) +{ + /* empty */ +} + +struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->buffer = buffer; + actor->pages = pages; + actor->next_page = 0; + actor->squashfs_first_page = cache_first_page; + actor->squashfs_next_page = cache_next_page; + actor->squashfs_finish_page = cache_finish_page; + return actor; +} + +/* Implementation of page_actor for decompressing directly into page cache. */ +static void *direct_first_page(struct squashfs_page_actor *actor) +{ + actor->next_page = 1; + return actor->pageaddr = kmap_atomic(actor->page[0]); +} + +static void *direct_next_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); + + return actor->pageaddr = actor->next_page == actor->pages ? NULL : + kmap_atomic(actor->page[actor->next_page++]); +} + +static void direct_finish_page(struct squashfs_page_actor *actor) +{ + if (actor->pageaddr) + kunmap_atomic(actor->pageaddr); +} + +struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, + int pages, int length) +{ + struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); + + if (actor == NULL) + return NULL; + + actor->length = length ? : pages * PAGE_CACHE_SIZE; + actor->page = page; + actor->pages = pages; + actor->next_page = 0; + actor->pageaddr = NULL; + actor->squashfs_first_page = direct_first_page; + actor->squashfs_next_page = direct_next_page; + actor->squashfs_finish_page = direct_finish_page; + return actor; +} diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h index 5b0ba5a7133a..26dd82008b82 100644 --- a/fs/squashfs/page_actor.h +++ b/fs/squashfs/page_actor.h @@ -8,6 +8,7 @@ * the COPYING file in the top-level directory. */ +#ifndef CONFIG_SQUASHFS_FILE_DIRECT struct squashfs_page_actor { void **page; int pages; @@ -46,4 +47,35 @@ static inline void squashfs_finish_page(struct squashfs_page_actor *actor) { /* empty */ } +#else +struct squashfs_page_actor { + union { + void **buffer; + struct page **page; + }; + void *pageaddr; + void *(*squashfs_first_page)(struct squashfs_page_actor *); + void *(*squashfs_next_page)(struct squashfs_page_actor *); + void (*squashfs_finish_page)(struct squashfs_page_actor *); + int pages; + int length; + int next_page; +}; + +extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); +extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page + **, int, int); +static inline void *squashfs_first_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_first_page(actor); +} +static inline void *squashfs_next_page(struct squashfs_page_actor *actor) +{ + return actor->squashfs_next_page(actor); +} +static inline void squashfs_finish_page(struct squashfs_page_actor *actor) +{ + actor->squashfs_finish_page(actor); +} +#endif #endif -- cgit v1.2.3 From 6a1e8fd1e4dc7c53d67f4091bea0cfd2139901d0 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 10 Nov 2013 00:02:29 +0000 Subject: Squashfs: Check stream is not NULL in decompressor_multi.c Fix static checker complaint that stream is not checked in squashfs_decompressor_destroy(). Reported-by: Dan Carpenter Signed-off-by: Phillip Lougher Reviewed-by: Minchan Kim --- fs/squashfs/decompressor_multi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index ae54675a3526..d6008a636479 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -119,11 +119,10 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) kfree(decomp_strm); stream->avail_decomp--; } + WARN_ON(stream->avail_decomp); + kfree(stream->comp_opts); + kfree(stream); } - - WARN_ON(stream->avail_decomp); - kfree(stream->comp_opts); - kfree(stream); } -- cgit v1.2.3 From 3a2109763d60cf4d0d92f7ae3c6e2c268fb7de04 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Sun, 24 Nov 2013 00:40:49 +0000 Subject: Squashfs: fix failure to unlock pages on decompress error Direct decompression into the page cache. If we fall back to using an intermediate buffer (because we cannot grab all the page cache pages) and we get a decompress fail, we forgot to release the pages. Reported-by: Roman Peniaev Signed-off-by: Phillip Lougher --- fs/squashfs/file_direct.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 2943b2bfae48..62a0de6632e1 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -84,6 +84,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) */ res = squashfs_read_cache(target_page, block, bsize, pages, page); + if (res < 0) + goto mark_errored; + goto out; } @@ -119,7 +122,7 @@ mark_errored: * dealt with by the caller */ for (i = 0; i < pages; i++) { - if (page[i] == target_page) + if (page[i] == NULL || page[i] == target_page) continue; flush_dcache_page(page[i]); SetPageError(page[i]); -- cgit v1.2.3 From e57e5b786fcfe069afdfa8ae3abb1287ffe3d687 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 13 Mar 2014 10:14:33 -0400 Subject: fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig Cc: Artem Bityutskiy Cc: Adrian Hunter Cc: Evgeniy Dushistov Cc: Jan Kara Cc: OGAWA Hirofumi Cc: Anders Larsen Cc: Phillip Lougher Cc: Kees Cook Cc: Mikulas Patocka Cc: Petr Vandrovec Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org --- fs/adfs/super.c | 1 + fs/affs/super.c | 1 + fs/befs/linuxvfs.c | 1 + fs/btrfs/super.c | 1 + fs/cifs/cifsfs.c | 1 + fs/coda/inode.c | 1 + fs/cramfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/devpts/inode.c | 1 + fs/efs/super.c | 1 + fs/ext2/super.c | 1 + fs/ext3/super.c | 2 ++ fs/ext4/super.c | 2 ++ fs/f2fs/super.c | 1 - fs/fat/inode.c | 2 ++ fs/freevxfs/vxfs_super.c | 1 + fs/fuse/inode.c | 1 + fs/gfs2/super.c | 2 ++ fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hpfs/super.c | 2 ++ fs/jffs2/super.c | 1 + fs/jfs/super.c | 1 + fs/minix/inode.c | 1 + fs/ncpfs/inode.c | 1 + fs/nfs/super.c | 2 ++ fs/nilfs2/super.c | 1 + fs/ntfs/super.c | 2 ++ fs/ocfs2/super.c | 2 ++ fs/openpromfs/inode.c | 1 + fs/proc/root.c | 2 ++ fs/pstore/inode.c | 1 + fs/qnx4/inode.c | 1 + fs/qnx6/inode.c | 1 + fs/reiserfs/super.c | 1 + fs/romfs/super.c | 1 + fs/squashfs/super.c | 1 + fs/sysv/inode.c | 1 + fs/ubifs/super.c | 1 + fs/udf/super.c | 1 + fs/ufs/super.c | 1 + fs/xfs/xfs_super.c | 1 + 42 files changed, 50 insertions(+), 1 deletion(-) diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ff4bae2c2a2..479ef5a9c5d3 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -213,6 +213,7 @@ static int parse_options(struct super_block *sb, char *options) static int adfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return parse_options(sb, data); } diff --git a/fs/affs/super.c b/fs/affs/super.c index 45161a832bbc..01c6e07acbff 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -549,6 +549,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); + sync_filesystem(sb); *flags |= MS_NODIRATIME; memcpy(volume, sbi->s_volume, 32); diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f95dddced968..7192a7e99373 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -908,6 +908,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) static int befs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if (!(*flags & MS_RDONLY)) return -EINVAL; return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f0857e092a3c..461731c7fa3a 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1239,6 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) unsigned int old_metadata_ratio = fs_info->metadata_ratio; int ret; + sync_filesystem(sb); btrfs_remount_prepare(fs_info); ret = btrfs_parse_options(root, data); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3752b9f6d9e4..0d7daf714c4b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -521,6 +521,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) static int cifs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return 0; } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 4dcc0d81a7aa..0aa4c4d75ec6 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -96,6 +96,7 @@ void coda_destroy_inodecache(void) static int coda_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NOATIME; return 0; } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 35b1c7bd18b7..c0148585670d 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -227,6 +227,7 @@ static void cramfs_put_super(struct super_block *sb) static int cramfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3fdef7..60ef5f96ae3d 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data) int err; struct debugfs_fs_info *fsi = sb->s_fs_info; + sync_filesystem(sb); err = debugfs_parse_options(data, &fsi->mount_opts); if (err) goto fail; diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..5638ff720f66 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) struct pts_fs_info *fsi = DEVPTS_SB(sb); struct pts_mount_opts *opts = &fsi->mount_opts; + sync_filesystem(sb); err = parse_mount_options(data, PARSE_REMOUNT, opts); /* diff --git a/fs/efs/super.c b/fs/efs/super.c index c6f57a74a559..4709692cd7a0 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -113,6 +113,7 @@ static void efs_put_super(struct super_block *s) static int efs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 288534920fe5..bc47f47a99fe 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) unsigned long old_sb_flags; int err; + sync_filesystem(sb); spin_lock(&sbi->s_lock); /* Store the old options */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 6356665a74bb..246bd04207bf 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2595,6 +2595,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) int i; #endif + sync_filesystem(sb); + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..48c8af5fe91d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4617,6 +4617,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) #endif char *orig_data = kstrdup(data, GFP_KERNEL); + sync_filesystem(sb); + /* Store the original options */ old_sb_flags = sb->s_flags; old_opts.s_mount_opt = sbi->s_mount_opt; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7df82c7..03ab8b830940 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -239,7 +239,6 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, DISABLE_EXT_IDENTIFY)) seq_puts(seq, ",disable_ext_identify"); - seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a14dd4c0528a..ebdc6656c067 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -632,6 +632,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) struct msdos_sb_info *sbi = MSDOS_SB(sb); *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); + sync_filesystem(sb); + /* make sure we update state on remount. */ new_rdonly = *flags & MS_RDONLY; if (new_rdonly != (sb->s_flags & MS_RDONLY)) { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index e37eb274e492..7ca8c75d50d3 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) static int vxfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..a796d1c52a65 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode) static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if (*flags & MS_MANDLOCK) return -EINVAL; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5639dec66c4..db7fff5a29c1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1142,6 +1142,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) struct gfs2_tune *gt = &sdp->sd_tune; int error; + sync_filesystem(sb); + spin_lock(>->gt_spin); args.ar_commit = gt->gt_logd_secs; args.ar_quota_quantum = gt->gt_quota_quantum; diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 2d2039e754cd..eee7206c38d1 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4c4d142cf890..1b9414f701e6 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) static int hfsplus_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (!(*flags & MS_RDONLY)) { diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..fc8037639064 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -395,6 +395,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); + sync_filesystem(s); + *flags |= MS_NOATIME; hpfs_lock(s); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0defb1cc2a35..0918f0e2e266 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); int err; + sync_filesystem(sb); err = jffs2_parse_options(c, data); if (err) return -EINVAL; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9c1fb0..b7486dafa1cd 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -413,6 +413,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) int flag = JFS_SBI(sb)->flag; int ret; + sync_filesystem(sb); if (!parse_options(data, sb, &newLVSize, &flag)) { return -EINVAL; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index df122496f328..a54d08865e59 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) struct minix_sb_info * sbi = minix_sb(sb); struct minix_super_block * ms; + sync_filesystem(sb); ms = sbi->s_ms; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 26910c8154da..3f54348b926b 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -99,6 +99,7 @@ static void destroy_inodecache(void) static int ncp_remount(struct super_block *sb, int *flags, char* data) { + sync_filesystem(sb); *flags |= MS_NODIRATIME; return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2d7525fbcf25..d85c1b819145 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2133,6 +2133,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; u32 nfsvers = nfss->nfs_client->rpc_ops->version; + sync_filesystem(sb); + /* * Userspace mount programs that send binary options generally send * them populated with default values. We have no way to know which diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c7d1f9f18b09..4b0a8d4a8345 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1114,6 +1114,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) unsigned long old_mount_opt; int err; + sync_filesystem(sb); old_sb_flags = sb->s_flags; old_mount_opt = nilfs->ns_mount_opt; diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 82650d52d916..bd5610d48242 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -468,6 +468,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) ntfs_debug("Entering with remount options string: %s", opt); + sync_filesystem(sb); + #ifndef NTFS_RW /* For read-only compiled driver, enforce read-only flag. */ *flags |= MS_RDONLY; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 01b85165552b..616bde667d8d 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -632,6 +632,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) struct ocfs2_super *osb = OCFS2_SB(sb); u32 tmp; + sync_filesystem(sb); + if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || !ocfs2_check_set_options(sb, &parsed_options)) { ret = -EINVAL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 75885ffde44e..f4026aba26f3 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -375,6 +375,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) static int openprom_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_NOATIME; return 0; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..85c5018b5b89 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) int proc_remount(struct super_block *sb, int *flags, char *data) { struct pid_namespace *pid = sb->s_fs_info; + + sync_filesystem(sb); return !proc_parse_options(data, pid); } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 57bbfeaab663..0a57f914de2e 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -247,6 +247,7 @@ static void parse_options(char *options) static int pstore_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); parse_options(data); return 0; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2e8caa62da78..3410e9f657ca 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -46,6 +46,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) { struct qnx4_sb_info *qs; + sync_filesystem(sb); qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; *flags |= MS_RDONLY; diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 8d941edfefa1..65cdaab3ed49 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) static int qnx6_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3078f8..9fc9d56533fe 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1318,6 +1318,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif + sync_filesystem(s); reiserfs_write_lock(s); #ifdef CONFIG_QUOTA diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 15cbc41ee365..ae839482c341 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -435,6 +435,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ static int romfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 202df6312d4e..031c8d67fd51 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) static int squashfs_remount(struct super_block *sb, int *flags, char *data) { + sync_filesystem(sb); *flags |= MS_RDONLY; return 0; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c327d4ee1235..4742e58f3fc5 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); + sync_filesystem(sb); if (sbi->s_forced_ro) *flags |= MS_RDONLY; return 0; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..0d0036750c71 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1840,6 +1840,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) int err; struct ubifs_info *c = sb->s_fs_info; + sync_filesystem(sb); dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); err = ubifs_parse_options(c, data, 1); diff --git a/fs/udf/super.c b/fs/udf/super.c index 9ac4057a86c9..3b1a4bea332d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -629,6 +629,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); int error = 0; + sync_filesystem(sb); uopt.flags = sbi->s_flags; uopt.uid = sbi->s_uid; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 329f2f53b7ed..b8c6791f046f 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1280,6 +1280,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) unsigned new_mount_opt, ufstype; unsigned flags; + sync_filesystem(sb); lock_ufs(sb); mutex_lock(&UFS_SB(sb)->s_lock); uspi = UFS_SB(sb)->s_uspi; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 3033ba5e9762..478c0ad5e36f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1218,6 +1218,7 @@ xfs_fs_remount( char *p; int error; + sync_filesystem(sb); while ((p = strsep(&options, ",")) != NULL) { int token; -- cgit v1.2.3 From d933d04a83dbd77eb7b1fc56e01ca45186cebb24 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:05:52 -0700 Subject: fs/squashfs/squashfs.h: replace pr_warning by pr_warn Update the last pr_warning callsite in fs branch Signed-off-by: Fabian Frederick Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/squashfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 9e1bb79f7e6f..887d6d270080 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -25,7 +25,7 @@ #define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args) -#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) +#define WARNING(s, args...) pr_warn("SQUASHFS: "s, ## args) /* block.c */ extern int squashfs_read_data(struct super_block *, u64, int, u64 *, -- cgit v1.2.3 From 7fcc243f303f04272db61110e70ed2dfde565378 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:03:50 -0700 Subject: fs/squashfs/file_direct.c: replace count*size kmalloc by kmalloc_array kmalloc_array() manages count*sizeof overflow. Signed-off-by: Fabian Frederick Cc: Phillip Lougher Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/file_direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c index 62a0de6632e1..43e7a7eddac0 100644 --- a/fs/squashfs/file_direct.c +++ b/fs/squashfs/file_direct.c @@ -44,7 +44,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) pages = end_index - start_index + 1; - page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); + page = kmalloc_array(pages, sizeof(void *), GFP_KERNEL); if (page == NULL) return res; -- cgit v1.2.3 From b9281218c2d20a89b50bd6c152c9ec398527deca Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 6 Aug 2014 16:03:52 -0700 Subject: fs/squashfs/super.c: logging cleanup - Convert printk to pr_foo() - Add pr_fmt for future logging entries - Coalesce formats Signed-off-by: Fabian Frederick Cc: Phillip Lougher Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/squashfs/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 031c8d67fd51..5056babe00df 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -27,6 +27,8 @@ * the filesystem. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -448,8 +450,7 @@ static int __init init_squashfs_fs(void) return err; } - printk(KERN_INFO "squashfs: version 4.0 (2009/01/31) " - "Phillip Lougher\n"); + pr_info("version 4.0 (2009/01/31) Phillip Lougher\n"); return 0; } -- cgit v1.2.3 From 5c913a40d243d609a087703d9217e69553258866 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 27 Nov 2014 06:49:14 +0000 Subject: Squashfs: add LZ4 compression support Add support for reading file systems compressed with the LZ4 compression algorithm. This patch adds the LZ4 decompressor wrapper code. Signed-off-by: Phillip Lougher --- fs/squashfs/lz4_wrapper.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++ fs/squashfs/squashfs_fs.h | 1 + 2 files changed, 143 insertions(+) create mode 100644 fs/squashfs/lz4_wrapper.c diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c new file mode 100644 index 000000000000..c31e2bc9c081 --- /dev/null +++ b/fs/squashfs/lz4_wrapper.c @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2013, 2014 + * Phillip Lougher + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs.h" +#include "decompressor.h" +#include "page_actor.h" + +#define LZ4_LEGACY 1 + +struct lz4_comp_opts { + __le32 version; + __le32 flags; +}; + +struct squashfs_lz4 { + void *input; + void *output; +}; + + +static void *lz4_comp_opts(struct squashfs_sb_info *msblk, + void *buff, int len) +{ + struct lz4_comp_opts *comp_opts = buff; + + /* LZ4 compressed filesystems always have compression options */ + if (comp_opts == NULL || len < sizeof(*comp_opts)) + return ERR_PTR(-EIO); + + if (le32_to_cpu(comp_opts->version) != LZ4_LEGACY) { + /* LZ4 format currently used by the kernel is the 'legacy' + * format */ + ERROR("Unknown LZ4 version\n"); + return ERR_PTR(-EINVAL); + } + + return NULL; +} + + +static void *lz4_init(struct squashfs_sb_info *msblk, void *buff) +{ + int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); + struct squashfs_lz4 *stream; + + stream = kzalloc(sizeof(*stream), GFP_KERNEL); + if (stream == NULL) + goto failed; + stream->input = vmalloc(block_size); + if (stream->input == NULL) + goto failed2; + stream->output = vmalloc(block_size); + if (stream->output == NULL) + goto failed3; + + return stream; + +failed3: + vfree(stream->input); +failed2: + kfree(stream); +failed: + ERROR("Failed to initialise LZ4 decompressor\n"); + return ERR_PTR(-ENOMEM); +} + + +static void lz4_free(void *strm) +{ + struct squashfs_lz4 *stream = strm; + + if (stream) { + vfree(stream->input); + vfree(stream->output); + } + kfree(stream); +} + + +static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, + struct buffer_head **bh, int b, int offset, int length, + struct squashfs_page_actor *output) +{ + struct squashfs_lz4 *stream = strm; + void *buff = stream->input, *data; + int avail, i, bytes = length, res; + size_t dest_len = output->length; + + for (i = 0; i < b; i++) { + avail = min(bytes, msblk->devblksize - offset); + memcpy(buff, bh[i]->b_data + offset, avail); + buff += avail; + bytes -= avail; + offset = 0; + put_bh(bh[i]); + } + + res = lz4_decompress_unknownoutputsize(stream->input, length, + stream->output, &dest_len); + if (res) + return -EIO; + + bytes = dest_len; + data = squashfs_first_page(output); + buff = stream->output; + while (data) { + if (bytes <= PAGE_CACHE_SIZE) { + memcpy(data, buff, bytes); + break; + } + memcpy(data, buff, PAGE_CACHE_SIZE); + buff += PAGE_CACHE_SIZE; + bytes -= PAGE_CACHE_SIZE; + data = squashfs_next_page(output); + } + squashfs_finish_page(output); + + return dest_len; +} + +const struct squashfs_decompressor squashfs_lz4_comp_ops = { + .init = lz4_init, + .comp_opts = lz4_comp_opts, + .free = lz4_free, + .decompress = lz4_uncompress, + .id = LZ4_COMPRESSION, + .name = "lz4", + .supported = 1 +}; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 4b2beda49498..506f4ba5b983 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -240,6 +240,7 @@ struct meta_index { #define LZMA_COMPRESSION 2 #define LZO_COMPRESSION 3 #define XZ_COMPRESSION 4 +#define LZ4_COMPRESSION 5 struct squashfs_super_block { __le32 s_magic; -- cgit v1.2.3 From 163b9ffb69cea2390525aa64e85a089967b2bd61 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 27 Nov 2014 18:48:44 +0000 Subject: Squashfs: Add LZ4 compression configuration option Add the glue code, and also update the documentation. Signed-off-by: Phillip Lougher --- Documentation/filesystems/squashfs.txt | 8 ++++---- fs/squashfs/Kconfig | 15 +++++++++++++++ fs/squashfs/Makefile | 1 + fs/squashfs/decompressor.c | 7 +++++++ fs/squashfs/decompressor.h | 4 ++++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 403c090aca39..e5274f84dc56 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -2,10 +2,10 @@ SQUASHFS 4.0 FILESYSTEM ======================= Squashfs is a compressed read-only filesystem for Linux. -It uses zlib/lzo/xz compression to compress files, inodes and directories. -Inodes in the system are very small and all blocks are packed to minimise -data overhead. Block sizes greater than 4K are supported up to a maximum -of 1Mbytes (default block size 128K). +It uses zlib, lz4, lzo, or xz compression to compress files, inodes and +directories. Inodes in the system are very small and all blocks are packed to +minimise data overhead. Block sizes greater than 4K are supported up to a +maximum of 1Mbytes (default block size 128K). Squashfs is intended for general read-only filesystem use, for archival use (i.e. in cases where a .tar.gz file may be used), and in constrained diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index b6fa8657dcbc..ffb093e72b6c 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -120,6 +120,21 @@ config SQUASHFS_ZLIB If unsure, say Y. +config SQUASHFS_LZ4 + bool "Include support for LZ4 compressed file systems" + depends on SQUASHFS + select LZ4_DECOMPRESS + help + Saying Y here includes support for reading Squashfs file systems + compressed with LZ4 compression. LZ4 compression is mainly + aimed at embedded systems with slower CPUs where the overheads + of zlib are too high. + + LZ4 is not the standard compression used in Squashfs and so most + file systems will be readable without selecting this option. + + If unsure, say N. + config SQUASHFS_LZO bool "Include support for LZO compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 4132520b4ff2..246a6f329d89 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -11,6 +11,7 @@ squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o +squashfs-$(CONFIG_SQUASHFS_LZ4) += lz4_wrapper.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index ac22fe73b0ad..e9034bf6e5ae 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -41,6 +41,12 @@ static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 }; +#ifndef CONFIG_SQUASHFS_LZ4 +static const struct squashfs_decompressor squashfs_lz4_comp_ops = { + NULL, NULL, NULL, NULL, LZ4_COMPRESSION, "lz4", 0 +}; +#endif + #ifndef CONFIG_SQUASHFS_LZO static const struct squashfs_decompressor squashfs_lzo_comp_ops = { NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 @@ -65,6 +71,7 @@ static const struct squashfs_decompressor squashfs_unknown_comp_ops = { static const struct squashfs_decompressor *decompressor[] = { &squashfs_zlib_comp_ops, + &squashfs_lz4_comp_ops, &squashfs_lzo_comp_ops, &squashfs_xz_comp_ops, &squashfs_lzma_unsupported_comp_ops, diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index af0985321808..a25713c031a5 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -46,6 +46,10 @@ static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, extern const struct squashfs_decompressor squashfs_xz_comp_ops; #endif +#ifdef CONFIG_SQUASHFS_LZ4 +extern const struct squashfs_decompressor squashfs_lz4_comp_ops; +#endif + #ifdef CONFIG_SQUASHFS_LZO extern const struct squashfs_decompressor squashfs_lzo_comp_ops; #endif -- cgit v1.2.3 From d10a716c35b315791a740a362e63a8928c8e473f Mon Sep 17 00:00:00 2001 From: Kyungsik Lee Date: Mon, 8 Jul 2013 16:01:45 -0700 Subject: decompressor: add LZ4 decompressor module Add support for LZ4 decompression in the Linux Kernel. LZ4 Decompression APIs for kernel are based on LZ4 implementation by Yann Collet. Benchmark Results(PATCH v3) Compiler: Linaro ARM gcc 4.6.2 1. ARMv7, 1.5GHz based board Kernel: linux 3.4 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.7MB 20.1MB/s, 25.2MB/s(UA) LZ4 7.3MB 29.1MB/s, 45.6MB/s(UA) 2. ARMv7, 1.7GHz based board Kernel: linux 3.7 Uncompressed Kernel Size: 14MB Compressed Size Decompression Speed LZO 6.0MB 34.1MB/s, 52.2MB/s(UA) LZ4 6.5MB 86.7MB/s - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch set is for adding support for LZ4-compressed Kernel. LZ4 is a very fast lossless compression algorithm and it also features an extremely fast decoder [1]. But we have five of decompressors already and one question which does arise, however, is that of where do we stop adding new ones? This issue had been discussed and came to the conclusion [2]. Russell King said that we should have: - one decompressor which is the fastest - one decompressor for the highest compression ratio - one popular decompressor (eg conventional gzip) If we have a replacement one for one of these, then it should do exactly that: replace it. The benchmark shows that an 8% increase in image size vs a 66% increase in decompression speed compared to LZO(which has been known as the fastest decompressor in the Kernel). Therefore the "fast but may not be small" compression title has clearly been taken by LZ4 [3]. [1] http://code.google.com/p/lz4/ [2] http://thread.gmane.org/gmane.linux.kbuild.devel/9157 [3] http://thread.gmane.org/gmane.linux.kbuild.devel/9347 LZ4 homepage: http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository: http://code.google.com/p/lz4/ Signed-off-by: Kyungsik Lee Signed-off-by: Yann Collet Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Russell King Cc: Borislav Petkov Cc: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 51 ++++++++ lib/lz4/lz4_decompress.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 94 ++++++++++++++ 3 files changed, 471 insertions(+) create mode 100644 include/linux/lz4.h create mode 100644 lib/lz4/lz4_decompress.c create mode 100644 lib/lz4/lz4defs.h diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 000000000000..7f6c75a093f8 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,51 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len); +#endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c new file mode 100644 index 000000000000..dcc89753af65 --- /dev/null +++ b/lib/lz4/lz4_decompress.c @@ -0,0 +1,326 @@ +/* + * LZ4 Decompressor for Linux kernel + * + * Copyright (C) 2013 LG Electronics Co., Ltd. (http://www.lge.com/) + * + * Based on LZ4 implementation by Yann Collet. + * + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + */ + +#ifndef STATIC +#include +#include +#endif +#include + +#include + +#include "lz4defs.h" + +static int lz4_uncompress(const char *source, char *dest, int osize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *ref; + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + osize; + BYTE *cpy; + unsigned token; + size_t length; + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + while (1) { + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + size_t len; + + len = *ip++; + for (; len == 255; length += 255) + len = *ip++; + length += len; + } + + /* copy literals */ + cpy = op + length; + if (unlikely(cpy > oend - COPYLENGTH)) { + /* + * Error: not enough place for another match + * (min 4) + 5 literals + */ + if (cpy != oend) + goto _output_error; + + memcpy(op, ip, length); + ip += length; + break; /* EOF */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + + /* Error: offset create reference outside destination buffer */ + if (unlikely(ref < (BYTE *const) dest)) + goto _output_error; + + /* get matchlength */ + length = token & ML_MASK; + if (length == ML_MASK) { + for (; *ip == 255; length += 255) + ip++; + length += *ip++; + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op-ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE - 4); + if (cpy > (oend - COPYLENGTH)) { + + /* Error: request to write beyond destination buffer */ + if (cpy > oend) + goto _output_error; + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *)ip) - source); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *)ip) - source)); +} + +static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, + int isize, size_t maxoutputsize) +{ + const BYTE *ip = (const BYTE *) source; + const BYTE *const iend = ip + isize; + const BYTE *ref; + + + BYTE *op = (BYTE *) dest; + BYTE * const oend = op + maxoutputsize; + BYTE *cpy; + + size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 + size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + + /* Main Loop */ + while (ip < iend) { + + unsigned token; + size_t length; + + /* get runlength */ + token = *ip++; + length = (token >> ML_BITS); + if (length == RUN_MASK) { + int s = 255; + while ((ip < iend) && (s == 255)) { + s = *ip++; + length += s; + } + } + /* copy literals */ + cpy = op + length; + if ((cpy > oend - COPYLENGTH) || + (ip + length > iend - COPYLENGTH)) { + + if (cpy > oend) + goto _output_error;/* writes beyond buffer */ + + if (ip + length != iend) + goto _output_error;/* + * Error: LZ4 format requires + * to consume all input + * at this stage + */ + memcpy(op, ip, length); + op += length; + break;/* Necessarily EOF, due to parsing restrictions */ + } + LZ4_WILDCOPY(ip, op, cpy); + ip -= (op - cpy); + op = cpy; + + /* get offset */ + LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); + ip += 2; + if (ref < (BYTE * const) dest) + goto _output_error; + /* + * Error : offset creates reference + * outside of destination buffer + */ + + /* get matchlength */ + length = (token & ML_MASK); + if (length == ML_MASK) { + while (ip < iend) { + int s = *ip++; + length += s; + if (s == 255) + continue; + break; + } + } + + /* copy repeated sequence */ + if (unlikely((op - ref) < STEPSIZE)) { +#if LZ4_ARCH64 + size_t dec64 = dec64table[op - ref]; +#else + const int dec64 = 0; +#endif + op[0] = ref[0]; + op[1] = ref[1]; + op[2] = ref[2]; + op[3] = ref[3]; + op += 4; + ref += 4; + ref -= dec32table[op - ref]; + PUT4(ref, op); + op += STEPSIZE - 4; + ref -= dec64; + } else { + LZ4_COPYSTEP(ref, op); + } + cpy = op + length - (STEPSIZE-4); + if (cpy > oend - COPYLENGTH) { + if (cpy > oend) + goto _output_error; /* write outside of buf */ + + LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); + while (op < cpy) + *op++ = *ref++; + op = cpy; + /* + * Check EOF (should never happen, since last 5 bytes + * are supposed to be literals) + */ + if (op == oend) + goto _output_error; + continue; + } + LZ4_SECURECOPY(ref, op, cpy); + op = cpy; /* correction */ + } + /* end of decoding */ + return (int) (((char *) op) - dest); + + /* write overflow error detected */ +_output_error: + return (int) (-(((char *) ip) - source)); +} + +int lz4_decompress(const char *src, size_t *src_len, char *dest, + size_t actual_dest_len) +{ + int ret = -1; + int input_len = 0; + + input_len = lz4_uncompress(src, dest, actual_dest_len); + if (input_len < 0) + goto exit_0; + *src_len = input_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress); +#endif + +int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, + char *dest, size_t *dest_len) +{ + int ret = -1; + int out_len = 0; + + out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len, + *dest_len); + if (out_len < 0) + goto exit_0; + *dest_len = out_len; + + return 0; +exit_0: + return ret; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 Decompressor"); +#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h new file mode 100644 index 000000000000..43ac31d63f36 --- /dev/null +++ b/lib/lz4/lz4defs.h @@ -0,0 +1,94 @@ +/* + * lz4defs.h -- architecture specific defines + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * Detects 64 bits mode + */ +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \ + || defined(__ppc64__) || defined(__LP64__)) +#define LZ4_ARCH64 1 +#else +#define LZ4_ARCH64 0 +#endif + +/* + * Architecture-specific macros + */ +#define BYTE u8 +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ + && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + +#define PUT4(s, d) (A32(d) = A32(s)) +#define PUT8(s, d) (A64(d) = A64(s)) +#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#define PUT4(s, d) \ + put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) +#define PUT8(s, d) \ + put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) +#endif + +#define COPYLENGTH 8 +#define ML_BITS 4 +#define ML_MASK ((1U << ML_BITS) - 1) +#define RUN_BITS (8 - ML_BITS) +#define RUN_MASK ((1U << RUN_BITS) - 1) + +#if LZ4_ARCH64/* 64-bit */ +#define STEPSIZE 8 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT8(s, d); \ + d += 8; \ + s += 8; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) + +#define LZ4_SECURECOPY(s, d, e) \ + do { \ + if (d < e) { \ + LZ4_WILDCOPY(s, d, e); \ + } \ + } while (0) + +#else /* 32-bit */ +#define STEPSIZE 4 + +#define LZ4_COPYSTEP(s, d) \ + do { \ + PUT4(s, d); \ + d += 4; \ + s += 4; \ + } while (0) + +#define LZ4_COPYPACKET(s, d) \ + do { \ + LZ4_COPYSTEP(s, d); \ + LZ4_COPYSTEP(s, d); \ + } while (0) + +#define LZ4_SECURECOPY LZ4_WILDCOPY +#endif + +#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ + (d = s - get_unaligned_le16(p)) + +#define LZ4_WILDCOPY(s, d, e) \ + do { \ + LZ4_COPYPACKET(s, d); \ + } while (d < e) -- cgit v1.2.3 From 7207f784ce8414af2393dd239d9e453b2225854c Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Mon, 8 Jul 2013 16:01:49 -0700 Subject: lib: add lz4 compressor module This patchset is for supporting LZ4 compression and the crypto API using it. As shown below, the size of data is a little bit bigger but compressing speed is faster under the enabled unaligned memory access. We can use lz4 de/compression through crypto API as well. Also, It will be useful for another potential user of lz4 compression. lz4 Compression Benchmark: Compiler: ARM gcc 4.6.4 ARMv7, 1 GHz based board Kernel: linux 3.4 Uncompressed data Size: 101 MB Compressed Size compression Speed LZO 72.1MB 32.1MB/s, 33.0MB/s(UA) LZ4 75.1MB 30.4MB/s, 35.9MB/s(UA) LZ4HC 59.8MB 2.4MB/s, 2.5MB/s(UA) - UA: Unaligned memory Access support - Latest patch set for LZO applied This patch: Add support for LZ4 compression in the Linux Kernel. LZ4 Compression APIs for kernel are based on LZ4 implementation by Yann Collet and were changed for kernel coding style. LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html LZ4 source repository : http://code.google.com/p/lz4/ svn revision : r90 Two APIs are added: lz4_compress() support basic lz4 compression whereas lz4hc_compress() support high compression or CPU performance get lower but compression ratio get higher. Also, we require the pre-allocated working memory with the defined size and destination buffer must be allocated with the size of lz4_compressbound. [akpm@linux-foundation.org: make lz4_compresshcctx() static] Signed-off-by: Chanho Min Cc: "Darrick J. Wong" Cc: Bob Pearson Cc: Richard Weinberger Cc: Herbert Xu Cc: Yann Collet Cc: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 36 ++++ lib/Kconfig | 9 + lib/Makefile | 3 + lib/lz4/Makefile | 3 + lib/lz4/lz4_compress.c | 443 ++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4defs.h | 66 +++++- lib/lz4/lz4hc_compress.c | 539 +++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 1097 insertions(+), 2 deletions(-) create mode 100644 lib/lz4/Makefile create mode 100644 lib/lz4/lz4_compress.c create mode 100644 lib/lz4/lz4hc_compress.c diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 7f6c75a093f8..d21c13f10a64 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -9,6 +9,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *)) +#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *)) /* * lz4_compressbound() @@ -20,6 +22,40 @@ static inline size_t lz4_compressbound(size_t isize) return isize + (isize / 255) + 16; } +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + /* * lz4_decompress() * src : source address of the compressed data diff --git a/lib/Kconfig b/lib/Kconfig index fe01d418b09a..06d94d885877 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -189,6 +189,15 @@ config LZO_COMPRESS config LZO_DECOMPRESS tristate +config LZ4_COMPRESS + tristate + +config LZ4HC_COMPRESS + tristate + +config LZ4_DECOMPRESS + tristate + source "lib/xz/Kconfig" # diff --git a/lib/Makefile b/lib/Makefile index c55a037a354e..4bd29d037f22 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -75,6 +75,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_LZ4_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ obj-$(CONFIG_XZ_DEC) += xz/ obj-$(CONFIG_RAID6_PQ) += raid6/ diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 000000000000..8085d04e9309 --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o +obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o +obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c new file mode 100644 index 000000000000..fd94058bd7f9 --- /dev/null +++ b/lib/lz4/lz4_compress.c @@ -0,0 +1,443 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +/* + * LZ4_compressCtx : + * ----------------- + * Compress 'isize' bytes from 'source' into an output buffer 'dest' of + * maximum size 'maxOutputSize'. * If it cannot achieve it, compression + * will stop, and result of the function will be zero. + * return : the number of bytes written in buffer 'dest', or 0 if the + * compression fails + */ +static inline int lz4_compressctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + HTYPE *hashtable = (HTYPE *)ctx; + const u8 *ip = (u8 *)source; +#if LZ4_ARCH64 + const BYTE * const base = ip; +#else + const int base = 0; +#endif + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + ip++; + forwardh = LZ4_HASH_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (unlikely(forwardip > mflimit)) + goto _last_literals; + + forwardh = LZ4_HASH_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = ip - base; + } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) && + unlikely(ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + + if (length >= (int)RUN_MASK) { + int len; + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + /* Encode MatchLength */ + length = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)) + return 0; + if (length >= (int)ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length > 254) { + length -= 255; + *op++ = 255; + } + *op++ = (u8)length; + } else + *token += length; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base; + + /* Test next position */ + ref = base + hashtable[LZ4_HASH_VALUE(ip)]; + hashtable[LZ4_HASH_VALUE(ip)] = ip - base; + if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (((char *)op - dest) + lastrun + 1 + + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize) + return 0; + + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + + /* End */ + return (int)(((char *)op) - dest); +} + +static inline int lz4_compress64kctx(void *ctx, + const char *source, + char *dest, + int isize, + int maxoutputsize) +{ + u16 *hashtable = (u16 *)ctx; + const u8 *ip = (u8 *) source; + const u8 *anchor = ip; + const u8 *const base = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + #define MATCHLIMIT (iend - LASTLITERALS) + + u8 *op = (u8 *) dest; + u8 *const oend = op + maxoutputsize; + int len, length; + const int skipstrength = SKIPSTRENGTH; + u32 forwardh; + int lastrun; + + /* Init */ + if (isize < MINLENGTH) + goto _last_literals; + + memset((void *)hashtable, 0, LZ4_MEM_COMPRESS); + + /* First Byte */ + ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + + /* Main Loop */ + for (;;) { + int findmatchattempts = (1U << skipstrength) + 3; + const u8 *forwardip = ip; + const u8 *ref; + u8 *token; + + /* Find a match */ + do { + u32 h = forwardh; + int step = findmatchattempts++ >> skipstrength; + ip = forwardip; + forwardip = ip + step; + + if (forwardip > mflimit) + goto _last_literals; + + forwardh = LZ4_HASH64K_VALUE(forwardip); + ref = base + hashtable[h]; + hashtable[h] = (u16)(ip - base); + } while (A32(ref) != A32(ip)); + + /* Catch up */ + while ((ip > anchor) && (ref > (u8 *)source) + && (ip[-1] == ref[-1])) { + ip--; + ref--; + } + + /* Encode Literal length */ + length = (int)(ip - anchor); + token = op++; + /* Check output limit */ + if (unlikely(op + length + (2 + 1 + LASTLITERALS) + + (length >> 8) > oend)) + return 0; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *op++ = 255; + *op++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(anchor, op, length); + +_next_match: + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref)); + + /* Start Counting */ + ip += MINMATCH; + /* MinMatch verified */ + ref += MINMATCH; + anchor = ip; + + while (ip < MATCHLIMIT - (STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(ref) ^ A64(ip); + #else + u32 diff = A32(ref) ^ A32(ip); + #endif + + if (!diff) { + ip += STEPSIZE; + ref += STEPSIZE; + continue; + } + ip += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) { + ip += 4; + ref += 4; + } + #endif + if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) { + ip += 2; + ref += 2; + } + if ((ip < MATCHLIMIT) && (*ref == *ip)) + ip++; +_endcount: + + /* Encode MatchLength */ + len = (int)(ip - anchor); + /* Check output limit */ + if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)) + return 0; + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *op++ = 255; + *op++ = 255; + } + if (len > 254) { + len -= 255; + *op++ = 255; + } + *op++ = (u8)len; + } else + *token += len; + + /* Test end of chunk */ + if (ip > mflimit) { + anchor = ip; + break; + } + + /* Fill table */ + hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base); + + /* Test next position */ + ref = base + hashtable[LZ4_HASH64K_VALUE(ip)]; + hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base); + if (A32(ref) == A32(ip)) { + token = op++; + *token = 0; + goto _next_match; + } + + /* Prepare next loop */ + anchor = ip++; + forwardh = LZ4_HASH64K_VALUE(ip); + } + +_last_literals: + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend) + return 0; + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8)lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int)(((char *)op) - dest); +} + +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + if (src_len < LZ4_64KLIMIT) + out_len = lz4_compress64kctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + else + out_len = lz4_compressctx(wrkmem, src, dst, src_len, + lz4_compressbound(src_len)); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + + return 0; +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 43ac31d63f36..abcecdc2d0f2 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -22,23 +22,40 @@ * Architecture-specific macros */ #define BYTE u8 +typedef struct _U16_S { u16 v; } U16_S; +typedef struct _U32_S { u32 v; } U32_S; +typedef struct _U64_S { u64 v; } U64_S; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \ && defined(ARM_EFFICIENT_UNALIGNED_ACCESS) -typedef struct _U32_S { u32 v; } U32_S; -typedef struct _U64_S { u64 v; } U64_S; +#define A16(x) (((U16_S *)(x))->v) #define A32(x) (((U32_S *)(x))->v) #define A64(x) (((U64_S *)(x))->v) #define PUT4(s, d) (A32(d) = A32(s)) #define PUT8(s, d) (A64(d) = A64(s)) +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + A16(p) = v; \ + p += 2; \ + } while (0) #else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ +#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v)) +#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v)) +#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v)) + #define PUT4(s, d) \ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d) #define PUT8(s, d) \ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d) + +#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \ + do { \ + put_unaligned(v, (u16 *)(p)); \ + p += 2; \ + } while (0) #endif #define COPYLENGTH 8 @@ -46,6 +63,29 @@ typedef struct _U64_S { u64 v; } U64_S; #define ML_MASK ((1U << ML_BITS) - 1) #define RUN_BITS (8 - ML_BITS) #define RUN_MASK ((1U << RUN_BITS) - 1) +#define MEMORY_USAGE 14 +#define MINMATCH 4 +#define SKIPSTRENGTH 6 +#define LASTLITERALS 5 +#define MFLIMIT (COPYLENGTH + MINMATCH) +#define MINLENGTH (MFLIMIT + 1) +#define MAXD_LOG 16 +#define MAXD (1 << MAXD_LOG) +#define MAXD_MASK (u32)(MAXD - 1) +#define MAX_DISTANCE (MAXD - 1) +#define HASH_LOG (MAXD_LOG - 1) +#define HASHTABLESIZE (1 << HASH_LOG) +#define MAX_NB_ATTEMPTS 256 +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1)) +#define HASHLOG64K ((MEMORY_USAGE - 2) + 1) +#define HASH64KTABLESIZE (1U << HASHLOG64K) +#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - (MEMORY_USAGE-2))) +#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASHLOG64K)) +#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \ + ((MINMATCH * 8) - HASH_LOG)) #if LZ4_ARCH64/* 64-bit */ #define STEPSIZE 8 @@ -65,6 +105,13 @@ typedef struct _U64_S { u64 v; } U64_S; LZ4_WILDCOPY(s, d, e); \ } \ } while (0) +#define HTYPE u32 + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3) +#endif #else /* 32-bit */ #define STEPSIZE 4 @@ -83,6 +130,14 @@ typedef struct _U64_S { u64 v; } U64_S; } while (0) #define LZ4_SECURECOPY LZ4_WILDCOPY +#define HTYPE const u8* + +#ifdef __BIG_ENDIAN +#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3) +#else +#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3) +#endif + #endif #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ @@ -92,3 +147,10 @@ typedef struct _U64_S { u64 v; } U64_S; do { \ LZ4_COPYPACKET(s, d); \ } while (d < e) + +#define LZ4_BLINDCOPY(s, d, l) \ + do { \ + u8 *e = (d) + l; \ + LZ4_WILDCOPY(s, d, e); \ + d = e; \ + } while (0) diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c new file mode 100644 index 000000000000..eb1a74f5e368 --- /dev/null +++ b/lib/lz4/lz4hc_compress.c @@ -0,0 +1,539 @@ +/* + * LZ4 HC - High Compression Mode of LZ4 + * Copyright (C) 2011-2012, Yann Collet. + * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at : + * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + * - LZ4 source repository : http://code.google.com/p/lz4/ + * + * Changed for kernel use by: + * Chanho Min + */ + +#include +#include +#include +#include +#include "lz4defs.h" + +struct lz4hc_data { + const u8 *base; + HTYPE hashtable[HASHTABLESIZE]; + u16 chaintable[MAXD]; + const u8 *nexttoupdate; +} __attribute__((__packed__)); + +static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base) +{ + memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable)); + memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable)); + +#if LZ4_ARCH64 + hc4->nexttoupdate = base + 1; +#else + hc4->nexttoupdate = base; +#endif + hc4->base = base; + return 1; +} + +/* Update chains up to ip (excluded) */ +static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip) +{ + u16 *chaintable = hc4->chaintable; + HTYPE *hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + + while (hc4->nexttoupdate < ip) { + const u8 *p = hc4->nexttoupdate; + size_t delta = p - (hashtable[HASH_VALUE(p)] + base); + if (delta > MAX_DISTANCE) + delta = MAX_DISTANCE; + chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta; + hashtable[HASH_VALUE(p)] = (p) - base; + hc4->nexttoupdate++; + } +} + +static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2, + const u8 *const matchlimit) +{ + const u8 *p1t = p1; + + while (p1t < matchlimit - (STEPSIZE - 1)) { +#if LZ4_ARCH64 + u64 diff = A64(p2) ^ A64(p1t); +#else + u32 diff = A32(p2) ^ A32(p1t); +#endif + if (!diff) { + p1t += STEPSIZE; + p2 += STEPSIZE; + continue; + } + p1t += LZ4_NBCOMMONBYTES(diff); + return p1t - p1; + } +#if LZ4_ARCH64 + if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) { + p1t += 4; + p2 += 4; + } +#endif + + if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) { + p1t += 2; + p2 += 2; + } + if ((p1t < matchlimit) && (*p2 == *p1t)) + p1t++; + return p1t - p1; +} + +static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *const matchlimit, const u8 **matchpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; + const u8 *ref; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + int nbattempts = MAX_NB_ATTEMPTS; + size_t repl = 0, ml = 0; + u16 delta; + + /* HC4 match finder */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + /* potential repetition */ + if (ref >= ip-4) { + /* confirmed */ + if (A32(ref) == A32(ip)) { + delta = (u16)(ip-ref); + repl = ml = lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + *matchpos = ref; + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + while ((ref >= ip - MAX_DISTANCE) && nbattempts) { + nbattempts--; + if (*(ref + ml) == *(ip + ml)) { + if (A32(ref) == A32(ip)) { + size_t mlt = + lz4hc_commonlength(ip + MINMATCH, + ref + MINMATCH, matchlimit) + MINMATCH; + if (mlt > ml) { + ml = mlt; + *matchpos = ref; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + + /* Complete table */ + if (repl) { + const BYTE *ptr = ip; + const BYTE *end; + end = ip + repl - (MINMATCH-1); + /* Pre-Load */ + while (ptr < end - delta) { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + ptr++; + } + do { + chaintable[(size_t)(ptr) & MAXD_MASK] = delta; + /* Head of chain */ + hashtable[HASH_VALUE(ptr)] = (ptr) - base; + ptr++; + } while (ptr < end); + hc4->nexttoupdate = end; + } + + return (int)ml; +} + +static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4, + const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest, + const u8 **matchpos, const u8 **startpos) +{ + u16 *const chaintable = hc4->chaintable; + HTYPE *const hashtable = hc4->hashtable; +#if LZ4_ARCH64 + const BYTE * const base = hc4->base; +#else + const int base = 0; +#endif + const u8 *ref; + int nbattempts = MAX_NB_ATTEMPTS; + int delta = (int)(ip - startlimit); + + /* First Match */ + lz4hc_insert(hc4, ip); + ref = hashtable[HASH_VALUE(ip)] + base; + + while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base) + && (nbattempts)) { + nbattempts--; + if (*(startlimit + longest) == *(ref - delta + longest)) { + if (A32(ref) == A32(ip)) { + const u8 *reft = ref + MINMATCH; + const u8 *ipt = ip + MINMATCH; + const u8 *startt = ip; + + while (ipt < matchlimit-(STEPSIZE - 1)) { + #if LZ4_ARCH64 + u64 diff = A64(reft) ^ A64(ipt); + #else + u32 diff = A32(reft) ^ A32(ipt); + #endif + + if (!diff) { + ipt += STEPSIZE; + reft += STEPSIZE; + continue; + } + ipt += LZ4_NBCOMMONBYTES(diff); + goto _endcount; + } + #if LZ4_ARCH64 + if ((ipt < (matchlimit - 3)) + && (A32(reft) == A32(ipt))) { + ipt += 4; + reft += 4; + } + ipt += 2; + #endif + if ((ipt < (matchlimit - 1)) + && (A16(reft) == A16(ipt))) { + reft += 2; + } + if ((ipt < matchlimit) && (*reft == *ipt)) + ipt++; +_endcount: + reft = ref; + + while ((startt > startlimit) + && (reft > hc4->base) + && (startt[-1] == reft[-1])) { + startt--; + reft--; + } + + if ((ipt - startt) > longest) { + longest = (int)(ipt - startt); + *matchpos = reft; + *startpos = startt; + } + } + } + ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK]; + } + return longest; +} + +static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor, + int ml, const u8 *ref) +{ + int length, len; + u8 *token; + + /* Encode Literal length */ + length = (int)(*ip - *anchor); + token = (*op)++; + if (length >= (int)RUN_MASK) { + *token = (RUN_MASK << ML_BITS); + len = length - RUN_MASK; + for (; len > 254 ; len -= 255) + *(*op)++ = 255; + *(*op)++ = (u8)len; + } else + *token = (length << ML_BITS); + + /* Copy Literals */ + LZ4_BLINDCOPY(*anchor, *op, length); + + /* Encode Offset */ + LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref)); + + /* Encode MatchLength */ + len = (int)(ml - MINMATCH); + if (len >= (int)ML_MASK) { + *token += ML_MASK; + len -= ML_MASK; + for (; len > 509 ; len -= 510) { + *(*op)++ = 255; + *(*op)++ = 255; + } + if (len > 254) { + len -= 255; + *(*op)++ = 255; + } + *(*op)++ = (u8)len; + } else + *token += len; + + /* Prepare next loop */ + *ip += ml; + *anchor = *ip; + + return 0; +} + +static int lz4_compresshcctx(struct lz4hc_data *ctx, + const char *source, + char *dest, + int isize) +{ + const u8 *ip = (const u8 *)source; + const u8 *anchor = ip; + const u8 *const iend = ip + isize; + const u8 *const mflimit = iend - MFLIMIT; + const u8 *const matchlimit = (iend - LASTLITERALS); + + u8 *op = (u8 *)dest; + + int ml, ml2, ml3, ml0; + const u8 *ref = NULL; + const u8 *start2 = NULL; + const u8 *ref2 = NULL; + const u8 *start3 = NULL; + const u8 *ref3 = NULL; + const u8 *start0; + const u8 *ref0; + int lastrun; + + ip++; + + /* Main Loop */ + while (ip < mflimit) { + ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref)); + if (!ml) { + ip++; + continue; + } + + /* saved, in case we would skip too much */ + start0 = ip; + ref0 = ref; + ml0 = ml; +_search2: + if (ip+ml < mflimit) + ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2, + ip + 1, matchlimit, ml, &ref2, &start2); + else + ml2 = ml; + /* No better match */ + if (ml2 == ml) { + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + continue; + } + + if (start0 < ip) { + /* empirical */ + if (start2 < ip + ml0) { + ip = start0; + ref = ref0; + ml = ml0; + } + } + /* + * Here, start0==ip + * First Match too small : removed + */ + if ((start2 - ip) < 3) { + ml = ml2; + ip = start2; + ref = ref2; + goto _search2; + } + +_search3: + /* + * Currently we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) + */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + ml2 - MINMATCH) + new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* + * Now, we have start2 = ip+new_ml, + * with new_ml=min(ml, OPTIMAL_ML=18) + */ + if (start2 + ml2 < mflimit) + ml3 = lz4hc_insertandgetwidermatch(ctx, + start2 + ml2 - 3, start2, matchlimit, + ml2, &ref3, &start3); + else + ml3 = ml2; + + /* No better match : 2 sequences to encode */ + if (ml3 == ml2) { + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) + ml = (int)(start2 - ip); + + /* Now, encode 2 sequences */ + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start2; + lz4_encodesequence(&ip, &op, &anchor, ml2, ref2); + continue; + } + + /* Not enough space for match 2 : remove it */ + if (start3 < ip + ml + 3) { + /* + * can write Seq1 immediately ==> Seq2 is removed, + * so Seq3 becomes Seq1 + */ + if (start3 >= (ip + ml)) { + if (start2 < ip + ml) { + int correction = + (int)(ip + ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _search3; + } + + /* + * OK, now we have 3 ascending matches; let's write at least + * the first one ip & ref are known; Now for ml + */ + if (start2 < ip + ml) { + if ((start2 - ip) < (int)ML_MASK) { + int correction; + if (ml > OPTIMAL_ML) + ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) + ml = (int)(start2 - ip) + ml2 + - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else + ml = (int)(start2 - ip); + } + lz4_encodesequence(&ip, &op, &anchor, ml, ref); + + ip = start2; + ref = ref2; + ml = ml2; + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + + goto _search3; + } + + /* Encode Last Literals */ + lastrun = (int)(iend - anchor); + if (lastrun >= (int)RUN_MASK) { + *op++ = (RUN_MASK << ML_BITS); + lastrun -= RUN_MASK; + for (; lastrun > 254 ; lastrun -= 255) + *op++ = 255; + *op++ = (u8) lastrun; + } else + *op++ = (lastrun << ML_BITS); + memcpy(op, anchor, iend - anchor); + op += iend - anchor; + /* End */ + return (int) (((char *)op) - dest); +} + +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem) +{ + int ret = -1; + int out_len = 0; + + struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem; + lz4hc_init(hc4, (const u8 *)src); + out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src, + (char *)dst, (int)src_len); + + if (out_len < 0) + goto exit; + + *dst_len = out_len; + return 0; + +exit: + return ret; +} +EXPORT_SYMBOL_GPL(lz4hc_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4HC compressor"); -- cgit v1.2.3 From 92b78405e5264ad7913c2c94192b768277f93607 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 11 Sep 2013 14:26:32 -0700 Subject: lz4: fix compression/decompression signedness mismatch LZ4 compression and decompression functions require different in signedness input/output parameters: unsigned char for compression and signed char for decompression. Change decompression API to require "(const) unsigned char *". Signed-off-by: Sergey Senozhatsky Cc: Kyungsik Lee Cc: Geert Uytterhoeven Cc: Yann Collet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lz4.h | 8 ++++---- lib/lz4/lz4_decompress.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index d21c13f10a64..4356686b0a39 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, * note : Destination buffer must be already allocated. * slightly faster than lz4_decompress_unknownoutputsize() */ -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len); +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); /* * lz4_decompress_unknownoutputsize() @@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest, * Error if return (< 0) * note : Destination buffer must be already allocated. */ -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len); +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); #endif diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index dcc89753af65..677d1ea4833d 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -283,8 +283,8 @@ _output_error: return (int) (-(((char *) ip) - source)); } -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len) +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { int ret = -1; int input_len = 0; @@ -302,8 +302,8 @@ exit_0: EXPORT_SYMBOL_GPL(lz4_decompress); #endif -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len) +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) { int ret = -1; int out_len = 0; -- cgit v1.2.3 From 428e6d675d83323d05649d901cc0fa0069f8e825 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Tue, 3 Mar 2015 23:16:16 +0900 Subject: net: ping: Return EAFNOSUPPORT when appropriate. 1. For an IPv4 ping socket, ping_check_bind_addr does not check the family of the socket address that's passed in. Instead, make it behave like inet_bind, which enforces either that the address family is AF_INET, or that the family is AF_UNSPEC and the address is 0.0.0.0. 2. For an IPv6 ping socket, ping_check_bind_addr returns EINVAL if the socket family is not AF_INET6. Return EAFNOSUPPORT instead, for consistency with inet6_bind. 3. Make ping_v4_sendmsg and ping_v6_sendmsg return EAFNOSUPPORT instead of EINVAL if an incorrect socket address structure is passed in. 4. Make IPv6 ping sockets be IPv6-only. The code does not support IPv4, and it cannot easily be made to support IPv4 because the protocol numbers for ICMP and ICMPv6 are different. This makes connect(::ffff:192.0.2.1) fail with EAFNOSUPPORT instead of making the socket unusable. Among other things, this fixes an oops that can be triggered by: int s = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP); struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = in6addr_any, }; bind(s, (struct sockaddr *) &sin6, sizeof(sin6)); [backport of net 9145736d4862145684009d6a72a6e61324a9439e] Change-Id: If06ca86d9f1e4593c0d6df174caca3487c57a241 Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller --- net/ipv4/ping.c | 13 ++++++++++++- net/ipv6/ping.c | 5 +++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 634994af080c..8642f0044dfa 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -256,6 +256,9 @@ int ping_init_sock(struct sock *sk) kgid_t low, high; int ret = 0; + if (sk->sk_family == AF_INET6) + inet6_sk(sk)->ipv6only = 1; + inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; @@ -302,6 +305,11 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin_family != AF_INET && + !(addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr == htonl(INADDR_ANY))) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); @@ -326,6 +334,9 @@ int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -708,7 +719,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) - return -EINVAL; + return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 38ceca8a6358..b5f23f48ca74 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -126,9 +126,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (msg->msg_name) { struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; - if (msg->msg_namelen < sizeof(struct sockaddr_in6) || - u->sin6_family != AF_INET6) { + if (msg->msg_namelen < sizeof(*u)) return -EINVAL; + if (u->sin6_family != AF_INET6) { + return -EAFNOSUPPORT; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != u->sin6_scope_id) { -- cgit v1.2.3 From 740006ef285f77b02b016b9a21453e56db1915ed Mon Sep 17 00:00:00 2001 From: Petr Cermak Date: Wed, 18 Feb 2015 10:39:10 +0000 Subject: fs/proc/task_mmu.c: add user-space support for resetting mm->hiwater_rss (peak RSS) Peak resident size of a process can be reset back to the process's current rss value by writing "5" to /proc/pid/clear_refs. The driving use-case for this would be getting the peak RSS value, which can be retrieved from the VmHWM field in /proc/pid/status, per benchmark iteration or test scenario. Origin: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=695f055936938c674473ea071ca7359a863551e7 [akpm@linux-foundation.org: clarify behaviour in documentation] Signed-off-by: Petr Cermak Cc: Bjorn Helgaas Cc: Primiano Tucci Cc: Petr Cermak Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I543a7640639d9916e813af875003fe3ee3a6bfe0 --- Documentation/filesystems/proc.txt | 4 ++++ fs/proc/task_mmu.c | 17 ++++++++++++++++- include/linux/mm.h | 5 +++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index e0eb9d287312..22c3b9c17e5b 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -490,6 +490,10 @@ To clear the bits for the file mapped pages associated with the process > echo 3 > /proc/PID/clear_refs Any other value written to /proc/PID/clear_refs will have no effect. +To reset the peak resident set size ("high water mark") to the process's +current value: + > echo 5 > /proc/PID/clear_refs + The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags using /proc/kpageflags and number of times a page is mapped using /proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt. diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e2c925fa7827..e5bc1c7e2667 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -784,6 +784,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, #define CLEAR_REFS_ALL 1 #define CLEAR_REFS_ANON 2 #define CLEAR_REFS_MAPPED 3 +#define CLEAR_REFS_MM_HIWATER_RSS 5 static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -803,7 +804,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, rv = kstrtoint(strstrip(buffer), 10, &type); if (rv < 0) return rv; - if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) + if ((type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED) && + type != CLEAR_REFS_MM_HIWATER_RSS) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) @@ -814,6 +816,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, .pmd_entry = clear_refs_pte_range, .mm = mm, }; + + if (type == CLEAR_REFS_MM_HIWATER_RSS) { + /* + * Writing 5 to /proc/pid/clear_refs resets the peak + * resident set size to this mm's current rss value. + */ + down_write(&mm->mmap_sem); + reset_mm_hiwater_rss(mm); + up_write(&mm->mmap_sem); + goto out_mm; + } + down_read(&mm->mmap_sem); for (vma = mm->mmap; vma; vma = vma->vm_next) { clear_refs_walk.private = vma; @@ -837,6 +851,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } flush_tlb_mm(mm); up_read(&mm->mmap_sem); +out_mm: mmput(mm); } put_task_struct(task); diff --git a/include/linux/mm.h b/include/linux/mm.h index ff7f6375f33f..2d8d22cf15ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1166,6 +1166,11 @@ static inline void update_hiwater_vm(struct mm_struct *mm) mm->hiwater_vm = mm->total_vm; } +static inline void reset_mm_hiwater_rss(struct mm_struct *mm) +{ + mm->hiwater_rss = get_mm_rss(mm); +} + static inline void setmax_mm_hiwater_rss(unsigned long *maxrss, struct mm_struct *mm) { -- cgit v1.2.3 From 4f1d2937133c62cd7d5228960e9cdfb175522072 Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Sat, 7 Mar 2015 09:33:41 -0800 Subject: Revert "Grants system server access to /proc//oom_adj for Android applications." This reverts commit aa3305f2ba5976a95637c69c63409fdf007e1414. Bug: 19636629 Change-Id: I8dcd6a12cc591cb410d6a799c7e875726495504f --- fs/proc/base.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 2d00fdde9345..c3834dad09b3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -139,12 +139,6 @@ struct pid_entry { NULL, &proc_single_file_operations, \ { .proc_show = show } ) -/* ANDROID is for special files in /proc. */ -#define ANDROID(NAME, MODE, OTYPE) \ - NOD(NAME, (S_IFREG|(MODE)), \ - &proc_##OTYPE##_inode_operations, \ - &proc_##OTYPE##_operations, {}) - /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. @@ -1006,35 +1000,6 @@ out: return err < 0 ? err : count; } -static int oom_adjust_permission(struct inode *inode, int mask) -{ - uid_t uid; - struct task_struct *p; - - p = get_proc_task(inode); - if(p) { - uid = task_uid(p); - put_task_struct(p); - } - - /* - * System Server (uid == 1000) is granted access to oom_adj of all - * android applications (uid > 10000) as and services (uid >= 1000) - */ - if (p && (current_fsuid() == 1000) && (uid >= 1000)) { - if (inode->i_mode >> 6 & mask) { - return 0; - } - } - - /* Fall back to default. */ - return generic_permission(inode, mask); -} - -static const struct inode_operations proc_oom_adj_inode_operations = { - .permission = oom_adjust_permission, -}; - static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, @@ -2732,7 +2697,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - ANDROID("oom_adj", S_IRUGO|S_IWUSR, oom_adj), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), -- cgit v1.2.3 From 7b4fff3195f2938fdb859471a8ffaa467ffc5167 Mon Sep 17 00:00:00 2001 From: Rom Lemarchand Date: Sat, 7 Mar 2015 09:38:05 -0800 Subject: proc: make oom adjustment files user read-only Make oom_adj and oom_score_adj user read-only. Bug: 19636629 Change-Id: I055bb172d5b4d3d856e25918f3c5de8edf31e4a3 Signed-off-by: Rom Lemarchand --- fs/proc/base.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..b5553afa778e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2697,8 +2697,8 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), @@ -3053,8 +3053,8 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), - REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), - REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), + REG("oom_adj", S_IRUSR, proc_oom_adj_operations), + REG("oom_score_adj", S_IRUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), -- cgit v1.2.3 From 062a6b5efee1a4e97fae2569517d35af9ed99148 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Thu, 26 Feb 2015 13:47:41 +0000 Subject: dm-verity: Add modes and emit uevent on corrupted blocks Add a device specific mode to dm-verity for handling corrupted blocks: DM_VERITY_MODE_EIO is the default behavior, where reading a corrupted block results in -EIO. DM_VERITY_MODE_LOGGING only logs corrupted blocks, but does not block the read. DM_VERITY_MODE_RESTART calls kernel_restart when a corrupted block is discovered. Each mode sends a uevent to notify userspace of corruption and allow further recovery actions. Defaults to previous behavior, other modes can be enabled with an optional parameter added to the verity table. Change-Id: Ib72ae6ccb865594d28f3553bdcc5a40b1d7af390 Signed-off-by: Sami Tolvanen --- Documentation/device-mapper/verity.txt | 12 ++++- drivers/md/dm-verity.c | 98 ++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 10 deletions(-) diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt index 9884681535ee..2929f6b1ccf1 100644 --- a/Documentation/device-mapper/verity.txt +++ b/Documentation/device-mapper/verity.txt @@ -10,7 +10,7 @@ Construction Parameters - + This is the type of the on-disk hash format. @@ -62,6 +62,16 @@ Construction Parameters The hexadecimal encoding of the salt value. + + Optional. The mode of operation. + + 0 is the normal mode of operation where a corrupted block will result in an + I/O error. + + 1 is logging mode where corrupted blocks are logged and a uevent is sent to + notify user space. + + Theory of operation =================== diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index b948fd864d45..301a1dd2a08b 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -18,20 +18,36 @@ #include #include +#include #include #define DM_MSG_PREFIX "verity" +#define DM_VERITY_ENV_LENGTH 42 +#define DM_VERITY_ENV_VAR_NAME "VERITY_ERR_BLOCK_NR" + #define DM_VERITY_IO_VEC_INLINE 16 #define DM_VERITY_MEMPOOL_SIZE 4 #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 #define DM_VERITY_MAX_LEVELS 63 +#define DM_VERITY_MAX_CORRUPTED_ERRS 100 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, S_IRUGO | S_IWUSR); +enum verity_mode { + DM_VERITY_MODE_EIO = 0, + DM_VERITY_MODE_LOGGING = 1, + DM_VERITY_MODE_RESTART = 2 +}; + +enum verity_block_type { + DM_VERITY_BLOCK_TYPE_DATA, + DM_VERITY_BLOCK_TYPE_METADATA +}; + struct dm_verity { struct dm_dev *data_dev; struct dm_dev *hash_dev; @@ -54,6 +70,8 @@ struct dm_verity { unsigned digest_size; /* digest size for the current hash algorithm */ unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ + enum verity_mode mode; /* mode for handling verification errors */ + unsigned corrupted_errs;/* Number of errors for corrupted blocks */ mempool_t *vec_mempool; /* mempool of bio vector */ @@ -179,6 +197,54 @@ static void verity_hash_at_level(struct dm_verity *v, sector_t block, int level, *offset = idx << (v->hash_dev_block_bits - v->hash_per_block_bits); } +/* + * Handle verification errors. + */ +static int verity_handle_err(struct dm_verity *v, enum verity_block_type type, + unsigned long long block) +{ + char verity_env[DM_VERITY_ENV_LENGTH]; + char *envp[] = { verity_env, NULL }; + const char *type_str = ""; + struct mapped_device *md = dm_table_get_md(v->ti->table); + + if (v->corrupted_errs >= DM_VERITY_MAX_CORRUPTED_ERRS) + goto out; + + ++v->corrupted_errs; + + switch (type) { + case DM_VERITY_BLOCK_TYPE_DATA: + type_str = "data"; + break; + case DM_VERITY_BLOCK_TYPE_METADATA: + type_str = "metadata"; + break; + default: + BUG(); + } + + DMERR_LIMIT("%s: %s block %llu is corrupted", v->data_dev->name, + type_str, block); + + if (v->corrupted_errs == DM_VERITY_MAX_CORRUPTED_ERRS) + DMERR("%s: reached maximum errors", v->data_dev->name); + + snprintf(verity_env, DM_VERITY_ENV_LENGTH, "%s=%d,%llu", + DM_VERITY_ENV_VAR_NAME, type, block); + + kobject_uevent_env(&disk_to_dev(dm_disk(md))->kobj, KOBJ_CHANGE, envp); + +out: + if (v->mode == DM_VERITY_MODE_LOGGING) + return 0; + + if (v->mode == DM_VERITY_MODE_RESTART) + kernel_restart("dm-verity device corrupted"); + + return 1; +} + /* * Verify hash of a metadata block pertaining to the specified data block * ("block" argument) at a specified level ("level" argument). @@ -256,11 +322,13 @@ static int verity_verify_level(struct dm_verity_io *io, sector_t block, goto release_ret_r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("metadata block %llu is corrupted", - (unsigned long long)hash_block); v->hash_failed = 1; - r = -EIO; - goto release_ret_r; + + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, + hash_block)) { + r = -EIO; + goto release_ret_r; + } } else aux->hash_verified = 1; } @@ -377,10 +445,11 @@ test_block_hash: return r; } if (unlikely(memcmp(result, io_want_digest(v, io), v->digest_size))) { - DMERR_LIMIT("data block %llu is corrupted", - (unsigned long long)(io->block + b)); v->hash_failed = 1; - return -EIO; + + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + io->block + b)) + return -EIO; } } BUG_ON(vector != io->io_vec_size); @@ -689,8 +758,8 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - if (argc != 10) { - ti->error = "Invalid argument count: exactly 10 arguments required"; + if (argc < 10 || argc > 11) { + ti->error = "Invalid argument count: 10-11 arguments required"; r = -EINVAL; goto bad; } @@ -811,6 +880,17 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) } } + if (argc > 10) { + if (sscanf(argv[10], "%d%c", &num, &dummy) != 1 || + num < DM_VERITY_MODE_EIO || + num > DM_VERITY_MODE_RESTART) { + ti->error = "Invalid mode"; + r = -EINVAL; + goto bad; + } + v->mode = num; + } + v->hash_per_block_bits = fls((1 << v->hash_dev_block_bits) / v->digest_size) - 1; -- cgit v1.2.3 From e48e8c45925185c02b23ae461671be29c91101d5 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Wed, 11 Mar 2015 14:32:24 -0700 Subject: mm: reorder can_do_mlock to fix audit denial A userspace call to mmap(MAP_LOCKED) may result in the successful locking of memory while also producing a confusing audit log denial. can_do_mlock checks capable and rlimit. If either of these return positive can_do_mlock returns true. The capable check leads to an LSM hook used by apparmour and selinux which produce the audit denial. Reordering so rlimit is checked first eliminates the denial on success, only recording a denial when the lock is unsuccessful as a result of the denial. Signed-off-by: Jeff Vander Stoep Acked-by: Nick Kralevich Cc: Jeff Vander Stoep Cc: Sasha Levin Cc: "Paul E. McKenney" Cc: Rik van Riel Cc: Vlastimil Babka Cc: Paul Cassella Signed-off-by: Andrew Morton --- mm/mlock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mlock.c b/mm/mlock.c index 33861c780070..dd8a0c325a46 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -23,10 +23,10 @@ int can_do_mlock(void) { - if (capable(CAP_IPC_LOCK)) - return 1; if (rlimit(RLIMIT_MEMLOCK) != 0) return 1; + if (capable(CAP_IPC_LOCK)) + return 1; return 0; } EXPORT_SYMBOL(can_do_mlock); -- cgit v1.2.3 From 4be5c6a45a26cdfa7f6ad4a3c01cb69781f37535 Mon Sep 17 00:00:00 2001 From: Amit Pundir Date: Fri, 13 Mar 2015 13:59:47 +0530 Subject: fs: ecryptfs: readdir: constify actor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit actor is a constant in dir_context struct and because of that we run into following build failure: ---------- fs/ecryptfs/file.c: In function ‘ecryptfs_readdir’: fs/ecryptfs/file.c:130:2: error: assignment of read-only member ‘actor’ make[2]: *** [fs/ecryptfs/file.o] Error 1 make[1]: *** [fs/ecryptfs] Error 2 make: *** [fs] Error 2 ---------- This fix is based on commit: b2497fc3057a([readdir] constify ->actor) Signed-off-by: Amit Pundir --- fs/ecryptfs/file.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 041379a646b3..faa26beb1e4e 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -116,18 +116,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) int rc; struct file *lower_file; struct inode *inode; - struct ecryptfs_getdents_callback buf; + struct ecryptfs_getdents_callback buf = { + .dirent = dirent, + .dentry = file->f_path.dentry, + .filldir = filldir, + .filldir_called = 0, + .entries_written = 0, + .ctx.actor = ecryptfs_filldir + }; lower_file = ecryptfs_file_to_lower(file); lower_file->f_pos = file->f_pos; inode = file_inode(file); - memset(&buf, 0, sizeof(buf)); - buf.dirent = dirent; - buf.dentry = file->f_path.dentry; - buf.filldir = filldir; - buf.filldir_called = 0; - buf.entries_written = 0; - buf.ctx.actor = ecryptfs_filldir; rc = iterate_dir(lower_file, &buf.ctx); file->f_pos = lower_file->f_pos; if (rc < 0) -- cgit v1.2.3