Monday, February 15, 2010

linux.kernel - 26 new messages in 16 topics - digest

linux.kernel
http://groups.google.com/group/linux.kernel?hl=en

linux.kernel@googlegroups.com

Today's topics:

* mfd: Use completion interrupt for WM831x AUXADC - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/0ddef51aad48724c?hl=en
* register long sp asm("r1") incorrect - 3 messages, 2 authors
http://groups.google.com/group/linux.kernel/t/407001dca048cc73?hl=en
* new nmi_watchdog using perf events - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/7476d1dbefa8fe59?hl=en
* Blackfin: initial tracehook support - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/5497cb48daecd717?hl=en
* P4 PMU early draft - 2 messages, 2 authors
http://groups.google.com/group/linux.kernel/t/cbcaa116bfdc609d?hl=en
* oops in uevent_helper [was: mmotm 2010-01-13-12-17 uploaded] - 1 messages, 1
author
http://groups.google.com/group/linux.kernel/t/0c0e978077e9facf?hl=en
* drm - 2 messages, 2 authors
http://groups.google.com/group/linux.kernel/t/c6cc765865fa7caa?hl=en
* tracehook: add some self tests - 3 messages, 2 authors
http://groups.google.com/group/linux.kernel/t/073978c29c9761d6?hl=en
* Staging: IIO: New ABI V2 - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/10d582fa37e7bb44?hl=en
* ext4: number of blocks for fiemap - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/5916399150a7aefb?hl=en
* Much higher wakeups for "<kernel IPI> : Rescheduling interrupts" since 2.6.
32.2 - 4 messages, 1 author
http://groups.google.com/group/linux.kernel/t/e304e94ab5c0997d?hl=en
* x86: get rid of the insane TIF_ABI_PENDING bit - 2 messages, 2 authors
http://groups.google.com/group/linux.kernel/t/5ce48c218f3fa6fa?hl=en
* X11 is black after resume from s2ram if my T400 was previous in docking
station before - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/09ac7a70d02b0696?hl=en
* of/gpio: Introduce of_put_gpio(), add ref counting for OF GPIO chips - 1
messages, 1 author
http://groups.google.com/group/linux.kernel/t/174aef7bb6383d0d?hl=en
* kernel BUG at fs/dcache.c:670 +lvm +md +ext3 - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/7f06f0a4f84df84e?hl=en
* Is it supposed to be ok to call del_gendisk while userspace is frozen? - 1
messages, 1 author
http://groups.google.com/group/linux.kernel/t/a9255c9e086f9196?hl=en

==============================================================================
TOPIC: mfd: Use completion interrupt for WM831x AUXADC
http://groups.google.com/group/linux.kernel/t/0ddef51aad48724c?hl=en
==============================================================================

== 1 of 1 ==
Date: Mon, Feb 15 2010 12:00 pm
From: Mark Brown


Use the completion interrupt generated by the device rather than
polling for conversions to complete. As a backup we still check
the status of the AUXADC if we don't get a completion, mostly for
systems that don't have the WM831x interrupt infrastructure hooked
up.

Also reduce the timeout for completion of conversions to 5ms from
the previous 10ms, the lower timeout should be sufficient.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
drivers/mfd/wm831x-core.c | 36 +++++++++++++++++++++++++++++-------
include/linux/mfd/wm831x/core.h | 2 ++
2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 4b2021a..2c93679 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -321,7 +321,6 @@ EXPORT_SYMBOL_GPL(wm831x_set_bits);
*/
int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input)
{
- int tries = 10;
int ret, src;

mutex_lock(&wm831x->auxadc_lock);
@@ -349,13 +348,14 @@ int wm831x_auxadc_read(struct wm831x *wm831x, enum wm831x_auxadc input)
goto disable;
}

- do {
- msleep(1);
+ /* Ignore the result to allow us to soldier on without IRQ hookup */
+ wait_for_completion_timeout(&wm831x->auxadc_done, msecs_to_jiffies(5));

- ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL);
- if (ret < 0)
- ret = WM831X_AUX_CVT_ENA;
- } while ((ret & WM831X_AUX_CVT_ENA) && --tries);
+ ret = wm831x_reg_read(wm831x, WM831X_AUXADC_CONTROL);
+ if (ret < 0) {
+ dev_err(wm831x->dev, "AUXADC status read failed: %d\n", ret);
+ goto disable;
+ }

if (ret & WM831X_AUX_CVT_ENA) {
dev_err(wm831x->dev, "Timed out reading AUXADC\n");
@@ -390,6 +390,15 @@ out:
}
EXPORT_SYMBOL_GPL(wm831x_auxadc_read);

+static irqreturn_t wm831x_auxadc_irq(int irq, void *irq_data)
+{
+ struct wm831x *wm831x = irq_data;
+
+ complete(&wm831x->auxadc_done);
+
+ return IRQ_HANDLED;
+}
+
/**
* wm831x_auxadc_read_uv: Read a voltage from the WM831x AUXADC
*
@@ -1411,6 +1420,7 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
mutex_init(&wm831x->io_lock);
mutex_init(&wm831x->key_lock);
mutex_init(&wm831x->auxadc_lock);
+ init_completion(&wm831x->auxadc_done);
dev_set_drvdata(wm831x->dev, wm831x);

ret = wm831x_reg_read(wm831x, WM831X_PARENT_ID);
@@ -1508,6 +1518,16 @@ static int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq)
if (ret != 0)
goto err;

+ if (wm831x->irq_base) {
+ ret = request_threaded_irq(wm831x->irq_base +
+ WM831X_IRQ_AUXADC_DATA,
+ NULL, wm831x_auxadc_irq, 0,
+ "auxadc", wm831x);
+ if (ret < 0)
+ dev_err(wm831x->dev, "AUXADC IRQ request failed: %d\n",
+ ret);
+ }
+
/* The core device is up, instantiate the subdevices. */
switch (parent) {
case WM8310:
@@ -1578,6 +1598,8 @@ static void wm831x_device_exit(struct wm831x *wm831x)
{
wm831x_otp_exit(wm831x);
mfd_remove_devices(wm831x->dev);
+ if (wm831x->irq_base)
+ free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x);
wm831x_irq_exit(wm831x);
kfree(wm831x);
}
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index 5184b79..a95d2da 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -15,6 +15,7 @@
#ifndef __MFD_WM831X_CORE_H__
#define __MFD_WM831X_CORE_H__

+#include <linux/completion.h>
#include <linux/interrupt.h>

/*
@@ -257,6 +258,7 @@ struct wm831x {
int num_gpio;

struct mutex auxadc_lock;
+ struct completion auxadc_done;

/* The WM831x has a security key blocking access to certain
* registers. The mutex is taken by the accessors for locking
--
1.6.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

==============================================================================
TOPIC: register long sp asm("r1") incorrect
http://groups.google.com/group/linux.kernel/t/407001dca048cc73?hl=en
==============================================================================

== 1 of 3 ==
Date: Mon, Feb 15 2010 12:10 pm
From: Benjamin Herrenschmidt


On Mon, 2010-02-15 at 08:34 +0100, Pavel Machek wrote:
> > On Tue, 2010-02-09 at 16:24 +0100, Pavel Machek wrote:
> > > ...according to gcc docs, sp should be global, or placement in
> > > register is not guaranteed (except at asm boundaries, but there
> are
> > > none).
> >
> > Sorry I'm not sure I grok what you mean.
>
> Well, according to gcc doscs and my experience, local "register int
> __asm()" variables only work by accident (or not at all).

Hrm... we definitely rely on that for our thread_info() access, and so
far it has worked well for us, but I'll poke our gcc folks just in case.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


== 2 of 3 ==
Date: Mon, Feb 15 2010 12:30 pm
From: Pavel Machek


On Tue 2010-02-16 06:59:52, Benjamin Herrenschmidt wrote:
> On Mon, 2010-02-15 at 08:34 +0100, Pavel Machek wrote:
> > > On Tue, 2010-02-09 at 16:24 +0100, Pavel Machek wrote:
> > > > ...according to gcc docs, sp should be global, or placement in
> > > > register is not guaranteed (except at asm boundaries, but there
> > are
> > > > none).
> > >
> > > Sorry I'm not sure I grok what you mean.
> >
> > Well, according to gcc doscs and my experience, local "register int
> > __asm()" variables only work by accident (or not at all).
>
> Hrm... we definitely rely on that for our thread_info() access, and so
> far it has worked well for us, but I'll poke our gcc folks just in case.

Thanks, and let me know about any results.
Pavel
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


== 3 of 3 ==
Date: Mon, Feb 15 2010 1:10 pm
From: Benjamin Herrenschmidt


On Mon, 2010-02-15 at 21:28 +0100, Pavel Machek wrote:
> On Tue 2010-02-16 06:59:52, Benjamin Herrenschmidt wrote:
> > On Mon, 2010-02-15 at 08:34 +0100, Pavel Machek wrote:
> > > > On Tue, 2010-02-09 at 16:24 +0100, Pavel Machek wrote:
> > > > > ...according to gcc docs, sp should be global, or placement in
> > > > > register is not guaranteed (except at asm boundaries, but there
> > > are
> > > > > none).
> > > >
> > > > Sorry I'm not sure I grok what you mean.
> > >
> > > Well, according to gcc doscs and my experience, local "register int
> > > __asm()" variables only work by accident (or not at all).
> >
> > Hrm... we definitely rely on that for our thread_info() access, and so
> > far it has worked well for us, but I'll poke our gcc folks just in case.
>
> Thanks, and let me know about any results.

All the gcc folks I talked to say something along the lines that there
is no way in hell it doesn't work :-)

It's true that most other use of it we have are global scope (local_paca
in r13, glibc use of r2/r13, etc...) afaik, but since r1 itself is the
stack pointer always, I think they pretty much guarantee it works.

I'm CCing a couple of experts just to be sure.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

==============================================================================
TOPIC: new nmi_watchdog using perf events
http://groups.google.com/group/linux.kernel/t/7476d1dbefa8fe59?hl=en
==============================================================================

== 1 of 1 ==
Date: Mon, Feb 15 2010 12:10 pm
From: Robert Richter


On 12.02.10 18:12:47, Stephane Eranian wrote:
> Don,
>
> On Fri, Feb 12, 2010 at 5:59 PM, Don Zickus <dzickus@redhat.com> wrote:
> > On Fri, Feb 12, 2010 at 05:12:38PM +0100, Stephane Eranian wrote:
> >> Don,
> >>
> >> How is this new NMI watchdog code going to work when you also have OProfile
> >> enabled in your kernel?
> >>
> >> Today, perf_event disables the NMI watchdog while there is at least one event.
> >> By releasing the PMU registers, it also allows for Oprofile to work.
> >>
> >> But now with this new NMI watchdog code, perf_event never releases the PMU.
> >> Thus, I suspect Oprofile will not work anymore, unless the NMI watchdog is
> >> explicitly disabled. Up until now OProfile could co-exist with the NMI watchdog.
> >
> > You are right.  Originally when I read the code I thought perf_event just
> > grabbed all the PMUs in reserve_pmc_init().  But I see that only happens
> > when someone actually creates a PERF_TYPE_HARDWARE event, which the new
> > nmi watchdog does.  Those PMUs only get released when the event is
> > destroyed which my new code only does when the cpu disappears.
> >
> > So yeah, I have effectively blocked oprofile from working.  I can change
> > my code such that when you disable the nmi_watchdog, you can release the
> > PMUs and let oprofile work.
> >
> > But then I am curious, considering that perf and oprofile do the same
> > thing, how much longer do we let competing subsystems control the same
> > hardware?  I thought the point of the perf_event subsystem was to have a
> > proper framework on top of the PMUs such that anyone who wants to use it
> > just registers themselves, which is what the new nmi_watchdog is doing.

There is the perfctr reservation framework what is used by all
subsystems. Perf reserves all counters if there is one event actively
running. This is ok as long you use perf from the userspace for
profiling. Nobody uses 2 different profilers at the same time. But if
the counters are also for implementing in-kernel features such as a
watchdog that is enabled all the time, perf must be modified to only
allocate those counters that are actually needed, and events may not
be scheduled on counters that are already reserved.

> > I can add code that allows oprofile and the new nmi watchdog to coexist,
> > but things get a little ugly to maintain.  Just wondering what the
> > gameplan is here?

There is no longer kernel feature implementation for oprofile. But it
will be still in the kernel for a while until we can completely switch
to perf. Perf is improving very fast, compared to the ongoing
development the implementation effort for coexistence is small. So I
think we all can spend some time to also improve the counter
reservation code.

> I believe OProfile should eventually be removed from the kernel. I suspect
> much of the functionalities it needs are already provided by perf_events.
> But that does not mean the OProfile user level tool must disappear. There is
> a very large user community. I think it could and should be ported to use
> perf_events instead. Given that the Oprofile users only interact through
> opcontrol, opreport, opannotate and such, they never "see" the actual kernel
> API. Thus by re-targeting the scripts, this should be mostly transparent to
> end-users.

I think, porting the oprofile userland to work on top of a performance
library (libpapi or libpfm) would be the cleanest solution. Alternativly
we could also port the kernel part to use the in-kernel perf api.

>
> But for now, I believe the most practical solution is to release the perf_event
> event when you disable the NMI watchdog. That would at least provide a
> way to run OProfile.

This solution is fine to me. The current implemenation also has some
limitations for oprofile if the watchdog is enabled.

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

==============================================================================
TOPIC: Blackfin: initial tracehook support
http://groups.google.com/group/linux.kernel/t/5497cb48daecd717?hl=en
==============================================================================

== 1 of 1 ==
Date: Mon, Feb 15 2010 12:10 pm
From: Roland McGrath


> OK, this should be doable. are there any guidelines for what should
> be in a specific regset ? the Blackfin processor does not have a FPU,
> so the only set i have defined atm is the "general" set and that is
> exactly the same as the current set of ptrace registers. this is also
> what the current PTRACE_{G,S}ETREGS operates on (struct pt_regs).

For most machines, the decision was already made implicitly in the formats
used in ELF core dumps (e.g. elf_gregset_t). If you've never had ELF core
dumps, then you are not constrained by any existing userland compatibility.
The fundamental guideline is that you should use the "natural" layouts and
divisions for your arch. Usually what that means is fairly clear to people
well-versed in the particular arch. When you have a PTRACE_GETREGS format
and there is nothing particularly wrong with that layout choice, then that
is the obvious thing to use as the user_regset layout for NT_PRSTATUS.

> there are more pseudo regs as required by FDPIC, but they arent in the
> pt_regs layout ...

IMHO these do not belong in a regset at all. Like the name says, the
regset is for registers. If something is not really an arch-specific
detail, I don't think it belongs in user_regset.

> i believe the single step exception is taken first and then the
> syscall exception, but i'll have to do some hardware tests on the
> hardware to confirm

The ptrace-tests suite (see http://sourceware.org/systemtap/wiki/utrace/tests)
has some tests for various corners of these semantics. You'll have to port
some of the asm bits to blackfin, but that is probably easier and more
complete than what you'd try yourself from scratch.

> i fixed the Blackfin code to do NR checking after tracing and now
> `strace` shows the bad syscall

Good!

> as for register munging, i didnt realize this was accepted practice
> and something that should actively be supported.

Indeed. Try "strace -f" and see that its fork/clone tracing code relies
on fiddling syscall argument registers.

This stop (i.e. inside tracehook_report_syscall_entry) is the one and only
place where syscall_set_arguments() can validly be used.

Moreover, the fundamental rule is that at every tracehook_report_* call
site (aka ptrace stop), full access (including modification) via
user_regset interfaces (aka ptrace) should work and have the same effect as
if userland had set those register values normally before entering the
kernel (or just after exiting it, depending on the site in question).

> i had been toying around locally with optimizing some of the syscall
> paths by breaking this behavior, so i'll add some comments to prevent any
> further mucking here.

Other machines do have such fast paths for syscalls.
They take the slow paths when TIF_SYSCALL_TRACE is set.
Note that you can still have fast paths when TIF_SYSCALL_AUDIT
is set--those don't have to permit modification.
Note also that syscall_get_arguments() works anywhere.

> the Blackfin code when traced now does:
> call tracehook_report_syscall_entry(regs)
> reload syscall NR and all 6 args from regs
> if syscall NR is valid, call it
> if syscall NR is invalid, set return value to -ENOSYS
> store return value into regs
> call tracehook_report_syscall_exit(regs)

That's correct. Also do whatever reloading is required after so that the
return value register and other registers seen in userland have whatever
values may have been set inside tracehook_report_syscall_exit().


Thanks,
Roland
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

==============================================================================
TOPIC: P4 PMU early draft
http://groups.google.com/group/linux.kernel/t/cbcaa116bfdc609d?hl=en
==============================================================================

== 1 of 2 ==
Date: Mon, Feb 15 2010 12:20 pm
From: Robert Richter


On 08.02.10 21:45:04, Cyrill Gorcunov wrote:
> Hi all,
>
> first of all the patches are NOT for any kind of inclusion. It's not
> ready yet. More likely I'm asking for glance review, ideas, criticism.
>
> The main problem in implementing P4 PMU is that it has much more
> restrictions for event to MSR mapping. So to fit into current
> perf_events model I made the following:
>
> 1) Event representation. P4 uses a tuple of ESCR+CCCR+COUNTER
> as an "event". Since every CCCR register mapped directly to
> counter itself and ESCR and CCCR uses only 32bits of their
> appropriate MSRs, I decided to use "packed" config in
> in hw_perf_event::config. So that upper 31 bits are ESCR
> and lower 32 bits are CCCR values. The bit 64 is for HT flag.
>
> So the base idea here is to pack into 64bit hw_perf_event::config
> as much info as possible.
>
> Due to difference in bitfields I needed to implement
> hw_perf_event::config helper which unbind hw_perf_event::config field
> from processor specifics and allow to use it in P4 PMU.

If we introduce model specific configuration, we should put more model
specific code in here and then remove

u64 (*raw_event)(u64);

in struct x86_pmu.

> 3) I've started unbinding x86_schedule_events into per x86_pmu::schedule_events
> and there I hit hardness in binding HT bit. Have to think...

Instead of implemting x86_pmu.schedule_events() you should rather
abstract x86_pmu_enable(). This will be much more flexible to
implement other model spcific features.

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@amd.com

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/


== 2 of 2 ==
Date: Mon, Feb 15 2010 12:40 pm
From: Cyrill Gorcunov


On Mon, Feb 15, 2010 at 09:11:02PM +0100, Robert Richter wrote:
> On 08.02.10 21:45:04, Cyrill Gorcunov wrote:
> > Hi all,
> >
> > first of all the patches are NOT for any kind of inclusion. It's not
> > ready yet. More likely I'm asking for glance review, ideas, criticism.
> >
> > The main problem in implementing P4 PMU is that it has much more
> > restrictions for event to MSR mapping. So to fit into current
> > perf_events model I made the following:
> >
> > 1) Event representation. P4 uses a tuple of ESCR+CCCR+COUNTER
> > as an "event". Since every CCCR register mapped directly to
> > counter itself and ESCR and CCCR uses only 32bits of their
> > appropriate MSRs, I decided to use "packed" config in
> > in hw_perf_event::config. So that upper 31 bits are ESCR
> > and lower 32 bits are CCCR values. The bit 64 is for HT flag.
> >
> > So the base idea here is to pack into 64bit hw_perf_event::config
> > as much info as possible.
> >
> > Due to difference in bitfields I needed to implement
> > hw_perf_event::config helper which unbind hw_perf_event::config field
> > from processor specifics and allow to use it in P4 PMU.
>
> If we introduce model specific configuration, we should put more model
> specific code in here and then remove
>
> u64 (*raw_event)(u64);
>
> in struct x86_pmu.
>

It seems we should still support raw_events, if I understand the idea
right -- raw_events could be used for say OProfile (if we are going
to substitute oprofile with perfevents subsystem). So the only
difference with other architectural events is that raw_event
need to be "packed" before being set into.

Putting/packing more specific code into config could make code
even more complex. Dunno Robert...

> > 3) I've started unbinding x86_schedule_events into per x86_pmu::schedule_events
> > and there I hit hardness in binding HT bit. Have to think...
>
> Instead of implemting x86_pmu.schedule_events() you should rather
> abstract x86_pmu_enable(). This will be much more flexible to
> implement other model spcific features.

But I would need collect events and so on -- ie code duplication
will be there. Or you mean something else?

>
> -Robert
>
> --
> Advanced Micro Devices, Inc.
> Operating System Research Center
> email: robert.richter@amd.com
>
-- Cyrill
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/

==============================================================================
TOPIC: oops in uevent_helper [was: mmotm 2010-01-13-12-17 uploaded]
http://groups.google.com/group/linux.kernel/t/0c0e978077e9facf?hl=en
==============================================================================

== 1 of 1 ==
Date: Mon, Feb 15 2010 12:20 pm
From: "Paul E. McKenney"


On Fri, Feb 12, 2010 at 12:06:24PM -0500, Neil Horman wrote:
> On Thu, Feb 11, 2010 at 09:27:08PM -0800, Andrew Morton wrote:
> > On Fri, 12 Feb 2010 06:21:26 +0100 Andi Kleen <andi@firstfloor.org> wrote:
> >
> > > > urgh, must I? That trashes Neil's
> > > > kmod-add-init-function-to-usermodehelper.patch and
> > > > kmod-replace-call_usermodehelper_pipe-with-use-of-umh-init-function-and-resolve-limit.patch
> > > > and probably requires repairing other stuff and sets the testing status
> > > > back to "square one".
> > > >
> > > > If you have patches queued, please make the time to support them!
> > >
> > > Ok, understood. I'll try to look into it today.
> >
> > Ta. As I mentioned to Neil, if it looks serious then let's shelve it
> > all and revisit for 2.6.35.
> >
> > > You want incrementals?
> >
> > If convenient, please. Otherwise we can drop--and-remerge.
> >
> >
>
>
> Ok, this fixes the oops Jiri reported for me. Its been tested by me, but only
> minimally, and my rcu-foo is not the greatest, so through reviews appreciated.
> The patch is incremental against the latest mmotm as of this AM.

A few questions interspersed below...

Thanx, Paul

> Thanks!
> Neil
>
>
> Fix up remaining references to uevent_helper to play nice with Andi's
> uevent_helper/rcu changes.
>
> Some changes were made recently which modified uevent_helper to be an rcu
> protected pointer, rather than a static char array. This has led to a few
> missed points in which the sysfs path still assumed that:
> 1) the uevent_helper symbol could still be accessed safely without
> rcu_dereference
> 2) that the sysfs path could copy data to that pointer safely.
>
> I've fixed this by chaging the sysfs path so that it duplicates the string on
> uevent_helper_store, and freeing it (only if it doesn't point to the
> CONFIG_DEFAULT_UEVENT_HELPER string), in a call_rcu post-quiescent point. I've
> also fixed up the remaining references to the uevent_helper pointers to use
> rcu_dereference.
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
>
> kernel/ksysfs.c | 38 ++++++++++++++++++++++++++++++++++++--
> lib/kobject_uevent.c | 4 +++-
> 2 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
> index 21fe3c4..66d1e5b 100644
> --- a/kernel/ksysfs.c
> +++ b/kernel/ksysfs.c
> @@ -37,19 +37,53 @@ KERNEL_ATTR_RO(uevent_seqnum);
> static ssize_t uevent_helper_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *buf)
> {
> - return sprintf(buf, "%s\n", uevent_helper);
> + return sprintf(buf, "%s\n", rcu_dereference(uevent_helper));
> }
> +
> +struct uevent_helper_rcu {
> + char *oldptr;
> + struct rcu_head rcu;
> +};
> +
> +static void free_old_uevent_ptr(struct rcu_head *list)
> +{
> + struct uevent_helper_rcu *ptr;
> + char *dfl = CONFIG_UEVENT_HELPER_PATH;

Given that you kfree() something that might be equal to dfl, I am
hoping that the CONFIG_UEVENT_HELPER_PATH macro expands to something
that kfree() can do something with...

Or did you mean to put a "return;" in the then-clause of the "if"
statement below?

> + ptr = container_of(list, struct uevent_helper_rcu, rcu);
> + if (ptr->oldptr && (ptr->oldptr != dfl))
> + kfree(ptr->oldptr);
> +
> + kfree(ptr);
> +}
> +
> static ssize_t uevent_helper_store(struct kobject *kobj,
> struct kobj_attribute *attr,
> const char *buf, size_t count)
> {
> + char *kbuf;
> + struct uevent_helper_rcu *old;
> +
> if (count+1 > UEVENT_HELPER_PATH_LEN)
> return -ENOENT;
> - memcpy(uevent_helper, buf, count);
> + kbuf = kstrndup(buf, UEVENT_HELPER_PATH_LEN, GFP_KERNEL);
> + if (!kbuf)
> + return -ENOMEM;
> uevent_helper[count] = '\0';
> if (count && uevent_helper[count-1] == '\n')
> uevent_helper[count-1] = '\0';
> + old = kmalloc(sizeof(struct uevent_helper_rcu), GFP_KERNEL);
> + if (!old)
> + goto out_free;
> +
> + old->oldptr = rcu_dereference(uevent_helper);
> + rcu_assign_pointer(uevent_helper, kbuf);

Some lock protects this? Or does something else prevent multiple
instances of uevent_helper_store() from executing concurrently?

> + call_rcu(&old->rcu, free_old_uevent_ptr);
> +
> return count;
> +
> +out_free:
> + kfree(kbuf);
> + return -ENOMEM;
> }
> KERNEL_ATTR_RW(uevent_helper);
>

0 Comments:

Post a Comment

Subscribe to Post Comments [Atom]

<< Home


Real Estate