Monday, January 13, 2014

linux.kernel - 26 new messages in 7 topics - digest

linux.kernel
http://groups.google.com/group/linux.kernel?hl=en

linux.kernel@googlegroups.com

Today's topics:

* perf, tools: Enable printing the srcline in the history - 1 messages, 1
author
http://groups.google.com/group/linux.kernel/t/e9ad098f44690ff5?hl=en
* clk: samsung: exynos5250: Add CLK_IGNORE_UNUSED flag for the sysreg clock -
3 messages, 1 author
http://groups.google.com/group/linux.kernel/t/a53824587b1cf92d?hl=en
* rds: prevent dereference of a NULL device - 18 messages, 1 author
http://groups.google.com/group/linux.kernel/t/ef25c501937e80d8?hl=en
* mm/zswap: Check all pool pages instead of one pool pages - 1 messages, 1
author
http://groups.google.com/group/linux.kernel/t/81cd07f00a212f3a?hl=en
* x86, acpi memory hotplug, add parameter to disable memory hotplug - 1
messages, 1 author
http://groups.google.com/group/linux.kernel/t/fea2893681599c3e?hl=en
* x86, e820 disable ACPI Memory Hotplug if memory mapping is specified by user
[v2] - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/4888d1ac18187fba?hl=en
* dma debug: introduce debug_dma_assert_idle() - 1 messages, 1 author
http://groups.google.com/group/linux.kernel/t/f63a9d0143e0c240?hl=en

==============================================================================
TOPIC: perf, tools: Enable printing the srcline in the history
http://groups.google.com/group/linux.kernel/t/e9ad098f44690ff5?hl=en
==============================================================================

== 1 of 1 ==
Date: Mon, Jan 13 2014 5:10 pm
From: Andi Kleen


From: Andi Kleen <ak@linux.intel.com>

For lbr-as-callgraph we need to see the line number in the history,
because many LBR entries can be in a single function, and just
showing the same function name many times is not useful.

When the history code is configured to sort by address, also try to
resolve the address to a file:srcline and display this in the browser.
If that doesn't work still display the address.

This can be also useful without LBRs for understanding which call in a large
function (or in which inlined function) called something else.

Contains fixes from Namhyung Kim

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/perf/ui/browsers/hists.c | 15 ++++++++++++---
tools/perf/ui/stdio/hist.c | 16 +++++++++++++---
tools/perf/util/callchain.h | 1 +
tools/perf/util/machine.c | 2 +-
tools/perf/util/srcline.c | 6 ++++--
5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index b720b92..060fffc 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -399,9 +399,18 @@ static char *callchain_list__sym_name(struct callchain_list *cl,
{
int printed;

- if (cl->ms.sym)
- printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
- else
+ if (cl->ms.sym) {
+ if (callchain_param.key == CCKEY_ADDRESS &&
+ cl->ms.map && !cl->srcline)
+ cl->srcline = get_srcline(cl->ms.map->dso,
+ map__rip_2objdump(cl->ms.map,
+ cl->ip));
+ if (cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s",
+ cl->ms.sym->name, cl->srcline);
+ else
+ printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name);
+ } else
printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);

if (show_dso)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 831fbb7..a494eed 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -56,9 +56,19 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain,
} else
ret += fprintf(fp, "%s", " ");
}
- if (chain->ms.sym)
- ret += fprintf(fp, "%s\n", chain->ms.sym->name);
- else
+ if (chain->ms.sym) {
+ if (callchain_param.key == CCKEY_ADDRESS &&
+ chain->ms.map)
+ chain->srcline = get_srcline(chain->ms.map->dso,
+ map__rip_2objdump(
+ chain->ms.map,
+ chain->ip));
+ if (chain->srcline)
+ ret += fprintf(fp, "%s %s\n",
+ chain->ms.sym->name, chain->srcline);
+ else
+ ret += fprintf(fp, "%s\n", chain->ms.sym->name);
+ } else
ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip);

return ret;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index a1a298a..7e74c1d 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -59,6 +59,7 @@ struct callchain_param {
struct callchain_list {
u64 ip;
struct map_symbol ms;
+ char *srcline;
struct list_head list;
};

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 084c80c..f839e6f 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1294,7 +1294,7 @@ static int add_callchain_ip(struct machine *machine,
return -EINVAL;
}

- return callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym);
+ return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym);
}

#define CHASHSZ 127
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 7e67879..680c02b 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -258,7 +258,7 @@ char *get_srcline(struct dso *dso, unsigned long addr)
const char *dso_name;

if (!dso->has_srcline)
- return SRCLINE_UNKNOWN;
+ goto out;

if (dso->symsrc_filename)
dso_name = dso->symsrc_filename;
@@ -289,7 +289,9 @@ out:
dso->has_srcline = 0;
dso__free_a2l(dso);
}
- return SRCLINE_UNKNOWN;
+ if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
+ return SRCLINE_UNKNOWN;
+ return srcline;
}

void free_srcline(char *srcline)
--
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/





==============================================================================
TOPIC: clk: samsung: exynos5250: Add CLK_IGNORE_UNUSED flag for the sysreg
clock
http://groups.google.com/group/linux.kernel/t/a53824587b1cf92d?hl=en
==============================================================================

== 1 of 3 ==
Date: Mon, Jan 13 2014 5:10 pm
From: Greg Kroah-Hartman


3.10-stable review patch. If anyone has any objections, please let me know.

------------------

From: Abhilash Kesavan <a.kesavan@samsung.com>

commit 2feed5aecf5f367b92bd6b6e92afe9e3de466907 upstream.

The sysreg (system register) generates control signals for various blocks
like disp1blk, i2c, mipi, usb etc. However, it gets disabled as an unused
clock at boot-up. This can lead to failures in operation of above blocks,
because they can not be configured properly if this clock is disabled.

Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
[t.figa: Updated patch description.]
Signed-off-by: Tomasz Figa <t.figa@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
drivers/clk/samsung/clk-exynos5250.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -377,7 +377,8 @@ struct samsung_gate_clock exynos5250_gat
GATE(hsi2c2, "hsi2c2", "aclk66", GATE_IP_PERIC, 30, 0, 0),
GATE(hsi2c3, "hsi2c3", "aclk66", GATE_IP_PERIC, 31, 0, 0),
GATE(chipid, "chipid", "aclk66", GATE_IP_PERIS, 0, 0, 0),
- GATE(sysreg, "sysreg", "aclk66", GATE_IP_PERIS, 1, 0, 0),
+ GATE(sysreg, "sysreg", "aclk66",
+ GATE_IP_PERIS, 1, CLK_IGNORE_UNUSED, 0),
GATE(pmu, "pmu", "aclk66", GATE_IP_PERIS, 2, CLK_IGNORE_UNUSED, 0),
GATE(tzpc0, "tzpc0", "aclk66", GATE_IP_PERIS, 6, 0, 0),
GATE(tzpc1, "tzpc1", "aclk66", GATE_IP_PERIS, 7, 0, 0),


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 2 of 3 ==
Date: Mon, Jan 13 2014 5:10 pm
From: Greg Kroah-Hartman


3.10-stable review patch. If anyone has any objections, please let me know.

------------------

From: Nat Gurumoorthy <natg@google.com>

[ Upstream commit 388d3335575f4c056dcf7138a30f1454e2145cd8 ]

The new tg3 driver leaves REG_BASE_ADDR (PCI config offset 120)
uninitialized. From power on reset this register may have garbage in it. The
Register Base Address register defines the device local address of a
register. The data pointed to by this location is read or written using
the Register Data register (PCI config offset 128). When REG_BASE_ADDR has
garbage any read or write of Register Data Register (PCI 128) will cause the
PCI bus to lock up. The TCO watchdog will fire and bring down the system.

Signed-off-by: Nat Gurumoorthy <natg@google.com>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/ethernet/broadcom/tg3.c | 3 +++
1 file changed, 3 insertions(+)

--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -16297,6 +16297,9 @@ static int tg3_get_invariants(struct tg3
/* Clear this out for sanity. */
tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);

+ /* Clear TG3PCI_REG_BASE_ADDR to prevent hangs. */
+ tw32(TG3PCI_REG_BASE_ADDR, 0);
+
pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
&pci_state_reg);
if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 &&


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 3 of 3 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.10-stable review patch. If anyone has any objections, please let me know.

------------------

From: Daniel Borkmann <dborkman@redhat.com>

[ Upstream commit 66e56cd46b93ef407c60adcac62cf33b06119d50 ]

Commit e40526cb20b5 introduced a cached dev pointer, that gets
hooked into register_prot_hook(), __unregister_prot_hook() to
update the device used for the send path.

We need to fix this up, as otherwise this will not work with
sockets created with protocol = 0, plus with sll_protocol = 0
passed via sockaddr_ll when doing the bind.

So instead, assign the pointer directly. The compiler can inline
these helper functions automagically.

While at it, also assume the cached dev fast-path as likely(),
and document this variant of socket creation as it seems it is
not widely used (seems not even the author of TX_RING was aware
of that in his reference example [1]). Tested with reproducer
from e40526cb20b5.

[1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example

Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Tested-by: Salam Noureddine <noureddine@aristanetworks.com>
Tested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
Documentation/networking/packet_mmap.txt | 10 ++++
net/packet/af_packet.c | 65 +++++++++++++++++++------------
2 files changed, 50 insertions(+), 25 deletions(-)

--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -123,6 +123,16 @@ Transmission process is similar to captu
[shutdown] close() --------> destruction of the transmission socket and
deallocation of all associated resources.

+Socket creation and destruction is also straight forward, and is done
+the same way as in capturing described in the previous paragraph:
+
+ int fd = socket(PF_PACKET, mode, 0);
+
+The protocol can optionally be 0 in case we only want to transmit
+via this socket, which avoids an expensive call to packet_rcv().
+In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
+set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
+
Binding the socket to your network interface is mandatory (with zero copy) to
know the header size of frames used in the circular buffer.

--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -237,6 +237,30 @@ struct packet_skb_cb {
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(po->cached_dev);
+ if (likely(dev))
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
+static void packet_cached_dev_assign(struct packet_sock *po,
+ struct net_device *dev)
+{
+ rcu_assign_pointer(po->cached_dev, dev);
+}
+
+static void packet_cached_dev_reset(struct packet_sock *po)
+{
+ RCU_INIT_POINTER(po->cached_dev, NULL);
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
@@ -246,12 +270,10 @@ static void register_prot_hook(struct so
struct packet_sock *po = pkt_sk(sk);

if (!po->running) {
- if (po->fanout) {
+ if (po->fanout)
__fanout_link(sk, po);
- } else {
+ else
dev_add_pack(&po->prot_hook);
- rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
- }

sock_hold(sk);
po->running = 1;
@@ -270,12 +292,11 @@ static void __unregister_prot_hook(struc
struct packet_sock *po = pkt_sk(sk);

po->running = 0;
- if (po->fanout) {
+
+ if (po->fanout)
__fanout_unlink(sk, po);
- } else {
+ else
__dev_remove_pack(&po->prot_hook);
- RCU_INIT_POINTER(po->cached_dev, NULL);
- }

__sock_put(sk);

@@ -2048,19 +2069,6 @@ static int tpacket_fill_skb(struct packe
return tp_len;
}

-static struct net_device *packet_cached_dev_get(struct packet_sock *po)
-{
- struct net_device *dev;
-
- rcu_read_lock();
- dev = rcu_dereference(po->cached_dev);
- if (dev)
- dev_hold(dev);
- rcu_read_unlock();
-
- return dev;
-}
-
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
@@ -2077,7 +2085,7 @@ static int tpacket_snd(struct packet_soc

mutex_lock(&po->pg_vec_lock);

- if (saddr == NULL) {
+ if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
@@ -2231,7 +2239,7 @@ static int packet_snd(struct socket *soc
* Get and verify the address.
*/

- if (saddr == NULL) {
+ if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
@@ -2440,6 +2448,8 @@ static int packet_release(struct socket

spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
+ packet_cached_dev_reset(po);
+
if (po->prot_hook.dev) {
dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
@@ -2495,14 +2505,17 @@ static int packet_do_bind(struct sock *s

spin_lock(&po->bind_lock);
unregister_prot_hook(sk, true);
+
po->num = protocol;
po->prot_hook.type = protocol;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
- po->prot_hook.dev = dev;

+ po->prot_hook.dev = dev;
po->ifindex = dev ? dev->ifindex : 0;

+ packet_cached_dev_assign(po, dev);
+
if (protocol == 0)
goto out_unlock;

@@ -2615,7 +2628,8 @@ static int packet_create(struct net *net
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
- RCU_INIT_POINTER(po->cached_dev, NULL);
+
+ packet_cached_dev_reset(po);

sk->sk_destruct = packet_sock_destruct;
sk_refcnt_debug_inc(sk);
@@ -3369,6 +3383,7 @@ static int packet_notifier(struct notifi
sk->sk_error_report(sk);
}
if (msg == NETDEV_UNREGISTER) {
+ packet_cached_dev_reset(po);
po->ifindex = -1;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/





==============================================================================
TOPIC: rds: prevent dereference of a NULL device
http://groups.google.com/group/linux.kernel/t/ef25c501937e80d8?hl=en
==============================================================================

== 1 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Sasha Levin <sasha.levin@oracle.com>

[ Upstream commit c2349758acf1874e4c2b93fe41d072336f1a31d0 ]

Binding might result in a NULL device, which is dereferenced
causing this BUG:

[ 1317.260548] BUG: unable to handle kernel NULL pointer dereference at 000000000000097
4
[ 1317.261847] IP: [<ffffffff84225f52>] rds_ib_laddr_check+0x82/0x110
[ 1317.263315] PGD 418bcb067 PUD 3ceb21067 PMD 0
[ 1317.263502] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[ 1317.264179] Dumping ftrace buffer:
[ 1317.264774] (ftrace buffer empty)
[ 1317.265220] Modules linked in:
[ 1317.265824] CPU: 4 PID: 836 Comm: trinity-child46 Tainted: G W 3.13.0-rc4-
next-20131218-sasha-00013-g2cebb9b-dirty #4159
[ 1317.267415] task: ffff8803ddf33000 ti: ffff8803cd31a000 task.ti: ffff8803cd31a000
[ 1317.268399] RIP: 0010:[<ffffffff84225f52>] [<ffffffff84225f52>] rds_ib_laddr_check+
0x82/0x110
[ 1317.269670] RSP: 0000:ffff8803cd31bdf8 EFLAGS: 00010246
[ 1317.270230] RAX: 0000000000000000 RBX: ffff88020b0dd388 RCX: 0000000000000000
[ 1317.270230] RDX: ffffffff8439822e RSI: 00000000000c000a RDI: 0000000000000286
[ 1317.270230] RBP: ffff8803cd31be38 R08: 0000000000000000 R09: 0000000000000000
[ 1317.270230] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
[ 1317.270230] R13: 0000000054086700 R14: 0000000000a25de0 R15: 0000000000000031
[ 1317.270230] FS: 00007ff40251d700(0000) GS:ffff88022e200000(0000) knlGS:000000000000
0000
[ 1317.270230] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[ 1317.270230] CR2: 0000000000000974 CR3: 00000003cd478000 CR4: 00000000000006e0
[ 1317.270230] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1317.270230] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602
[ 1317.270230] Stack:
[ 1317.270230] 0000000054086700 5408670000a25de0 5408670000000002 0000000000000000
[ 1317.270230] ffffffff84223542 00000000ea54c767 0000000000000000 ffffffff86d26160
[ 1317.270230] ffff8803cd31be68 ffffffff84223556 ffff8803cd31beb8 ffff8800c6765280
[ 1317.270230] Call Trace:
[ 1317.270230] [<ffffffff84223542>] ? rds_trans_get_preferred+0x42/0xa0
[ 1317.270230] [<ffffffff84223556>] rds_trans_get_preferred+0x56/0xa0
[ 1317.270230] [<ffffffff8421c9c3>] rds_bind+0x73/0xf0
[ 1317.270230] [<ffffffff83e4ce62>] SYSC_bind+0x92/0xf0
[ 1317.270230] [<ffffffff812493f8>] ? context_tracking_user_exit+0xb8/0x1d0
[ 1317.270230] [<ffffffff8119313d>] ? trace_hardirqs_on+0xd/0x10
[ 1317.270230] [<ffffffff8107a852>] ? syscall_trace_enter+0x32/0x290
[ 1317.270230] [<ffffffff83e4cece>] SyS_bind+0xe/0x10
[ 1317.270230] [<ffffffff843a6ad0>] tracesys+0xdd/0xe2
[ 1317.270230] Code: 00 8b 45 cc 48 8d 75 d0 48 c7 45 d8 00 00 00 00 66 c7 45 d0 02 00
89 45 d4 48 89 df e8 78 49 76 ff 41 89 c4 85 c0 75 0c 48 8b 03 <80> b8 74 09 00 00 01 7
4 06 41 bc 9d ff ff ff f6 05 2a b6 c2 02
[ 1317.270230] RIP [<ffffffff84225f52>] rds_ib_laddr_check+0x82/0x110
[ 1317.270230] RSP <ffff8803cd31bdf8>
[ 1317.270230] CR2: 0000000000000974

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/rds/ib.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 add
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
/* due to this, we will claim to support iWARP devices unless we
check node_type. */
- if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
+ if (ret || !cm_id->device ||
+ cm_id->device->node_type != RDMA_NODE_IB_CA)
ret = -EADDRNOTAVAIL;

rdsdebug("addr %pI4 ret %d node type %d\n",


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 2 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Hannes Frederic Sowa <hannes@stressinduktion.org>

[ Upstream commit a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f ]

Brett Ciphery reported that new ipv6 addresses failed to get installed
because the addrconf generated dsts where counted against the dst gc
limit. We don't need to count those routes like we currently don't count
administratively added routes.

Because the max_addresses check enforces a limit on unbounded address
generation first in case someone plays with router advertisments, we
are still safe here.

Reported-by: Brett Ciphery <brett.ciphery@windriver.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/ipv6/route.c | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)

--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2114,15 +2114,11 @@ struct rt6_info *addrconf_dst_alloc(stru
{
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
- net->loopback_dev, 0);
+ net->loopback_dev, DST_NOCOUNT);
int err;

- if (!rt) {
- if (net_ratelimit())
- pr_warning("IPv6: Maximum number of routes reached,"
- " consider increasing route/max_size.\n");
+ if (!rt)
return ERR_PTR(-ENOMEM);
- }

in6_dev_hold(idev);



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 3 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Changli Gao <xiaosuo@gmail.com>

[ Upstream commit d323e92cc3f4edd943610557c9ea1bb4bb5056e8 ]

maxattr in genl_family should be used to save the max attribute
type, but not the max command type. Drop monitor doesn't support
any attributes, so we should leave it as zero.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/core/drop_monitor.c | 1 -
1 file changed, 1 deletion(-)

--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -61,7 +61,6 @@ static struct genl_family net_drop_monit
.hdrsize = 0,
.name = "NET_DM",
.version = 2,
- .maxattr = NET_DM_CMD_MAX,
};

static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 4 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Ben Segall <bsegall@google.com>

commit 927b54fccbf04207ec92f669dce6806848cbec7d upstream.

__start_cfs_bandwidth calls hrtimer_cancel while holding rq->lock,
waiting for the hrtimer to finish. However, if sched_cfs_period_timer
runs for another loop iteration, the hrtimer can attempt to take
rq->lock, resulting in deadlock.

Fix this by ensuring that cfs_b->timer_active is cleared only if the
_latest_ call to do_sched_cfs_period_timer is returning as idle. Then
__start_cfs_bandwidth can just call hrtimer_try_to_cancel and wait for
that to succeed or timer_active == 1.

Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: pjt@google.com
Link: http://lkml.kernel.org/r/20131016181622.22647.16643.stgit@sword-of-the-dawn.mtv.corp.google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
kernel/sched/fair.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1771,6 +1771,13 @@ static int do_sched_cfs_period_timer(str
if (idle)
goto out_unlock;

+ /*
+ * if we have relooped after returning idle once, we need to update our
+ * status as actually running, so that other cpus doing
+ * __start_cfs_bandwidth will stop trying to cancel us.
+ */
+ cfs_b->timer_active = 1;
+
__refill_cfs_bandwidth_runtime(cfs_b);

if (!throttled) {
@@ -2043,11 +2050,11 @@ void __start_cfs_bandwidth(struct cfs_ba
* (timer_active==0 becomes visible before the hrtimer call-back
* terminates). In either case we ensure that it's re-programmed
*/
- while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+ while (unlikely(hrtimer_active(&cfs_b->period_timer)) &&
+ hrtimer_try_to_cancel(&cfs_b->period_timer) < 0) {
+ /* bounce the lock to allow do_sched_cfs_period_timer to run */
raw_spin_unlock(&cfs_b->lock);
- /* ensure cfs_b->lock is available while we wait */
- hrtimer_cancel(&cfs_b->period_timer);
-
+ cpu_relax();
raw_spin_lock(&cfs_b->lock);
/* if someone else restarted the timer then we're done */
if (cfs_b->timer_active)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 5 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Jason Wang <jasowang@redhat.com>

[ Upstream commit ce232ce01d61b184202bb185103d119820e1260c ]

macvtap_put_user() never return a value grater than iov length, this in fact
bypasses the truncated checking in macvtap_recvmsg(). Fix this by always
returning the size of packet plus the possible vlan header to let the trunca
checking work.

Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/macvtap.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -800,7 +800,7 @@ static ssize_t macvtap_put_user(struct m
int ret;
int vnet_hdr_len = 0;
int vlan_offset = 0;
- int copied;
+ int copied, total;

if (q->flags & IFF_VNET_HDR) {
struct virtio_net_hdr vnet_hdr;
@@ -815,7 +815,8 @@ static ssize_t macvtap_put_user(struct m
if (memcpy_toiovecend(iv, (void *)&vnet_hdr, 0, sizeof(vnet_hdr)))
return -EFAULT;
}
- copied = vnet_hdr_len;
+ total = copied = vnet_hdr_len;
+ total += skb->len;

if (!vlan_tx_tag_present(skb))
len = min_t(int, skb->len, len);
@@ -830,6 +831,7 @@ static ssize_t macvtap_put_user(struct m

vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
len = min_t(int, skb->len + VLAN_HLEN, len);
+ total += VLAN_HLEN;

copy = min_t(int, vlan_offset, len);
ret = skb_copy_datagram_const_iovec(skb, 0, iv, copied, copy);
@@ -847,10 +849,9 @@ static ssize_t macvtap_put_user(struct m
}

ret = skb_copy_datagram_const_iovec(skb, vlan_offset, iv, copied, len);
- copied += len;

done:
- return ret ? ret : copied;
+ return ret ? ret : total;
}

static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
@@ -904,7 +905,7 @@ static ssize_t macvtap_aio_read(struct k
}

ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
- ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
+ ret = min_t(ssize_t, ret, len);
if (ret > 0)
iocb->ki_pos = ret;
out:


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 6 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Wenliang Fan <fanwlexca@gmail.com>

[ Upstream commit e9db5c21d3646a6454fcd04938dd215ac3ab620a ]

The local variable 'bi' comes from userspace. If userspace passed a
large number to 'bi.data.calibrate', there would be an integer overflow
in the following line:
s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;

Signed-off-by: Wenliang Fan <fanwlexca@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/hamradio/hdlcdrv.c | 2 ++
1 file changed, 2 insertions(+)

--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -571,6 +571,8 @@ static int hdlcdrv_ioctl(struct net_devi
case HDLCDRVCTL_CALIBRATE:
if(!capable(CAP_SYS_RAWIO))
return -EPERM;
+ if (bi.data.calibrate > INT_MAX / s->par.bitrate)
+ return -EINVAL;
s->hdlctx.calibrate = bi.data.calibrate * s->par.bitrate / 16;
return 0;



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 7 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>

[ Upstream commit e6ebc7f16ca1434a334647aa56399c546be4e64b ]

Signed-off-by: Zhi Yong Wu <wuzhy@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/macvtap.c | 2 ++
1 file changed, 2 insertions(+)

--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -905,6 +905,8 @@ static ssize_t macvtap_aio_read(struct k

ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
+ if (ret > 0)
+ iocb->ki_pos = ret;
out:
return ret;
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 8 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Russell King <rmk+kernel@arm.linux.org.uk>

commit 29c350bf28da333e41e30497b649fe335712a2ab upstream.

The array was missing the final entry for the undefined instruction
exception handler; this commit adds it.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
arch/arm/kernel/traps.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)

--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -37,7 +37,13 @@

#include "signal.h"

-static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
+static const char *handler[]= {
+ "prefetch abort",
+ "data abort",
+ "address exception",
+ "interrupt",
+ "undefined instruction",
+};

void *vectors_page;



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 9 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Linus Torvalds <torvalds@linux-foundation.org>

commit 26bef1318adc1b3a530ecc807ef99346db2aa8b0 upstream.

Before we do an EMMS in the AMD FXSAVE information leak workaround we
need to clear any pending exceptions, otherwise we trap with a
floating-point exception inside this code.

Reported-by: halfdog <me@halfdog.net>
Tested-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/CA%2B55aFxQnY_PCG_n4=0w-VG=YLXL-yr7oMxyy0WU2gCBAf3ydg@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
arch/x86/include/asm/fpu-internal.h | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -266,12 +266,13 @@ static inline int restore_fpu_checking(s
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed
values. "m" is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (tsk->thread.fpu.has_fpu));
+ if (unlikely(static_cpu_has(X86_FEATURE_FXSAVE_LEAK))) {
+ asm volatile(
+ "fnclex\n\t"
+ "emms\n\t"
+ "fildl %P[addr]" /* set F?P to defined value */
+ : : [addr] "m" (tsk->thread.fpu.has_fpu));
+ }

return fpu_restore_checking(&tsk->thread.fpu);
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 10 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Daniel Borkmann <dborkman@redhat.com>

[ Upstream commit 4d231b76eef6c4a6bd9c96769e191517765942cb ]

While commit 30a584d944fb fixes datagram interface in LLC, a use
after free bug has been introduced for SOCK_STREAM sockets that do
not make use of MSG_PEEK.

The flow is as follow ...

if (!(flags & MSG_PEEK)) {
...
sk_eat_skb(sk, skb, false);
...
}
...
if (used + offset < skb->len)
continue;

... where sk_eat_skb() calls __kfree_skb(). Therefore, cache
original length and work on skb_len to check partial reads.

Fixes: 30a584d944fb ("[LLX]: SOCK_DGRAM interface fixes")
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/llc/af_llc.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -716,7 +716,7 @@ static int llc_ui_recvmsg(struct kiocb *
unsigned long cpu_flags;
size_t copied = 0;
u32 peek_seq = 0;
- u32 *seq;
+ u32 *seq, skb_len;
unsigned long used;
int target; /* Read at least this many bytes */
long timeo;
@@ -814,6 +814,7 @@ static int llc_ui_recvmsg(struct kiocb *
}
continue;
found_ok_skb:
+ skb_len = skb->len;
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
@@ -846,7 +847,7 @@ static int llc_ui_recvmsg(struct kiocb *
}

/* Partial read */
- if (used + offset < skb->len)
+ if (used + offset < skb_len)
continue;
} while (len > 0);



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 11 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Sasha Levin <sasha.levin@oracle.com>

[ Upstream commit 12663bfc97c8b3fdb292428105dd92d563164050 ]

unix_dgram_recvmsg() will hold the readlock of the socket until recv
is complete.

In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until
unix_dgram_recvmsg() will complete (which can take a while) without allowing
us to break out of it, triggering a hung task spew.

Instead, allow set_peek_off to fail, this way userspace will not hang.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
include/linux/net.h | 2 +-
net/core/sock.c | 2 +-
net/unix/af_unix.c | 8 ++++++--
3 files changed, 8 insertions(+), 4 deletions(-)

--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -215,7 +215,7 @@ struct proto_ops {
int offset, size_t size, int flags);
ssize_t (*splice_read)(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, unsigned int flags);
- void (*set_peek_off)(struct sock *sk, int val);
+ int (*set_peek_off)(struct sock *sk, int val);
};

#define DECLARE_SOCKADDR(type, dst, src) \
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -795,7 +795,7 @@ set_rcvbuf:

case SO_PEEK_OFF:
if (sock->ops->set_peek_off)
- sock->ops->set_peek_off(sk, val);
+ ret = sock->ops->set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -524,13 +524,17 @@ static int unix_seqpacket_sendmsg(struct
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
struct msghdr *, size_t, int);

-static void unix_set_peek_off(struct sock *sk, int val)
+static int unix_set_peek_off(struct sock *sk, int val)
{
struct unix_sock *u = unix_sk(sk);

- mutex_lock(&u->readlock);
+ if (mutex_lock_interruptible(&u->readlock))
+ return -EINTR;
+
sk->sk_peek_off = val;
mutex_unlock(&u->readlock);
+
+ return 0;
}




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 12 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>

commit b6328a6b7ba57fc84c38248f6f0e387e1170f1a8 upstream.

Commit 4dcfa60071b3d23f0181f27d8519f12e37cefbb9 ("ARM: DMA-API: better
handing of DMA masks for coherent allocations") added an additional
check to the coherent DMA mask that results in an error when the mask is
larger than what dma_addr_t can address.

Set the LCDC coherent DMA mask to DMA_BIT_MASK(32) instead of ~0 to fix
the problem.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
arch/arm/mach-shmobile/board-mackerel.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -422,7 +422,7 @@ static struct platform_device lcdc_devic
.resource = lcdc_resources,
.dev = {
.platform_data = &lcdc_info,
- .coherent_dma_mask = ~0,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};

@@ -498,7 +498,7 @@ static struct platform_device hdmi_lcdc_
.id = 1,
.dev = {
.platform_data = &hdmi_lcdc_info,
- .coherent_dma_mask = ~0,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
},
};



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 13 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>

[ Upstream commit 18fc25c94eadc52a42c025125af24657a93638c0 ]

After congestion update on a local connection, when rds_ib_xmit returns
less bytes than that are there in the message, rds_send_xmit calls
back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE)
to trigger.

For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually
the message contains 8240 bytes. rds_send_xmit thinks there is more to send
and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header)
=4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k].

The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f
"rds: prevent BUG_ON triggering on congestion map updates" introduced
this regression. That change was addressing the triggering of a different
BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE:
BUG_ON(ret != 0 &&
conn->c_xmit_sg == rm->data.op_nents);
This was the sequence it was going through:
(rds_ib_xmit)
/* Do not send cong updates to IB loopback */
if (conn->c_loopback
&& rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
}
rds_ib_xmit returns 8240
rds_send_xmit:
c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time)
= 8192
c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again
rds_ib_xmit returns 8240
rds_send_xmit:
c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again
and so on (c_xmit_data_off 24672,32912,41152,49392,57632)
rds_ib_xmit returns 8240
On this iteration this sequence causes the BUG_ON in rds_send_xmit:
while (ret) {
tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off);
[tmp = 65536 - 57632 = 7904]
conn->c_xmit_data_off += tmp;
[c_xmit_data_off = 57632 + 7904 = 65536]
ret -= tmp;
[ret = 8240 - 7904 = 336]
if (conn->c_xmit_data_off == sg->length) {
conn->c_xmit_data_off = 0;
sg++;
conn->c_xmit_sg++;
BUG_ON(ret != 0 &&
conn->c_xmit_sg == rm->data.op_nents);
[c_xmit_sg = 1, rm->data.op_nents = 1]

What the current fix does:
Since the congestion update over loopback is not actually transmitted
as a message, all that rds_ib_xmit needs to do is let the caller think
the full message has been transmitted and not return partial bytes.
It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb.
And 64Kb+48 when page size is 64Kb.

Reported-by: Josh Hunt <joshhunt00@gmail.com>
Tested-by: Honggang Li <honli@redhat.com>
Acked-by: Bang Nguyen <bang.nguyen@oracle.com>
Signed-off-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
net/rds/ib_send.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)

--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *c
&& rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
scat = &rm->data.op_sg[sg];
- ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
- ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
- return ret;
+ ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length);
+ return sizeof(struct rds_header) + ret;
}

/* FIXME we may overallocate here */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 14 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Vlad Yasevich <vyasevic@redhat.com>

[ Upstream commit 006da7b07bc4d3a7ffabad17cf639eec6849c9dc ]

Currently macvlan will count received packets after calling each
vlans receive handler. Macvtap attempts to count the packet
yet again when the user reads the packet from the tap socket.
This code doesn't do this consistently either. Remove the
counting from macvtap and let only macvlan count received
packets.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/macvtap.c | 7 -------
1 file changed, 7 deletions(-)

--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -797,7 +797,6 @@ static ssize_t macvtap_put_user(struct m
const struct sk_buff *skb,
const struct iovec *iv, int len)
{
- struct macvlan_dev *vlan;
int ret;
int vnet_hdr_len = 0;
int vlan_offset = 0;
@@ -851,12 +850,6 @@ static ssize_t macvtap_put_user(struct m
copied += len;

done:
- rcu_read_lock_bh();
- vlan = rcu_dereference_bh(q->vlan);
- if (vlan)
- macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
- rcu_read_unlock_bh();
-
return ret ? ret : copied;
}



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 15 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Nat Gurumoorthy <natg@google.com>

[ Upstream commit 388d3335575f4c056dcf7138a30f1454e2145cd8 ]

The new tg3 driver leaves REG_BASE_ADDR (PCI config offset 120)
uninitialized. From power on reset this register may have garbage in it. The
Register Base Address register defines the device local address of a
register. The data pointed to by this location is read or written using
the Register Data register (PCI config offset 128). When REG_BASE_ADDR has
garbage any read or write of Register Data Register (PCI 128) will cause the
PCI bus to lock up. The TCO watchdog will fire and bring down the system.

Signed-off-by: Nat Gurumoorthy <natg@google.com>
Acked-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/net/ethernet/broadcom/tg3.c | 3 +++
1 file changed, 3 insertions(+)

--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14671,6 +14671,9 @@ static int __devinit tg3_get_invariants(
/* Clear this out for sanity. */
tw32(TG3PCI_MEM_WIN_BASE_ADDR, 0);

+ /* Clear TG3PCI_REG_BASE_ADDR to prevent hangs. */
+ tw32(TG3PCI_REG_BASE_ADDR, 0);
+
pci_read_config_dword(tp->pdev, TG3PCI_PCISTATE,
&pci_state_reg);
if ((pci_state_reg & PCISTATE_CONV_PCI_MODE) == 0 &&


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/




== 16 of 18 ==
Date: Mon, Jan 13 2014 5:20 pm
From: Greg Kroah-Hartman


3.4-stable review patch. If anyone has any objections, please let me know.

------------------

From: Ben Segall <bsegall@google.com>

commit 1ee14e6c8cddeeb8a490d7b54cd9016e4bb900b4 upstream.

When we transition cfs_bandwidth_used to false, any currently
throttled groups will incorrectly return false from cfs_rq_throttled.
While tg_set_cfs_bandwidth will unthrottle them eventually, currently
running code (including at least dequeue_task_fair and
distribute_cfs_runtime) will cause errors.

Fix this by turning off cfs_bandwidth_used only after unthrottling all
cfs_rqs.

Tested: toggle bandwidth back and forth on a loaded cgroup. Caused
crashes in minutes without the patch, hasn't crashed with it.

Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: pjt@google.com
Link: http://lkml.kernel.org/r/20131016181611.22647.80365.stgit@sword-of-the-dawn.mtv.corp.google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Chris J Arges <chris.j.arges@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

---
kernel/sched/core.c | 9 ++++++++-
kernel/sched/fair.c | 16 +++++++++-------
kernel/sched/sched.h | 3 ++-
3 files changed, 19 insertions(+), 9 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7906,7 +7906,12 @@ static int tg_set_cfs_bandwidth(struct t

runtime_enabled = quota != RUNTIME_INF;
runtime_was_enabled = cfs_b->quota != RUNTIME_INF;
- account_cfs_bandwidth_used(runtime_enabled, runtime_was_enabled);
+ /*
+ * If we need to toggle cfs_bandwidth_used, off->on must occur
+ * before making related changes, and on->off must occur afterwards
+ */
+ if (runtime_enabled && !runtime_was_enabled)
+ cfs_bandwidth_usage_inc();
raw_spin_lock_irq(&cfs_b->lock);
cfs_b->period = ns_to_ktime(period);
cfs_b->quota = quota;
@@ -7932,6 +7937,8 @@ static int tg_set_cfs_bandwidth(struct t
unthrottle_cfs_rq(cfs_rq);
raw_spin_unlock_irq(&rq->lock);
}
+ if (runtime_was_enabled && !runtime_enabled)
+ cfs_bandwidth_usage_dec();
out_unlock:
mutex_unlock(&cfs_constraints_mutex);

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1393,13 +1393,14 @@ static inline bool cfs_bandwidth_used(vo
return static_key_false(&__cfs_bandwidth_used);
}

-void account_cfs_bandwidth_used(int enabled, int was_enabled)
+void cfs_bandwidth_usage_inc(void)
{
- /* only need to count groups transitioning between enabled/!enabled */
- if (enabled && !was_enabled)
- static_key_slow_inc(&__cfs_bandwidth_used);
- else if (!enabled && was_enabled)
- static_key_slow_dec(&__cfs_bandwidth_used);
+ static_key_slow_inc(&__cfs_bandwidth_used);
+}
+
+void cfs_bandwidth_usage_dec(void)
+{
+ static_key_slow_dec(&__cfs_bandwidth_used);
}
#else /* HAVE_JUMP_LABEL */
static bool cfs_bandwidth_used(void)
@@ -1407,7 +1408,8 @@ static bool cfs_bandwidth_used(void)
return true;
}

-void account_cfs_bandwidth_used(int enabled, int was_enabled) {}
+void cfs_bandwidth_usage_inc(void) {}
+void cfs_bandwidth_usage_dec(void) {}

0 Comments:

Post a Comment

Subscribe to Post Comments [Atom]

<< Home


Real Estate