Discussion:
fq codel panic: ifq_is_serialized and MP interrupt
Sebastien Marie
2017-05-27 05:28:26 UTC
Permalink
Hi,

I am experiencing often the following panic:

panic: kernel diagnostic assertion "ifq_is_serialized(ifq)" failed: ../sys/net/ifq.c, line 394

while running with GENERIC.MP patched with mikeb@ diff:

if_start(struct ifnet *ifp)
{
KASSERT(ifp->if_qstart == if_qstart_compat);
- if_qstart_compat(&ifp->if_snd);
+ ifq_start(&ifp->if_snd);
}


I report it in another thread, as I am unsure if the problem is exactly
correlated: the ddb backtrace showed a network interrupt inside
ifq_serialize().

ddb{0}> trace
db_enter(x,x,x,x,x) at db_enter+0x7
panic(x,x,x,x,18a) at panic+0x71
__assert(x,x,18a,x,bbd5) at __assert+0x2e
ifq_mfreeml(x,x,x,2bbb,x) at ifq_mfreeml+0x6a
fqcodel_deq_begin(x,x,x,x,x) at fqcodel_decbe+0x186
ifq_deb_begin(x,x,f0,0,x) at ifq_deb_begin+0x37
ifq_dequeue(x,x,x,x,x) at ifq_dequeue+0x17
bce_start(x,20,1000000,x,200282,bbd5,0) at bce_start+0x11f
bce_intr(x,x,x,2bbb,x) at bce_intr+0xc3
Xintr_ioapic3() at Xintr_ioapic3+0x66
--- interrupt ---
ifq_serialize(x,x,2,x,x) at ifq_serialize+0x1
ether_output(x,x,x,x,0) at ether_output+0x1d2
ip_output(x,0,x,800,0) at ip_output+0x821
tcp_output(x,x,x,x,0) at tcp_output+0x81d
tcp_usrreq(x,8,0,0,0,0) at tcp_usrreq+0x633
soreceive(x,0,x,0,0) at soreceive+0x2da
soo_read(x,x,x,x,0) at soo_read+0x43
dofilereadv(x,3,x,x,1) at dofilereadv+0x1c5
sys_read(x,x,x,0,x) at sys_read+0x8f
syscall() at syscall+0x250
--- syscall (number 2081103872) ---
0x6:
ddb{0}>



the panic seems to occurs at tcp connection (ssh session incoming)
whereas it is already doing some network activity (here it was updating
using pkg_add).


the host was running with:
queue fq on bce0 flows 1024 default

# ifconfig

lo0: flags=8049<UP,LOOPBACK,RUNNING,MULTICAST> mtu 32768
index 4 priority 0 llprio 3
groups: lo
inet6 ::1 prefixlen 128
inet6 fe80::1%lo0 prefixlen 64 scopeid 0x4
inet 127.0.0.1 netmask 0xff000000
wpi0: flags=8802<BROADCAST,SIMPLEX,MULTICAST> mtu 1500
lladdr 00:13:02:2e:8b:46
index 1 priority 4 llprio 3
groups: wlan
media: IEEE802.11 autoselect
status: no network
ieee80211: nwid ""
bce0: flags=208a43<UP,BROADCAST,RUNNING,ALLMULTI,SIMPLEX,MULTICAST,AUTOCONF6> mtu 1500
lladdr 00:15:c5:0b:8b:7a
index 2 priority 0 llprio 3
groups: egress
media: Ethernet autoselect (100baseTX full-duplex)
status: active
inet 192.168.92.11 netmask 0xffffff00 broadcast 192.168.92.255
inet6 fe80::215:c5ff:fe0b:8b7a%bce0 prefixlen 64 scopeid 0x2
inet6 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a prefixlen 64 autoconf pltime 604784 vltime 2591984
inet6 2001:41d0:fe39:c05c:5057:c993:3ee2:599a prefixlen 64 autoconf autoconfprivacy pltime 85934 vltime 604710
enc0: flags=0<>
index 3 priority 0 llprio 3
groups: enc
status: active
pppoe0: flags=8810<POINTOPOINT,SIMPLEX,MULTICAST> mtu 1492
index 5 priority 0 llprio 3
dev: state: initial
sid: 0x0 PADI retries: 0 PADR retries: 0
groups: pppoe
pflog0: flags=141<UP,RUNNING,PROMISC> mtu 33172
index 6 priority 0 llprio 3
groups: pflog


(note: the pppoe0 is here for testing. it is only created and put in
down state).


# dmesg
OpenBSD 6.1-current (GENERIC.MP) #0: Thu May 25 14:00:16 CEST 2017
***@bert.local:/home/openbsd/src/sys/arch/i386/compile/GENERIC.MP
cpu0: Genuine Intel(R) CPU T2400 @ 1.83GHz ("GenuineIntel" 686-class) 1.83 GHz
cpu0: FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,NXE,SSE3,MWAIT,VMX,EST,TM2,xTPR,PDCM,PERF,SENSOR
real mem = 2137354240 (2038MB)
avail mem = 2083602432 (1987MB)
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: date 06/13/07, BIOS32 rev. 0 @ 0xffa10, SMBIOS rev. 2.4 @ 0xf7980 (44 entries)
bios0: vendor Dell Inc. version "A17" date 06/13/2007
bios0: Dell Inc. MM061
acpi0 at bios0: rev 0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP HPET APIC MCFG SLIC BOOT SSDT
acpi0: wakeup devices LID_(S3) PBTN(S4) MBTN(S5) PCI0(S3) USB0(S0) USB1(S0) USB2(S0) USB3(S0) EHCI(S0) AZAL(S3) PCIE(S4) RP01(S4) RP02(S3) RP03(S3) RP04(S3) RP05(S3) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpihpet0 at acpi0: 14318179 Hz
acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 166MHz
cpu0: mwait min=64, max=64, C-substates=0.2.2.2.2, IBE
cpu1 at mainbus0: apid 1 (application processor)
cpu1: Genuine Intel(R) CPU T2400 @ 1.83GHz ("GenuineIntel" 686-class) 1.83 GHz
cpu1: FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,NXE,SSE3,MWAIT,VMX,EST,TM2,xTPR,PDCM,PERF,SENSOR
ioapic0 at mainbus0: apid 2 pa 0xfec00000, version 20, 24 pins
acpimcfg0 at acpi0 addr 0xf0000000, bus 0-63
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus -1 (AGP_)
acpiprt2 at acpi0: bus 3 (PCIE)
acpiprt3 at acpi0: bus 11 (RP01)
acpiprt4 at acpi0: bus -1 (RP02)
acpiprt5 at acpi0: bus -1 (RP03)
acpiprt6 at acpi0: bus 12 (RP04)
acpiprt7 at acpi0: bus -1 (RP05)
acpiprt8 at acpi0: bus -1 (RP06)
acpicpu0 at acpi0: !C3(***@85 ***@0x1015), !C2(***@1 ***@0x1014), C1(***@1 halt), PSS
acpicpu1 at acpi0: !C3(***@85 ***@0x1015), !C2(***@1 ***@0x1014), C1(***@1 halt), PSS
acpitz0 at acpi0: critical temperature is 126 degC
"*pnp0c14" at acpi0 not configured
acpiac0 at acpi0: AC unit online
acpibat0 at acpi0: BAT0 model " DELLUD2649" serial 878 type LION oem "Sanyo"
acpibtn0 at acpi0: LID_
acpibtn1 at acpi0: PBTN
acpibtn2 at acpi0: SBTN
"PNP0C32" at acpi0 not configured
"PNP0F13" at acpi0 not configured
"PNP0303" at acpi0 not configured
acpivideo0 at acpi0: VID_
acpivideo1 at acpi0: VID_
acpivout0 at acpivideo1: LCD_
acpivideo2 at acpi0: VID2
bios0: ROM list: 0xc0000/0xe800! 0xce800/0x1800
cpu0: Enhanced SpeedStep 1829 MHz: speeds: 1833, 1333, 1000 MHz
pci0 at mainbus0 bus 0: configuration mode 1 (bios)
pchb0 at pci0 dev 0 function 0 "Intel 82945GM Host" rev 0x03
inteldrm0 at pci0 dev 2 function 0 "Intel 82945GM Video" rev 0x03
drm0 at inteldrm0
intagp0 at inteldrm0
agp0 at intagp0: aperture at 0xd0000000, size 0x10000000
inteldrm0: apic 2 int 16
inteldrm0: 1280x800, 32bpp
wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation)
wsdisplay0: screen 1-5 added (std, vt100 emulation)
"Intel 82945GM Video" rev 0x03 at pci0 dev 2 function 1 not configured
azalia0 at pci0 dev 27 function 0 "Intel 82801GB HD Audio" rev 0x01: msi
azalia0: codecs: Sigmatel STAC9200, Conexant/0x2bfa, using Sigmatel STAC9200
audio0 at azalia0
ppb0 at pci0 dev 28 function 0 "Intel 82801GB PCIE" rev 0x01: apic 2 int 16
pci1 at ppb0 bus 11
wpi0 at pci1 dev 0 function 0 "Intel PRO/Wireless 3945ABG" rev 0x02: msi, MoW2, address 00:13:02:2e:8b:46
ppb1 at pci0 dev 28 function 3 "Intel 82801GB PCIE" rev 0x01: apic 2 int 19
pci2 at ppb1 bus 12
uhci0 at pci0 dev 29 function 0 "Intel 82801GB USB" rev 0x01: apic 2 int 20
uhci1 at pci0 dev 29 function 1 "Intel 82801GB USB" rev 0x01: apic 2 int 21
uhci2 at pci0 dev 29 function 2 "Intel 82801GB USB" rev 0x01: apic 2 int 22
uhci3 at pci0 dev 29 function 3 "Intel 82801GB USB" rev 0x01: apic 2 int 23
ehci0 at pci0 dev 29 function 7 "Intel 82801GB USB" rev 0x01: apic 2 int 20
usb0 at ehci0: USB revision 2.0
uhub0 at usb0 configuration 1 interface 0 "Intel EHCI root hub" rev 2.00/1.00 addr 1
ppb2 at pci0 dev 30 function 0 "Intel 82801BAM Hub-to-PCI" rev 0xe1
pci3 at ppb2 bus 3
bce0 at pci3 dev 0 function 0 "Broadcom BCM4401B1" rev 0x02: apic 2 int 17, address 00:15:c5:0b:8b:7a
bmtphy0 at bce0 phy 1: BCM4401 10/100baseTX PHY, rev. 0
"Ricoh 5C832 Firewire" rev 0x00 at pci3 dev 1 function 0 not configured
sdhc0 at pci3 dev 1 function 1 "Ricoh 5C822 SD/MMC" rev 0x19: apic 2 int 18
sdhc0: SDHC 1.0, 33 MHz base clock
sdmmc0 at sdhc0: 4-bit, sd high-speed, mmc high-speed
"Ricoh 5C843 MMC" rev 0x01 at pci3 dev 1 function 2 not configured
"Ricoh 5C592 Memory Stick" rev 0x0a at pci3 dev 1 function 3 not configured
"Ricoh 5C852 xD" rev 0x05 at pci3 dev 1 function 4 not configured
ichpcib0 at pci0 dev 31 function 0 "Intel 82801GBM LPC" rev 0x01: PM disabled
pciide0 at pci0 dev 31 function 2 "Intel 82801GBM SATA" rev 0x01: DMA, channel 0 wired to compatibility, channel 1 wired to compatibility
wd0 at pciide0 channel 0 drive 0: <ST96812AS>
wd0: 16-sector PIO, LBA48, 57241MB, 117231408 sectors
wd0(pciide0:0:0): using PIO mode 4, Ultra-DMA mode 5
atapiscsi0 at pciide0 channel 1 drive 0
scsibus1 at atapiscsi0: 2 targets
cd0 at scsibus1 targ 0 lun 0: <_NEC, DVD+-RW ND-6650A, 102C> ATAPI 5/cdrom removable
cd0(pciide0:1:0): using PIO mode 4, Ultra-DMA mode 2
ichiic0 at pci0 dev 31 function 3 "Intel 82801GB SMBus" rev 0x01: apic 2 int 17
iic0 at ichiic0
spdmem0 at iic0 addr 0x50: 1GB DDR2 SDRAM non-parity PC2-5300CL5 SO-DIMM
spdmem1 at iic0 addr 0x52: 1GB DDR2 SDRAM non-parity PC2-5300CL5 SO-DIMM
usb1 at uhci0: USB revision 1.0
uhub1 at usb1 configuration 1 interface 0 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb2 at uhci1: USB revision 1.0
uhub2 at usb2 configuration 1 interface 0 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb3 at uhci2: USB revision 1.0
uhub3 at usb3 configuration 1 interface 0 "Intel UHCI root hub" rev 1.00/1.00 addr 1
usb4 at uhci3: USB revision 1.0
uhub4 at usb4 configuration 1 interface 0 "Intel UHCI root hub" rev 1.00/1.00 addr 1
isa0 at ichpcib0
isadma0 at isa0
pckbc0 at isa0 port 0x60/5 irq 1 irq 12
pckbd0 at pckbc0 (kbd slot)
wskbd0 at pckbd0: console keyboard, using wsdisplay0
pms0 at pckbc0 (aux slot)
wsmouse0 at pms0 mux 0
pms0: Synaptics touchpad, firmware 6.2
pcppi0 at isa0 port 0x61
spkr0 at pcppi0
npx0 at isa0 port 0xf0/16: reported by CPUID; using exception 16
vmm0 at mainbus0: VMX
vscsi0 at root
scsibus2 at vscsi0: 256 targets
softraid0 at root
scsibus3 at softraid0: 256 targets
root on wd0a (055d82c92ce5e61f.a) swap on wd0b dump on wd0b
--
Sebastien Marie
Sebastien Marie
2017-05-27 18:06:15 UTC
Permalink
Post by Sebastien Marie
Hi,
panic: kernel diagnostic assertion "ifq_is_serialized(ifq)" failed: ../sys/net/ifq.c, line 394
if_start(struct ifnet *ifp)
{
KASSERT(ifp->if_qstart == if_qstart_compat);
- if_qstart_compat(&ifp->if_snd);
+ ifq_start(&ifp->if_snd);
}
mikeb@, I don't reproduce if I backout the local diff.

it seems bce(4) doesn't like it.
--
Sebastien Marie
Mike Belopuhov
2017-05-28 20:45:34 UTC
Permalink
Post by Sebastien Marie
Post by Sebastien Marie
Hi,
panic: kernel diagnostic assertion "ifq_is_serialized(ifq)" failed: ../sys/net/ifq.c, line 394
if_start(struct ifnet *ifp)
{
KASSERT(ifp->if_qstart == if_qstart_compat);
- if_qstart_compat(&ifp->if_snd);
+ ifq_start(&ifp->if_snd);
}
it seems bce(4) doesn't like it.
Indeed, a lot of things won't like this since xxx_start may be called
directly by the driver. To handle this situation we need to make sure
that IFQ_DEQUEUE that is called by these xxx_start routines provides a
compatible interface. Please try the diff below. If this solves your
issue, I'd like to get it in w/o assertions unless there are objections.


---
sys/net/if.c | 3 ++-
sys/net/if_var.h | 2 +-
sys/net/ifq.c | 26 ++++++++++++++++++++++++++
sys/net/ifq.h | 1 +
4 files changed, 30 insertions(+), 2 deletions(-)

diff --git sys/net/if.c sys/net/if.c
index 34fd1aa84d7..acea3ae53ea 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -623,12 +623,13 @@ if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)

void
if_start(struct ifnet *ifp)
{
KASSERT(ifp->if_qstart == if_qstart_compat);
- if_qstart_compat(&ifp->if_snd);
+ ifq_start(&ifp->if_snd);
}
+
void
if_qstart_compat(struct ifqueue *ifq)
{
struct ifnet *ifp = ifq->ifq_if;
int s;
diff --git sys/net/if_var.h sys/net/if_var.h
index 14e118bcbef..9df7c8f4d80 100644
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -255,11 +255,11 @@ do { \
(err) = ifq_enqueue((ifq), (m)); \
} while (/* CONSTCOND */0)

#define IFQ_DEQUEUE(ifq, m) \
do { \
- (m) = ifq_dequeue(ifq); \
+ (m) = ifq_dequeue_compat(ifq); \
} while (/* CONSTCOND */0)

#define IFQ_PURGE(ifq) \
do { \
(void)ifq_purge(ifq); \
diff --git sys/net/ifq.c sys/net/ifq.c
index d37be87f444..0a49567f81d 100644
--- sys/net/ifq.c
+++ sys/net/ifq.c
@@ -337,10 +337,36 @@ ifq_dequeue(struct ifqueue *ifq)
ifq_deq_commit(ifq, m);

return (m);
}

+struct mbuf *
+ifq_dequeue_compat(struct ifqueue *ifq)
+{
+ struct mbuf *m;
+ void *serializer;
+
+ KERNEL_ASSERT_LOCKED();
+ KASSERT(ifq->ifq_serializer == NULL ||
+ ifq->ifq_serializer == curcpu());
+
+ serializer = ifq->ifq_serializer;
+ ifq->ifq_serializer = curcpu();
+
+ m = ifq_deq_begin(ifq);
+ if (m == NULL)
+ return (NULL);
+
+ ifq_deq_commit(ifq, m);
+
+ ml_purge(&ifq->ifq_free);
+
+ ifq->ifq_serializer = serializer;
+
+ return (m);
+}
+
unsigned int
ifq_purge(struct ifqueue *ifq)
{
struct mbuf_list ml = MBUF_LIST_INITIALIZER();
unsigned int rv;
diff --git sys/net/ifq.h sys/net/ifq.h
index 29b89c38b23..3a7fde55ca2 100644
--- sys/net/ifq.h
+++ sys/net/ifq.h
@@ -380,10 +380,11 @@ void ifq_destroy(struct ifqueue *);
int ifq_enqueue(struct ifqueue *, struct mbuf *);
struct mbuf *ifq_deq_begin(struct ifqueue *);
void ifq_deq_commit(struct ifqueue *, struct mbuf *);
void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf *ifq_dequeue(struct ifqueue *);
+struct mbuf *ifq_dequeue_compat(struct ifqueue *);
void ifq_mfreem(struct ifqueue *, struct mbuf *);
void ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int ifq_purge(struct ifqueue *);
void *ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void ifq_q_leave(struct ifqueue *, void *);
--
2.13.0
Sebastien Marie
2017-05-29 04:57:40 UTC
Permalink
Post by Mike Belopuhov
Post by Sebastien Marie
it seems bce(4) doesn't like it.
Indeed, a lot of things won't like this since xxx_start may be called
directly by the driver. To handle this situation we need to make sure
that IFQ_DEQUEUE that is called by these xxx_start routines provides a
compatible interface. Please try the diff below. If this solves your
issue, I'd like to get it in w/o assertions unless there are objections.
with or without codel queue: the remote connection seems really *slow*.

bert (patched) and clyde (unpatched) hsots are connected on same LAN
segment.

clyde:~$ ping6 bert
PING bert (2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a): 56 data bytes
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=0 hlim=64 time=1002.655 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=1 hlim=64 time=0.763 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=2 hlim=64 time=1000.330 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=3 hlim=64 time=1000.388 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=4 hlim=64 time=1000.396 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=5 hlim=64 time=1000.373 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=6 hlim=64 time=0.432 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=7 hlim=64 time=1000.349 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=8 hlim=64 time=0.480 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=9 hlim=64 time=228.896 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=10 hlim=64 time=20.386 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=11 hlim=64 time=0.504 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=12 hlim=64 time=0.582 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=13 hlim=64 time=864.730 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=14 hlim=64 time=1000.343 ms
^C
--- bert ping statistics ---
16 packets transmitted, 15 packets received, 6.2% packet loss
round-trip min/avg/max/std-dev = 0.432/541.440/1002.655/476.996 ms



Surprising thing (for me): having tcpbench running in background between
the two hosts reduces the lag (but still high on a LAN).

clyde:~$ ping6 bert
PING bert (2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a): 56 data bytes
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=0 hlim=64 time=1.393 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=1 hlim=64 time=19.961 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=2 hlim=64 time=23.404 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=3 hlim=64 time=26.457 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=4 hlim=64 time=29.159 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=5 hlim=64 time=30.862 ms
^C
--- bert ping statistics ---
6 packets transmitted, 6 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 1.393/21.873/30.862/9.835 ms
Post by Mike Belopuhov
---
sys/net/if.c | 3 ++-
sys/net/if_var.h | 2 +-
sys/net/ifq.c | 26 ++++++++++++++++++++++++++
sys/net/ifq.h | 1 +
4 files changed, 30 insertions(+), 2 deletions(-)
diff --git sys/net/if.c sys/net/if.c
index 34fd1aa84d7..acea3ae53ea 100644
--- sys/net/if.c
+++ sys/net/if.c
@@ -623,12 +623,13 @@ if_attach_ifq(struct ifnet *ifp, const struct ifq_ops *newops, void *args)
void
if_start(struct ifnet *ifp)
{
KASSERT(ifp->if_qstart == if_qstart_compat);
- if_qstart_compat(&ifp->if_snd);
+ ifq_start(&ifp->if_snd);
}
+
void
if_qstart_compat(struct ifqueue *ifq)
{
struct ifnet *ifp = ifq->ifq_if;
int s;
diff --git sys/net/if_var.h sys/net/if_var.h
index 14e118bcbef..9df7c8f4d80 100644
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -255,11 +255,11 @@ do { \
(err) = ifq_enqueue((ifq), (m)); \
} while (/* CONSTCOND */0)
#define IFQ_DEQUEUE(ifq, m) \
do { \
- (m) = ifq_dequeue(ifq); \
+ (m) = ifq_dequeue_compat(ifq); \
} while (/* CONSTCOND */0)
#define IFQ_PURGE(ifq) \
do { \
(void)ifq_purge(ifq); \
diff --git sys/net/ifq.c sys/net/ifq.c
index d37be87f444..0a49567f81d 100644
--- sys/net/ifq.c
+++ sys/net/ifq.c
@@ -337,10 +337,36 @@ ifq_dequeue(struct ifqueue *ifq)
ifq_deq_commit(ifq, m);
return (m);
}
+struct mbuf *
+ifq_dequeue_compat(struct ifqueue *ifq)
+{
+ struct mbuf *m;
+ void *serializer;
+
+ KERNEL_ASSERT_LOCKED();
+ KASSERT(ifq->ifq_serializer == NULL ||
+ ifq->ifq_serializer == curcpu());
+
+ serializer = ifq->ifq_serializer;
+ ifq->ifq_serializer = curcpu();
+
+ m = ifq_deq_begin(ifq);
+ if (m == NULL)
+ return (NULL);
+
+ ifq_deq_commit(ifq, m);
+
+ ml_purge(&ifq->ifq_free);
+
+ ifq->ifq_serializer = serializer;
+
+ return (m);
+}
+
unsigned int
ifq_purge(struct ifqueue *ifq)
{
struct mbuf_list ml = MBUF_LIST_INITIALIZER();
unsigned int rv;
diff --git sys/net/ifq.h sys/net/ifq.h
index 29b89c38b23..3a7fde55ca2 100644
--- sys/net/ifq.h
+++ sys/net/ifq.h
@@ -380,10 +380,11 @@ void ifq_destroy(struct ifqueue *);
int ifq_enqueue(struct ifqueue *, struct mbuf *);
struct mbuf *ifq_deq_begin(struct ifqueue *);
void ifq_deq_commit(struct ifqueue *, struct mbuf *);
void ifq_deq_rollback(struct ifqueue *, struct mbuf *);
struct mbuf *ifq_dequeue(struct ifqueue *);
+struct mbuf *ifq_dequeue_compat(struct ifqueue *);
void ifq_mfreem(struct ifqueue *, struct mbuf *);
void ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
unsigned int ifq_purge(struct ifqueue *);
void *ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
void ifq_q_leave(struct ifqueue *, void *);
--
2.13.0
--
Sebastien Marie
Mike Belopuhov
2017-05-29 12:16:40 UTC
Permalink
Post by Sebastien Marie
Post by Mike Belopuhov
Post by Sebastien Marie
it seems bce(4) doesn't like it.
Indeed, a lot of things won't like this since xxx_start may be called
directly by the driver. To handle this situation we need to make sure
that IFQ_DEQUEUE that is called by these xxx_start routines provides a
compatible interface. Please try the diff below. If this solves your
issue, I'd like to get it in w/o assertions unless there are objections.
with or without codel queue: the remote connection seems really *slow*.
This is an interesting artifact. Thanks for noticing, I'm curious what's
causing it.
Post by Sebastien Marie
bert (patched) and clyde (unpatched) hsots are connected on same LAN
segment.
clyde:~$ ping6 bert
PING bert (2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a): 56 data bytes
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=0 hlim=64 time=1002.655 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=1 hlim=64 time=0.763 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=2 hlim=64 time=1000.330 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=3 hlim=64 time=1000.388 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=4 hlim=64 time=1000.396 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=5 hlim=64 time=1000.373 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=6 hlim=64 time=0.432 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=7 hlim=64 time=1000.349 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=8 hlim=64 time=0.480 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=9 hlim=64 time=228.896 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=10 hlim=64 time=20.386 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=11 hlim=64 time=0.504 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=12 hlim=64 time=0.582 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=13 hlim=64 time=864.730 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=14 hlim=64 time=1000.343 ms
^C
--- bert ping statistics ---
16 packets transmitted, 15 packets received, 6.2% packet loss
round-trip min/avg/max/std-dev = 0.432/541.440/1002.655/476.996 ms
Surprising thing (for me): having tcpbench running in background between
the two hosts reduces the lag (but still high on a LAN).
clyde:~$ ping6 bert
PING bert (2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a): 56 data bytes
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=0 hlim=64 time=1.393 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=1 hlim=64 time=19.961 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=2 hlim=64 time=23.404 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=3 hlim=64 time=26.457 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=4 hlim=64 time=29.159 ms
64 bytes from 2001:41d0:fe39:c05c:215:c5ff:fe0b:8b7a: icmp_seq=5 hlim=64 time=30.862 ms
^C
--- bert ping statistics ---
6 packets transmitted, 6 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 1.393/21.873/30.862/9.835 ms
Thanks for taking your time to test, but unfortunately I have to
withdraw both diffs. I've realised that there's a ton of drivers
calling ifq_deq_begin manually from their start routines and
there's no simple solution for that that I can find right now.

I'll try to come up with something that works for all cases ASAP.
Mike Belopuhov
2017-06-01 08:30:04 UTC
Permalink
Post by Mike Belopuhov
Thanks for taking your time to test, but unfortunately I have to
withdraw both diffs. I've realised that there's a ton of drivers
calling ifq_deq_begin manually from their start routines and
there's no simple solution for that that I can find right now.
I'll try to come up with something that works for all cases ASAP.
Here you go. KERNEL_ASSERT_LOCKED might be better written as
NET_ASSERT_LOCKED for the future. I'm not sure I want to keep
it there though as it's additional code in a hot path.

diff --git sys/net/ifq.c sys/net/ifq.c
index d37be87f444..bad95c840ff 100644
--- sys/net/ifq.c
+++ sys/net/ifq.c
@@ -313,18 +313,28 @@ ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
cookie = m->m_pkthdr.ph_cookie;

ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
ifq->ifq_len--;
mtx_leave(&ifq->ifq_mtx);
+
+ if (ifq->ifq_serializer == NULL) {
+ KERNEL_ASSERT_LOCKED();
+ ml_purge(&ifq->ifq_free);
+ }
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
KASSERT(m != NULL);

mtx_leave(&ifq->ifq_mtx);
+
+ if (ifq->ifq_serializer == NULL) {
+ KERNEL_ASSERT_LOCKED();
+ ml_purge(&ifq->ifq_free);
+ }
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
@@ -379,21 +389,21 @@ ifq_q_leave(struct ifqueue *ifq, void *q)
}

void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
- IFQ_ASSERT_SERIALIZED(ifq);
+ /* IFQ_ASSERT_SERIALIZED(ifq); */

ifq->ifq_len--;
ifq->ifq_qdrops++;
ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
- IFQ_ASSERT_SERIALIZED(ifq);
+ /* IFQ_ASSERT_SERIALIZED(ifq); */

ifq->ifq_len -= ml_len(ml);
ifq->ifq_qdrops += ml_len(ml);
ml_enlist(&ifq->ifq_free, ml);
}
Mike Belopuhov
2017-06-01 12:16:02 UTC
Permalink
Post by Mike Belopuhov
Post by Mike Belopuhov
Thanks for taking your time to test, but unfortunately I have to
withdraw both diffs. I've realised that there's a ton of drivers
calling ifq_deq_begin manually from their start routines and
there's no simple solution for that that I can find right now.
I'll try to come up with something that works for all cases ASAP.
Here you go. KERNEL_ASSERT_LOCKED might be better written as
NET_ASSERT_LOCKED for the future. I'm not sure I want to keep
it there though as it's additional code in a hot path.
dlg@ has sent me his version which is similar in effect.
The big difference is that he completely removes the ifq_free
handling from ifq_serialize. I've OK'ed his diff so you might
want to apply it instead of mine.

Index: ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
retrieving revision 1.11
diff -u -p -r1.11 ifq.c
--- ifq.c 3 May 2017 20:55:29 -0000 1.11
+++ ifq.c 1 Jun 2017 09:57:41 -0000
@@ -70,7 +70,6 @@ void ifq_barrier_task(void *);
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
- struct mbuf_list ml = MBUF_LIST_INITIALIZER();
struct task work;

if (ISSET(t->t_flags, TASK_ONQUEUE))
@@ -97,20 +96,9 @@ ifq_serialize(struct ifqueue *ifq, struc
mtx_enter(&ifq->ifq_task_mtx);
}

- /*
- * ifq->ifq_free is only modified by dequeue, which
- * is only called from within this serialization
- * context. it is therefore safe to access and modify
- * here without taking ifq->ifq_mtx.
- */
- ml = ifq->ifq_free;
- ml_init(&ifq->ifq_free);
-
ifq->ifq_serializer = NULL;
}
mtx_leave(&ifq->ifq_task_mtx);
-
- ml_purge(&ml);
}

int
@@ -286,16 +274,36 @@ ifq_enqueue(struct ifqueue *ifq, struct
return (dm == m ? ENOBUFS : 0);
}

+static inline void
+ifq_deq_enter(struct ifqueue *ifq)
+{
+ mtx_enter(&ifq->ifq_mtx);
+}
+
+static inline void
+ifq_deq_leave(struct ifqueue *ifq)
+{
+ struct mbuf_list ml;
+
+ ml = ifq->ifq_free;
+ ml_init(&ifq->ifq_free);
+
+ mtx_leave(&ifq->ifq_mtx);
+
+ if (!ml_empty(&ml))
+ ml_purge(&ml);
+}
+
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
struct mbuf *m = NULL;
void *cookie;

- mtx_enter(&ifq->ifq_mtx);
+ ifq_deq_enter(ifq);
if (ifq->ifq_len == 0 ||
(m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
- mtx_leave(&ifq->ifq_mtx);
+ ifq_deq_leave(ifq);
return (NULL);
}

@@ -314,7 +322,7 @@ ifq_deq_commit(struct ifqueue *ifq, stru

ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
ifq->ifq_len--;
- mtx_leave(&ifq->ifq_mtx);
+ ifq_deq_leave(ifq);
}

void
@@ -322,7 +330,7 @@ ifq_deq_rollback(struct ifqueue *ifq, st
{
KASSERT(m != NULL);

- mtx_leave(&ifq->ifq_mtx);
+ ifq_deq_leave(ifq);
}

struct mbuf *
@@ -381,7 +389,7 @@ ifq_q_leave(struct ifqueue *ifq, void *q
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
- IFQ_ASSERT_SERIALIZED(ifq);
+ MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

ifq->ifq_len--;
ifq->ifq_qdrops++;
@@ -391,7 +399,7 @@ ifq_mfreem(struct ifqueue *ifq, struct m
void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
- IFQ_ASSERT_SERIALIZED(ifq);
+ MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

ifq->ifq_len -= ml_len(ml);
ifq->ifq_qdrops += ml_len(ml);

Loading...