diff --git a/distrib/sets/lists/comp/mi b/distrib/sets/lists/comp/mi index aa4ba23..472b470 100644 --- a/distrib/sets/lists/comp/mi +++ b/distrib/sets/lists/comp/mi @@ -17129,6 +17129,7 @@ ./usr/share/man/html9/in_getifa.html comp-sys-htmlman html ./usr/share/man/html9/incore.html comp-sys-htmlman html ./usr/share/man/html9/inittodr.html comp-sys-htmlman html +./usr/share/man/html9/intr_distribute.html comp-sys-htmlman html ./usr/share/man/html9/intro.html comp-sys-htmlman html ./usr/share/man/html9/ioasic.html comp-sys-htmlman html ./usr/share/man/html9/ioasic_attach_devs.html comp-sys-htmlman html @@ -17364,10 +17365,27 @@ ./usr/share/man/html9/pci_intr_evcnt.html comp-sys-htmlman html ./usr/share/man/html9/pci_intr_map.html comp-sys-htmlman html ./usr/share/man/html9/pci_intr_string.html comp-sys-htmlman html +./usr/share/man/html9/pci_intx_alloc.html comp-sys-htmlman html +./usr/share/man/html9/pci_intx_release.html comp-sys-htmlman html ./usr/share/man/html9/pci_make_tag.html comp-sys-htmlman html ./usr/share/man/html9/pci_mapreg_info.html comp-sys-htmlman html ./usr/share/man/html9/pci_mapreg_map.html comp-sys-htmlman html ./usr/share/man/html9/pci_mapreg_type.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_alloc.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_alloc_exact.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_count.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_disestablish.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_establish.html comp-sys-htmlman html +./usr/share/man/html9/pci_msi_release.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_alloc.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_alloc_exact.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_alloc_map.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_count.html comp-sys-htmlman 
html +./usr/share/man/html9/pci_msix_disestablish.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_establish.html comp-sys-htmlman html +./usr/share/man/html9/pci_msix_release.html comp-sys-htmlman html ./usr/share/man/html9/pci_set_powerstate.html comp-sys-htmlman html ./usr/share/man/html9/pci_vpd_read.html comp-sys-htmlman html ./usr/share/man/html9/pci_vpd_write.html comp-sys-htmlman html @@ -24061,6 +24079,7 @@ ./usr/share/man/man9/in_getifa.9 comp-sys-man .man ./usr/share/man/man9/incore.9 comp-sys-man .man ./usr/share/man/man9/inittodr.9 comp-sys-man .man +./usr/share/man/man9/intr_distribute.9 comp-sys-man .man ./usr/share/man/man9/intro.9 comp-sys-man .man ./usr/share/man/man9/ioasic.9 comp-sys-man .man ./usr/share/man/man9/ioasic_attach_devs.9 comp-sys-man .man @@ -24302,10 +24321,27 @@ ./usr/share/man/man9/pci_intr_evcnt.9 comp-sys-man .man ./usr/share/man/man9/pci_intr_map.9 comp-sys-man .man ./usr/share/man/man9/pci_intr_string.9 comp-sys-man .man +./usr/share/man/man9/pci_intx_alloc.9 comp-sys-man .man +./usr/share/man/man9/pci_intx_release.9 comp-sys-man .man ./usr/share/man/man9/pci_make_tag.9 comp-sys-man .man ./usr/share/man/man9/pci_mapreg_info.9 comp-sys-man .man ./usr/share/man/man9/pci_mapreg_map.9 comp-sys-man .man ./usr/share/man/man9/pci_mapreg_type.9 comp-sys-man .man +./usr/share/man/man9/pci_msi.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_alloc.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_alloc_exact.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_count.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_disestablish.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_establish.9 comp-sys-man .man +./usr/share/man/man9/pci_msi_release.9 comp-sys-man .man +./usr/share/man/man9/pci_msix.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_alloc.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_alloc_exact.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_alloc_map.9 comp-sys-man .man 
+./usr/share/man/man9/pci_msix_count.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_disestablish.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_establish.9 comp-sys-man .man +./usr/share/man/man9/pci_msix_release.9 comp-sys-man .man ./usr/share/man/man9/pci_set_powerstate.9 comp-sys-man .man ./usr/share/man/man9/pci_vpd_read.9 comp-sys-man .man ./usr/share/man/man9/pci_vpd_write.9 comp-sys-man .man diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index cdf3154..bb8ce3f 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -27,9 +27,9 @@ MAN= accept_filter.9 accf_data.9 accf_http.9 \ ieee80211_node.9 ieee80211_output.9 ieee80211_proto.9 \ ieee80211_radiotap.9 iic.9 imax.9 \ in_getifa.9 \ - in4_cksum.9 inittodr.9 intro.9 ioasic.9 ioctl.9 ipkdb.9 ipi.9 isa.9 \ - isapnp.9 itimerfix.9 kauth.9 kcopy.9 kcpuset.9 kmem.9 \ - kpause.9 \ + in4_cksum.9 inittodr.9 intr_distribute.9 intro.9 ioasic.9 ioctl.9 \ + ipkdb.9 ipi.9 isa.9 isapnp.9 itimerfix.9 kauth.9 kcopy.9 kcpuset.9 \ + kmem.9 kpause.9 \ kfilter_register.9 knote.9 \ kprintf.9 kthread.9 linedisc.9 lock.9 log.9 ltsleep.9 \ LWP_CACHE_CREDS.9 \ @@ -39,9 +39,9 @@ MAN= accept_filter.9 accf_data.9 accf_http.9 \ microseq.9 microtime.9 microuptime.9 mi_switch.9 module.9 \ mstohz.9 mutex.9 m_tag.9 namecache.9 \ namei.9 nullop.9 opencrypto.9 optstr.9 \ - panic.9 pathbuf.9 pci.9 pci_configure_bus.9 pci_intr.9 pckbport.9 \ - pcmcia.9 pcq.9 pcu.9 percpu.9 pfil.9 physio.9 pmap.9 pmatch.9 \ - pmc.9 pmf.9 pool.9 pool_cache.9 powerhook_establish.9 ppi.9 \ + panic.9 pathbuf.9 pci.9 pci_configure_bus.9 pci_intr.9 pci_msi.9 \ + pckbport.9 pcmcia.9 pcq.9 pcu.9 percpu.9 pfil.9 physio.9 pmap.9 \ + pmatch.9 pmc.9 pmf.9 pool.9 pool_cache.9 powerhook_establish.9 ppi.9 \ ppsratecheck.9 preempt.9 proc_find.9 pserialize.9 putter.9 \ radio.9 ras.9 rasops.9 ratecheck.9 resettodr.9 rnd.9 rndsink.9 \ roundup.9 rssadapt.9 rt_timer.9 rwlock.9 RUN_ONCE.9 STACK.9 \ @@ -572,6 +572,22 @@ MLINKS+=pci.9 pci_conf_read.9 \ 
pci.9 PCI_VENDOR.9 \ pci.9 PCI_PRODUCT.9 \ pci.9 PCI_REVISION.9 +MLINKS+=pci_msi.9 pci_msix.9 \ + pci_msi.9 pci_intx_alloc.9 \ + pci_msi.9 pci_intx_release.9 \ + pci_msi.9 pci_msi_count.9 \ + pci_msi.9 pci_msi_alloc.9 \ + pci_msi.9 pci_msi_alloc_exact.9 \ + pci_msi.9 pci_msi_release.9 \ + pci_msi.9 pci_msi_establish.9 \ + pci_msi.9 pci_msi_disestablish.9 \ + pci_msi.9 pci_msix_count.9 \ + pci_msi.9 pci_msix_alloc.9 \ + pci_msi.9 pci_msix_alloc_exact.9 \ + pci_msi.9 pci_msix_alloc_map.9 \ + pci_msi.9 pci_msix_release.9 \ + pci_msi.9 pci_msix_establish.9 \ + pci_msi.9 pci_msix_disestablish.9 MLINKS+=pci_configure_bus.9 pci_conf_hook.9 \ pci_configure_bus.9 pci_conf_interrupt.9 MLINKS+=pckbport.9 pckbport_attach.9 \ diff --git a/share/man/man9/intr_distribute.9 b/share/man/man9/intr_distribute.9 new file mode 100644 index 0000000..adad375 --- /dev/null +++ b/share/man/man9/intr_distribute.9 @@ -0,0 +1,56 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2015 Internet Initiative Japan Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Bill Sommerfeld +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd April 8, 2015 +.Dt INTR_DISTRIBUTE 9 +.Os +.Sh NAME +.Nm intr_distribute +.Nd assign an interrupt to a CPU +.Sh SYNOPSIS +.In sys/intr.h +.Ft int +.Fn intr_distribute "void *ich" "const kcpuset_t *newset" \ +"kcpuset_t *oldset" +.Sh DESCRIPTION +The +.Nm +function exists to assign an interrupt to a CPU. +.Pp +If a driver (or another kernel component) wishes to assign an +interrupt to a CPU, it should pass the return value of +.Fn pci_intr_establish +as +.Fa ich . +It should also pass the kcpuset to which the interrupt is to be assigned in +.Fa newset . +If it wants to get the assignment that was in effect before the change, +it should pass a non-NULL value in +.Fa oldset . +If not, it should pass NULL in +.Fa oldset . diff --git a/share/man/man9/pci_msi.9 b/share/man/man9/pci_msi.9 new file mode 100644 index 0000000..132ab31 --- /dev/null +++ b/share/man/man9/pci_msi.9 @@ -0,0 +1,213 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2015 Internet Initiative Japan Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Bill Sommerfeld +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. 
+.\" +.Dd April 8, 2015 +.Dt PCI_MSI 9 (DRAFT) +.Os +.Sh NAME (DRAFT) +.Nm pci_msi , +.Nm pci_msix , +.Nm pci_msi_count , +.Nm pci_msi_alloc , +.Nm pci_msi_alloc_exact , +.Nm pci_msi_release , +.Nm pci_msi_establish , +.Nm pci_msi_disestablish , +.Nm pci_msi_string , +.Nm pci_msix_count , +.Nm pci_msix_alloc , +.Nm pci_msix_alloc_exact , +.Nm pci_msix_alloc_map , +.Nm pci_msix_release , +.Nm pci_msix_establish , +.Nm pci_msix_disestablish , +.Nm pci_intx_alloc , +.Nm pci_intx_release +.Nd PCI MSI{,-X} manipulation functions +.Sh SYNOPSIS +.In NOTYET +.Ft int +.Fn pci_msi_count "struct pci_attach_args *pa" +.Ft int +.Fn pci_msi_alloc "struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihps" "int *count" +.Ft int +.Fn pci_msi_alloc_exact "struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihps" "int count" +.Ft void +.Fn pci_msi_release "pci_chipset_tag_t pc" "pci_intr_handle_t **pihs" \ +"int count" +.Ft void * +.Fn pci_msi_establish "pci_chipset_tag_t pc" "pci_intr_handle_t ih" \ +"int level" "int (*func)(void *)" "void *arg" +.Ft void +.Fn pci_msi_disestablish "pci_chipset_tag_t pc" "void *cookie" +.Ft const char * +.Fn pci_msi_string "pci_chipset_tag_t pc" \ +"pci_intr_handle_t ih" "char *buf" "size_t len" +.Ft int +.Fn pci_msix_count "struct pci_attach_args *pa" +.Ft int +.Fn pci_msix_alloc "struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihps" "int *count" +.Ft int +.Fn pci_msix_alloc_exact "struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihps" "int count" +.Ft int +.Fn pci_msix_alloc_map "struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihps" "u_int *table_indexes" "int count" +.Ft void +.Fn pci_msix_release "pci_chipset_tag_t pc" "pci_intr_handle_t **pihs" \ +"int count" +.Ft void * +.Fn pci_msix_establish "pci_chipset_tag_t pc" "pci_intr_handle_t ih" \ +"int level" "int (*func)(void *)" "void *arg" +.Ft void +.Fn pci_msix_disestablish "pci_chipset_tag_t pc" "void *cookie" +.Ft int +.Fn pci_intx_alloc "const struct pci_attach_args *pa" \ +"pci_intr_handle_t **ihp" +.Ft void +.Fn pci_intx_release "pci_chipset_tag_t pc" "pci_intr_handle_t *pih" 
+.Sh DESCRIPTION +XXX This document describes draft APIs. These APIs may change later. +.Pp +The +.Nm +functions exist to allow device drivers to use MSI/MSI-X. +To use MSI/MSI-X, the kernel must be built with the PCI_MSI_MSIX +option. +.Pp +Each driver has an +.Fn attach +function which has a bus-specific +.Ft attach_args +structure. +Each driver for a PCI device is passed a pointer to an object of type +.Ft struct pci_attach_args +which contains, among other things, information about the location +of the device in the PCI bus topology sufficient to allow interrupts +from the device to be handled. +.Pp +If a driver wishes to establish an MSI handler for the device, +it should pass the +.Ft struct pci_attach_args * +and +.Fa count +to the +.Fn pci_msi_alloc +or +.Fn pci_msi_alloc_exact +functions, which return zero on success, and nonzero on failure. +When the functions succeed, they store a pointer to the allocated handle +array in +.Fa ihps , +whose size is +.Fa count +or less. The difference between +.Fn pci_msi_alloc +and +.Fn pci_msi_alloc_exact +is whether +.Fa count +can be decremented. +.Fn pci_msi_alloc +can decrement +.Fa count , +which is similar to FreeBSD's +.Fn pci_alloc_msi . +In contrast, +.Fn pci_msi_alloc_exact +cannot decrement +.Fa count . +.Pp +If the driver wishes to refer to the MSI source in an attach or +error message, it should use the value returned by +.Fn pci_msi_string . +The buffer passed to +.Fn pci_msi_string +should be at least +.Dv PCI_INTRSTR_LEN +bytes. +.Pp +Subsequently, when the driver is prepared to receive MSIs, it +should call +.Fn pci_msi_establish +to actually establish the handler; when the device interrupts, +.Fa func +will be called with a single argument +.Fa arg , +and will run at the interrupt priority level +.Fa level . +This is the same as +.Fn pci_intr_establish . 
+.Pp +The return value of +.Fn pci_msi_establish +may be saved and passed to +.Fn pci_msi_disestablish +to disable the interrupt handler +when the driver is no longer interested in MSIs from the device. +After that, the driver should also call +.Fn pci_msi_release +to free the resources used for MSI. +.Pp +If a driver wishes to establish an MSI-X handler for the device, +it is almost the same as MSI. +The only difference is +.Fn pci_msix_alloc_map . +This function can assign each handle to a specific MSI-X table entry. +For example, if the driver wants to assign the handlers as +.Bd -literal + msix_handler0 => MSI-X table index: 4 + msix_handler1 => MSI-X table index: 5 + msix_handler2 => MSI-X table index: 0 +.Ed +the driver should set +.Bd -literal + table_indexes[0] = 4; + table_indexes[1] = 5; + table_indexes[2] = 0; +.Ed +in +.Fa table_indexes . +.Pp +If the driver wants to fall back to INTx, the driver should use +.Fn pci_intx_alloc +and +.Fn pci_intx_release +instead of +.Fn pci_intr_map +to keep the ownership of the interrupt handle consistent. +That is, +.Fn pci_intr_map +does not have ownership (the function just calculates a value); +in contrast, +.Fn pci_msi_alloc +and +.Fn pci_msix_alloc +have ownership (the functions allocate memory for interrupt +handlers). 
diff --git a/sys/arch/amd64/amd64/mainbus.c b/sys/arch/amd64/amd64/mainbus.c index d94aada..a902246 100644 --- a/sys/arch/amd64/amd64/mainbus.c +++ b/sys/arch/amd64/amd64/mainbus.c @@ -53,6 +53,7 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.34 2013/07/31 14:05:33 soren Exp $"); #include "opt_acpi.h" #include "opt_mpbios.h" #include "opt_pcifixup.h" +#include "opt_pci_msi_msix.h" #include #include @@ -74,6 +75,9 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.34 2013/07/31 14:05:33 soren Exp $"); #include #endif #endif +#ifdef PCI_MSI_MSIX +#include +#endif #endif /* @@ -175,6 +179,10 @@ mainbus_attach(device_t parent, device_t self, void *aux) #endif #if NPCI > 0 +#ifdef PCI_MSI_MSIX + msipic_init(); +#endif + /* * ACPI needs to be able to access PCI configuration space. */ diff --git a/sys/arch/amd64/conf/ALL b/sys/arch/amd64/conf/ALL index 57c211b..9a4e6d0 100644 --- a/sys/arch/amd64/conf/ALL +++ b/sys/arch/amd64/conf/ALL @@ -421,6 +421,7 @@ applesmctemp* at applesmcbus? pci* at mainbus? bus ? pci* at pchb? bus ? pci* at ppb? bus ? +options PCI_MSI_MSIX # Configure PCI using BIOS information options PCIBIOS # PCI BIOS support diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC index 38e3028..3c0d04f 100644 --- a/sys/arch/amd64/conf/GENERIC +++ b/sys/arch/amd64/conf/GENERIC @@ -339,6 +339,7 @@ wmimsi* at acpiwmibus? # MSI WMI mappings pci* at mainbus? bus ? pci* at pchb? bus ? pci* at ppb? bus ? +#options PCI_MSI_MSIX # PCI bridges pchb* at pci? dev ? function ? # PCI-Host bridges diff --git a/sys/arch/i386/conf/ALL b/sys/arch/i386/conf/ALL index 9631975..02b8e9f 100644 --- a/sys/arch/i386/conf/ALL +++ b/sys/arch/i386/conf/ALL @@ -452,6 +452,7 @@ pci* at mainbus? bus ? pci* at pchb? bus ? pci* at ppb? bus ? pci* at elansc? bus ? 
+options PCI_MSI_MSIX # Configure PCI using BIOS information options PCIBIOS # PCI BIOS support diff --git a/sys/arch/i386/conf/GENERIC b/sys/arch/i386/conf/GENERIC index b87de12..e6d5c0d 100644 --- a/sys/arch/i386/conf/GENERIC +++ b/sys/arch/i386/conf/GENERIC @@ -421,6 +421,7 @@ pci* at mainbus? bus ? pci* at pchb? bus ? pci* at ppb? bus ? pci* at elansc? bus ? +#options PCI_MSI_MSIX # Configure PCI using BIOS information #options PCIBIOS # PCI BIOS support diff --git a/sys/arch/i386/i386/mainbus.c b/sys/arch/i386/i386/mainbus.c index 4c19879..391480d 100644 --- a/sys/arch/i386/i386/mainbus.c +++ b/sys/arch/i386/i386/mainbus.c @@ -57,6 +57,7 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.98 2013/11/08 03:12:48 christos Exp $" #include "opt_acpi.h" #include "opt_mpbios.h" #include "opt_pcifixup.h" +#include "opt_pci_msi_msix.h" #include #include @@ -86,6 +87,9 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.98 2013/11/08 03:12:48 christos Exp $" #include #endif #endif +#ifdef PCI_MSI_MSIX +#include +#endif #endif void mainbus_childdetached(device_t, device_t); @@ -227,6 +231,10 @@ mainbus_attach(device_t parent, device_t self, void *aux) #endif #if NPCI > 0 +#ifdef PCI_MSI_MSIX + msipic_init(); +#endif + /* * ACPI needs to be able to access PCI configuration space. 
*/ diff --git a/sys/arch/x86/conf/files.x86 b/sys/arch/x86/conf/files.x86 index 2a4f0a8..399fa5c 100644 --- a/sys/arch/x86/conf/files.x86 +++ b/sys/arch/x86/conf/files.x86 @@ -142,6 +142,9 @@ file arch/x86/pci/pci_machdep.c pci #file arch/x86/pci/pci_ranges.c pci file arch/x86/pci/pci_intr_machdep.c pci +file arch/x86/pci/pci_msi_machdep.c pci_msi_msix +file arch/x86/pci/msipic.c pci_msi_msix + file arch/x86/pci/pciide_machdep.c pciide_common file arch/x86/pci/pci_bus_fixup.c pci_bus_fixup diff --git a/sys/arch/x86/include/i82093var.h b/sys/arch/x86/include/i82093var.h index a3ae23b..aa8c9a4 100644 --- a/sys/arch/x86/include/i82093var.h +++ b/sys/arch/x86/include/i82093var.h @@ -68,19 +68,34 @@ struct ioapic_softc { * (ih&0xff0000)>>16 -> ioapic id. * (ih&0x00ff00)>>8 -> ioapic pin. * - * 0x80000000 is used by pci_intr_machdep.c for MPSAFE_MASK + * MSI/MSI-X: + * (ih&0x000ff80000000000)>>43 -> MSI/MSI-X device id. + * (ih&0x000007ff00000000)>>32 -> MSI/MSI-X vector id in a device. */ - -#define APIC_INT_VIA_APIC 0x10000000 -#define APIC_INT_APIC_MASK 0x00ff0000 +#define MPSAFE_MASK 0x80000000ULL +#define APIC_INT_VIA_APIC 0x10000000ULL +#define APIC_INT_VIA_MSI 0x20000000ULL +#define APIC_INT_APIC_MASK 0x00ff0000ULL #define APIC_INT_APIC_SHIFT 16 -#define APIC_INT_PIN_MASK 0x0000ff00 +#define APIC_INT_PIN_MASK 0x0000ff00ULL #define APIC_INT_PIN_SHIFT 8 -#define APIC_IRQ_APIC(x) ((x & APIC_INT_APIC_MASK) >> APIC_INT_APIC_SHIFT) -#define APIC_IRQ_PIN(x) ((x & APIC_INT_PIN_MASK) >> APIC_INT_PIN_SHIFT) -#define APIC_IRQ_ISLEGACY(x) (!((x) & APIC_INT_VIA_APIC)) -#define APIC_IRQ_LEGACY_IRQ(x) ((x) & 0xff) +#define APIC_IRQ_APIC(x) (int)(((x) & APIC_INT_APIC_MASK) >> APIC_INT_APIC_SHIFT) +#define APIC_IRQ_PIN(x) (int)(((x) & APIC_INT_PIN_MASK) >> APIC_INT_PIN_SHIFT) +#define APIC_IRQ_ISLEGACY(x) (bool)(!((x) & APIC_INT_VIA_APIC)) +#define APIC_IRQ_LEGACY_IRQ(x) (int)((x) & 0xff) + +#define INT_VIA_MSI(x) (bool)(((x) & APIC_INT_VIA_MSI) != 0) + +#define MSI_INT_MSIX 
0x1000000000000000ULL +#define MSI_INT_DEV_MASK 0x000ff80000000000ULL +#define MSI_INT_VEC_MASK 0x000007ff00000000ULL + +#define MSI_INT_IS_MSIX(x) ((bool)(((x) & MSI_INT_MSIX) != 0)) +#define MSI_INT_MAKE_MSI(x) ((x) &= ~MSI_INT_MSIX) +#define MSI_INT_MAKE_MSIX(x) ((x) |= MSI_INT_MSIX) +#define MSI_INT_DEV(x) __SHIFTOUT((x), MSI_INT_DEV_MASK) +#define MSI_INT_VEC(x) __SHIFTOUT((x), MSI_INT_VEC_MASK) void ioapic_print_redir(struct ioapic_softc *, const char *, int); void ioapic_format_redir(char *, const char *, int, uint32_t, uint32_t); diff --git a/sys/arch/x86/include/intr.h b/sys/arch/x86/include/intr.h index 8484177..fc32ad8 100644 --- a/sys/arch/x86/include/intr.h +++ b/sys/arch/x86/include/intr.h @@ -42,6 +42,7 @@ #endif #include +#include #include #ifndef _LOCORE @@ -71,6 +72,11 @@ struct intrstub { void *ist_resume; }; +struct percpu_evcnt { + cpuid_t cpuid; + uint64_t count; +}; + struct intrsource { int is_maxlevel; /* max. IPL for this source */ int is_pin; /* IRQ for legacy; pin for IO APIC, @@ -86,6 +92,10 @@ struct intrsource { int is_idtvec; int is_minlevel; char is_evname[32]; /* event counter name */ + char is_intrid[INTRIDBUF]; /* intrid created by create_intrid() */ + cpuid_t is_active_cpu; /* active cpuid */ + struct percpu_evcnt *is_saved_evcnt; /* interrupt count of deactivated cpus */ + SIMPLEQ_ENTRY(intrsource) is_list; /* link of intrsources */ }; #define IS_LEGACY 0x0001 /* legacy ISA irq source */ @@ -171,17 +181,22 @@ struct cpu_info; struct pcibus_attach_args; +typedef uint64_t intr_handle_t; + void intr_default_setup(void); void x86_nmi(void); void *intr_establish(int, struct pic *, int, int, int, int (*)(void *), void *, bool); void intr_disestablish(struct intrhand *); void intr_add_pcibus(struct pcibus_attach_args *); -const char *intr_string(int, char *, size_t); +const char *intr_string(intr_handle_t, char *, size_t); void cpu_intr_init(struct cpu_info *); -int intr_find_mpmapping(int, int, int *); +int intr_find_mpmapping(int, 
int, intr_handle_t *); struct pic *intr_findpic(int); void intr_printconfig(void); +struct intrsource *intr_allocate_io_intrsource(const char *); +void intr_free_io_intrsource(const char *); + int x86_send_ipi(struct cpu_info *, int); void x86_broadcast_ipi(int); void x86_ipi_handler(void); diff --git a/sys/arch/x86/include/mpconfig.h b/sys/arch/x86/include/mpconfig.h index 733ba64..1726aef 100644 --- a/sys/arch/x86/include/mpconfig.h +++ b/sys/arch/x86/include/mpconfig.h @@ -60,7 +60,7 @@ struct mp_intr_map int bus_pin; struct pic *ioapic; /* NULL for local apic */ int ioapic_pin; - int ioapic_ih; /* int handle, see i82093var.h for encoding */ + intr_handle_t ioapic_ih; /* int handle, see i82093var.h for encoding */ int type; /* from mp spec intr record */ int flags; /* from mp spec intr record */ uint32_t redir; diff --git a/sys/arch/x86/include/pci_machdep.h b/sys/arch/x86/include/pci_machdep.h index 1d017f2..40c9fdd 100644 --- a/sys/arch/x86/include/pci_machdep.h +++ b/sys/arch/x86/include/pci_machdep.h @@ -33,11 +33,12 @@ #ifndef _X86_PCI_MACHDEP_H_ #define _X86_PCI_MACHDEP_H_ +#include /* * Types provided to machine-independent PCI code * See also i82093var.h to find out pci_intr_handle_t's bitfield. */ -typedef int pci_intr_handle_t; +typedef intr_handle_t pci_intr_handle_t; #include diff --git a/sys/arch/x86/include/pci_machdep_common.h b/sys/arch/x86/include/pci_machdep_common.h index 0c111d6..17834b3 100644 --- a/sys/arch/x86/include/pci_machdep_common.h +++ b/sys/arch/x86/include/pci_machdep_common.h @@ -117,9 +117,34 @@ void *pci_intr_establish(pci_chipset_tag_t, pci_intr_handle_t, int, int (*)(void *), void *); void pci_intr_disestablish(pci_chipset_tag_t, void *); +/* + * If device drivers use MSI/MSI-X, they should use these API for INTx + * instead of pci_intr_map(), because of conforming the pci_intr_handle + * ownership to MSI/MSI-X. 
+ */ +int pci_intx_alloc(const struct pci_attach_args *, + pci_intr_handle_t **); +void pci_intx_release(pci_chipset_tag_t, pci_intr_handle_t *); + /* experimental MSI support */ -void *pci_msi_establish(struct pci_attach_args *, int, int (*)(void *), void *); -void pci_msi_disestablish(void *); +const char *pci_msi_string(pci_chipset_tag_t, pci_intr_handle_t, char *, size_t); +int pci_msi_count(struct pci_attach_args *); +int pci_msi_alloc(struct pci_attach_args *, pci_intr_handle_t **, int *); +int pci_msi_alloc_exact(struct pci_attach_args *, pci_intr_handle_t **, int); +void pci_msi_release(pci_chipset_tag_t, pci_intr_handle_t **, int); +void *pci_msi_establish(pci_chipset_tag_t, pci_intr_handle_t, + int, int (*)(void *), void *); +void pci_msi_disestablish(pci_chipset_tag_t, void *); + +/* experimental MSI-X support */ +int pci_msix_count(struct pci_attach_args *); +int pci_msix_alloc(struct pci_attach_args *, pci_intr_handle_t **, int *); +int pci_msix_alloc_exact(struct pci_attach_args *, pci_intr_handle_t **, int); +int pci_msix_alloc_map(struct pci_attach_args *, pci_intr_handle_t **, u_int *, int); +void pci_msix_release(pci_chipset_tag_t, pci_intr_handle_t **, int); +void *pci_msix_establish(pci_chipset_tag_t, pci_intr_handle_t, + int, int (*)(void *), void *); +void pci_msix_disestablish(pci_chipset_tag_t, void *); /* * ALL OF THE FOLLOWING ARE MACHINE-DEPENDENT, AND SHOULD NOT BE USED diff --git a/sys/arch/x86/include/pic.h b/sys/arch/x86/include/pic.h index 3a3f512..a9aba7d 100644 --- a/sys/arch/x86/include/pic.h +++ b/sys/arch/x86/include/pic.h @@ -22,6 +22,7 @@ struct pic { struct intrstub *pic_level_stubs; struct intrstub *pic_edge_stubs; struct ioapic_softc *pic_ioapic; /* if pic_type == PIC_IOAPIC */ + struct msipic *pic_msipic; /* if (pic_type == PIC_MSI) || (pic_type == PIC_MSIX) */ }; /* @@ -30,7 +31,9 @@ struct pic { #define PIC_I8259 0 #define PIC_IOAPIC 1 #define PIC_LAPIC 2 -#define PIC_SOFT 3 +#define PIC_MSI 3 +#define PIC_MSIX 4 
+#define PIC_SOFT 5 extern struct pic i8259_pic; extern struct pic local_pic; diff --git a/sys/arch/x86/isa/isa_machdep.c b/sys/arch/x86/isa/isa_machdep.c index 750ecbc..bc4135a 100644 --- a/sys/arch/x86/isa/isa_machdep.c +++ b/sys/arch/x86/isa/isa_machdep.c @@ -203,7 +203,7 @@ isa_intr_establish(isa_chipset_tag_t ic, int irq, int type, int level, struct pic *pic; int pin; #if NIOAPIC > 0 - int mpih; + intr_handle_t mpih; struct ioapic_softc *ioapic; #endif diff --git a/sys/arch/x86/pci/msipic.c b/sys/arch/x86/pci/msipic.c new file mode 100644 index 0000000..0b36c28 --- /dev/null +++ b/sys/arch/x86/pci/msipic.c @@ -0,0 +1,737 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 2014 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +#ifdef INTRDEBUG +#define MSIPICDEBUG +#endif + +/* Debug output; DPRINTF() expands to nothing unless MSIPICDEBUG is defined. */ +#ifdef MSIPICDEBUG +#define DPRINTF(msg) printf msg +#else +#define DPRINTF(msg) +#endif + +/* + * Read back the 32-bit word at offset 0 and discard the value. + * NOTE(review): presumably used to force earlier writes out to the + * device -- confirm against the callers. + */ +#define BUS_SPACE_WRITE_FLUSH(pc, tag) (void)bus_space_read_4(pc, tag, 0) + +/* Size of the mp_pic_name buffer in struct msipic. */ +#define MSIPICNAMEBUF 16 + +/* + * A pseudo pic for a single MSI/MSI-X device. + * The pic and the MSI/MSI-X device are distinguished by "devid". The "devid" + * is managed by "dev_seqs" below. + */ +struct msipic { + /* PCI bus, device and function numbers of the device. */ + int mp_bus; + int mp_dev; + int mp_fun; + + int mp_devid; /* The device id for the MSI/MSI-X device. */ + int mp_veccnt; /* The number of MSI/MSI-X vectors. */ + + char mp_pic_name[MSIPICNAMEBUF]; /* The MSI/MSI-X device's name. */ + + struct pci_attach_args mp_pa; /* Copy of the device's attach args. */ + bus_space_tag_t mp_bstag; + bus_space_handle_t mp_bshandle; + bus_size_t mp_bssize; + struct pic *mp_pic; /* Back pointer to the pic owning this msipic. */ + + LIST_ENTRY(msipic) mp_list; /* Link in msipic_list. */ +}; + +/* Held while msipic_list or dev_seqs is examined or modified. */ +static kmutex_t msipic_list_lock; + +/* All msipics currently registered, one per MSI/MSI-X device. */ +static LIST_HEAD(, msipic) msipic_list = + LIST_HEAD_INITIALIZER(msipic_list); + +/* + * This struct manages "devid" so that the same "devid" is used when a + * device is re-attached. If the device's bus number, device number and + * function number are equal, it is assumed to be re-attached. 
+ */ +struct dev_last_used_seq { + bool ds_using; /* true while the devid is allocated to a device */ + int ds_bus; /* PCI bus number of the last user */ + int ds_dev; /* PCI device number of the last user */ + int ds_fun; /* PCI function number of the last user */ +}; +/* The number of MSI/MSI-X devices supported by the system. */ +#define NUM_MSI_DEVS 256 +/* + * Record devids to use the same devid when the device is re-attached. + * The array index is the devid. + */ +static struct dev_last_used_seq dev_seqs[NUM_MSI_DEVS]; + +static int msipic_allocate_common_msi_devid(struct pci_attach_args *); +static void msipic_release_common_msi_devid(int); + +static struct pic *msipic_find_msi_pic_locked(int); +static struct pic *msipic_construct_common_msi_pic(struct pci_attach_args *, + struct pic *); +static void msipic_destruct_common_msi_pic(struct pic *); + +static void msi_set_msictl_enablebit(struct pic *, int, int); +static void msi_hwmask(struct pic *, int); +static void msi_hwunmask(struct pic *, int); +static void msi_addroute(struct pic *, struct cpu_info *, int, int, int); +static void msi_delroute(struct pic *, struct cpu_info *, int, int, int); + +static void msix_set_vecctl_mask(struct pic *, int, int); +static void msix_hwmask(struct pic *, int); +static void msix_hwunmask(struct pic *, int); +static void msix_addroute(struct pic *, struct cpu_info *, int, int, int); +static void msix_delroute(struct pic *, struct cpu_info *, int, int, int); + +/* + * Return a new "devid" for a device that is attached for the first time. + * Return the same "devid" for a device re-attached after being detached once. + * Return -1 if the number of attached MSI/MSI-X devices is over NUM_MSI_DEVS. 
+ */ +static int +msipic_allocate_common_msi_devid(struct pci_attach_args *pa) +{ + pci_chipset_tag_t pc; + pcitag_t tag; + int bus, dev, fun, i; + + KASSERT(mutex_owned(&msipic_list_lock)); + + pc = pa->pa_pc; + tag = pa->pa_tag; + pci_decompose_tag(pc, tag, &bus, &dev, &fun); + + /* if the device was once attached, use same devid */ + for (i = 0; i < NUM_MSI_DEVS; i++) { + /* + * An entry whose bus/dev/fun are all zero (0:0:0, noted + * here as the host bridge) ends the search: the loop is + * left, the entry is not merely skipped. + */ + if (dev_seqs[i].ds_bus == 0 + && dev_seqs[i].ds_dev == 0 + && dev_seqs[i].ds_fun == 0) + break; + + if (dev_seqs[i].ds_bus == bus + && dev_seqs[i].ds_dev == dev + && dev_seqs[i].ds_fun == fun) { + dev_seqs[i].ds_using = true; + return i; + } + } + + /* Otherwise take the first devid that is not in use and record the device. */ + for (i = 0; i < NUM_MSI_DEVS; i++) { + if (dev_seqs[i].ds_using == 0) { + dev_seqs[i].ds_using = true; + dev_seqs[i].ds_bus = bus; + dev_seqs[i].ds_dev = dev; + dev_seqs[i].ds_fun = fun; + return i; + } + } + + DPRINTF(("too many MSI devices.\n")); + return -1; +} + +/* + * Mark the "devid" as unused, but keep the "devid" reserved so that it can + * be reused when the device is re-attached. + * An out-of-range devid is ignored. The caller must hold msipic_list_lock. + */ +static void +msipic_release_common_msi_devid(int devid) +{ + + KASSERT(mutex_owned(&msipic_list_lock)); + + if (devid < 0 || NUM_MSI_DEVS <= devid) { + DPRINTF(("%s: invalid devid.\n", __func__)); + return; + } + + dev_seqs[devid].ds_using = false; + /* Keep ds_* to reuse the same devid for the same device. */ +} + +/* + * Return the pic registered in msipic_list for "devid", or NULL if there + * is none. The caller must hold msipic_list_lock. + */ +static struct pic * +msipic_find_msi_pic_locked(int devid) +{ + struct msipic *mpp; + + KASSERT(mutex_owned(&msipic_list_lock)); + + LIST_FOREACH(mpp, &msipic_list, mp_list) { + if(mpp->mp_devid == devid) + return mpp->mp_pic; + } + return NULL; +} + +/* + * Return the msi_pic whose device is already registered. + * If the device is not registered yet, return NULL. + * Takes and releases msipic_list_lock around the lookup. + */ +struct pic * +msipic_find_msi_pic(int devid) +{ + struct pic *msipic; + + mutex_enter(&msipic_list_lock); + msipic = msipic_find_msi_pic_locked(devid); + mutex_exit(&msipic_list_lock); + + return msipic; +} + +/* + * A common construction process for MSI and MSI-X. + * Allocate a pic as a copy of "pic_tmpl", bind it to a new msipic for the + * device described by "pa", and register the msipic in msipic_list. + * Return NULL if an allocation fails or no devid is available. 
+ */
+static struct pic *
+msipic_construct_common_msi_pic(struct pci_attach_args *pa,
+    struct pic *pic_tmpl)
+{
+	struct pic *pic;
+	struct msipic *msipic;
+	int devid;
+
+	/* Not zeroed here; fully overwritten by the memcpy() from pic_tmpl. */
+	pic = kmem_alloc(sizeof(*pic), KM_SLEEP);
+	if (pic == NULL)
+		return NULL;
+
+	msipic = kmem_zalloc(sizeof(*msipic), KM_SLEEP);
+	if (msipic == NULL) {
+		kmem_free(pic, sizeof(*pic));
+		return NULL;
+	}
+
+	/* The devid table and msipic_list are both protected by this lock. */
+	mutex_enter(&msipic_list_lock);
+
+	devid = msipic_allocate_common_msi_devid(pa);
+	if (devid == -1) {
+		/* No devid available: undo both allocations. */
+		mutex_exit(&msipic_list_lock);
+		kmem_free(pic, sizeof(*pic));
+		kmem_free(msipic, sizeof(*msipic));
+		return NULL;
+	}
+
+	/* Start from the template, then link pic and msipic to each other. */
+	memcpy(pic, pic_tmpl, sizeof(*pic));
+	pic->pic_msipic = msipic;
+	msipic->mp_pic = pic;
+	pci_decompose_tag(pa->pa_pc, pa->pa_tag,
+	    &msipic->mp_bus, &msipic->mp_dev, &msipic->mp_fun);
+	/* Keep a private copy of the attach args for later use. */
+	memcpy(&msipic->mp_pa, pa, sizeof(msipic->mp_pa));
+	msipic->mp_devid = devid;
+	/*
+	 * pci_msi{,x}_alloc() must be called only once in the device driver.
+	 */
+	KASSERT(msipic_find_msi_pic_locked(msipic->mp_devid) == NULL);
+
+	LIST_INSERT_HEAD(&msipic_list, msipic, mp_list);
+
+	mutex_exit(&msipic_list_lock);
+
+	return pic;
+}
+
+/*
+ * A common destruct process of MSI and MSI-X.
+ * Unlink the msipic from msipic_list and release its devid under
+ * msipic_list_lock, then free the msipic and the pic itself.
+ * A NULL "msi_pic" is accepted and ignored.
+ */
+static void
+msipic_destruct_common_msi_pic(struct pic *msi_pic)
+{
+	struct msipic *msipic;
+
+	if (msi_pic == NULL)
+		return;
+
+	msipic = msi_pic->pic_msipic;
+	mutex_enter(&msipic_list_lock);
+	LIST_REMOVE(msipic, mp_list);
+	msipic_release_common_msi_devid(msipic->mp_devid);
+	mutex_exit(&msipic_list_lock);
+
+	kmem_free(msipic, sizeof(*msipic));
+	kmem_free(msi_pic, sizeof(*msi_pic));
+}
+
+/*
+ * The pic is MSI/MSI-X pic or not.
+ * The test is whether pic->pic_msipic is non-NULL; "pic" itself must not
+ * be NULL.
+ */
+bool
+msipic_is_msi_pic(struct pic *pic)
+{
+
+	return (pic->pic_msipic != NULL);
+}
+
+/*
+ * Return the MSI/MSI-X devid which is unique for each devices.
+ */
+int
+msipic_get_devid(struct pic *pic)
+{
+
+	KASSERT(msipic_is_msi_pic(pic));
+
+	return pic->pic_msipic->mp_devid;
+}
+
+#define MSI_MSICTL_ENABLE	1
+#define MSI_MSICTL_DISABLE	0
+/*
+ * Set (flag == MSI_MSICTL_ENABLE) or clear (any other flag) the MSI enable
+ * bit in the device's MSI message control register.
+ * "msi_vec" is not used, see the comment in the body.
+ */
+static void
+msi_set_msictl_enablebit(struct pic *pic, int msi_vec, int flag)
+{
+	pci_chipset_tag_t pc;
+	struct pci_attach_args *pa;
+	pcitag_t tag;
+	pcireg_t ctl;
+	int off, found;
+
+	/*
+	 * NOTE(review): the chipset tag is NULL here while msix_addroute()
+	 * passes pa->pa_pc; kept as in the original.
+	 */
+	pc = NULL;
+	pa = &pic->pic_msipic->mp_pa;
+	tag = pa->pa_tag;
+	/*
+	 * The lookup must not live inside KASSERT(): the assertion is compiled
+	 * out of kernels built without DIAGNOSTIC, which would leave "off"
+	 * uninitialized.
+	 */
+	found = pci_get_capability(pc, tag, PCI_CAP_MSI, &off, NULL);
+	KASSERT(found != 0);
+	if (found == 0)
+		return;
+
+	/*
+	 * MSI can establish only one vector at once.
+	 * So, use whole device mask bit instead of a vector mask bit.
+	 */
+	ctl = pci_conf_read(pc, tag, off + PCI_MSI_CTL);
+	if (flag == MSI_MSICTL_ENABLE)
+		ctl |= PCI_MSI_CTL_MSI_ENABLE;
+	else
+		ctl &= ~PCI_MSI_CTL_MSI_ENABLE;
+
+	/* Write back to the same register that was read above. */
+	pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl);
+}
+
+/*
+ * pic_hwmask callback: clear the MSI enable bit of the device.
+ */
+static void
+msi_hwmask(struct pic *pic, int msi_vec)
+{
+
+	msi_set_msictl_enablebit(pic, msi_vec, MSI_MSICTL_DISABLE);
+}
+
+/*
+ * Do not use pic->hwunmask() immediately after pic->delroute().
+ * It is required to use pic->addroute() before pic->hwunmask().
+ */
+static void
+msi_hwunmask(struct pic *pic, int msi_vec)
+{
+
+	msi_set_msictl_enablebit(pic, msi_vec, MSI_MSICTL_ENABLE);
+}
+
+/*
+ * pic_addroute callback: program the MSI message address/data registers so
+ * that the device signals "idt_vec" on the CPU "ci", then set the MSI
+ * enable bit.  "unused" and "type" are not used.
+ */
+static void
+msi_addroute(struct pic *pic, struct cpu_info *ci,
+    int unused, int idt_vec, int type)
+{
+	pci_chipset_tag_t pc;
+	struct pci_attach_args *pa;
+	pcitag_t tag;
+	pcireg_t addr, data, ctl;
+	int off, found;
+
+	pc = NULL;
+	pa = &pic->pic_msipic->mp_pa;
+	tag = pa->pa_tag;
+	/* Keep the call outside KASSERT() so that "off" is always set. */
+	found = pci_get_capability(pc, tag, PCI_CAP_MSI, &off, NULL);
+	KASSERT(found != 0);
+	if (found == 0)
+		return;
+
+	/*
+	 * See Intel 64 and IA-32 Architectures Software Developer's Manual
+	 * Volume 3 10.11 Message Signalled Interrupts.
+	 */
+	/*
+	 * "cpuid" for MSI address is local APIC ID. In NetBSD, the ID is
+	 * the same as ci->ci_cpuid.
+	 */
+	addr = LAPIC_MSIADDR_BASE | __SHIFTIN(ci->ci_cpuid,
+	    LAPIC_MSIADDR_DSTID_MASK);
+	/* With edge trigger mode, the level bit is a don't-care. */
+	data = __SHIFTIN(idt_vec, LAPIC_MSIDATA_VECTOR_MASK)
+	    | LAPIC_MSIDATA_TRGMODE_EDGE | LAPIC_MSIDATA_DM_FIXED;
+
+	/* The register layout depends on the 64-bit address capability bit. */
+	ctl = pci_conf_read(pc, tag, off + PCI_MSI_CTL);
+	if (ctl & PCI_MSI_CTL_64BIT_ADDR) {
+		pci_conf_write(pc, tag, off + PCI_MSI_MADDR64_LO, addr);
+		pci_conf_write(pc, tag, off + PCI_MSI_MADDR64_HI, 0);
+		pci_conf_write(pc, tag, off + PCI_MSI_MDATA64, data);
+	} else {
+		pci_conf_write(pc, tag, off + PCI_MSI_MADDR, addr);
+		pci_conf_write(pc, tag, off + PCI_MSI_MDATA, data);
+	}
+	ctl |= PCI_MSI_CTL_MSI_ENABLE;
+	pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl);
+}
+
+/*
+ * Do not use pic->hwunmask() immediately after pic->delroute().
+ * It is required to use pic->addroute() before pic->hwunmask().
+ */
+static void
+msi_delroute(struct pic *pic, struct cpu_info *ci,
+    int msi_vec, int idt_vec, int type)
+{
+
+	msi_hwmask(pic, msi_vec);
+}
+
+/*
+ * Template for MSI pic.
+ * .pic_msipic is set later, when msipic_construct_common_msi_pic() copies
+ * this template into a freshly allocated pic.
+ */
+static struct pic msi_pic_tmpl = {
+	.pic_type = PIC_MSI,
+	.pic_vecbase = 0,
+	.pic_apicid = 0,
+	.pic_lock = __SIMPLELOCK_UNLOCKED, /* not used for msi_pic */
+	.pic_hwmask = msi_hwmask,
+	.pic_hwunmask = msi_hwunmask,
+	.pic_addroute = msi_addroute,
+	.pic_delroute = msi_delroute,
+	.pic_edge_stubs = ioapic_edge_stubs,
+	.pic_ioapic = NULL,
+};
+
+/*
+ * Create pseudo pic for a MSI device.
+ * The pic is named "msi<devid>".  Return NULL if the common construction
+ * fails.
+ */
+struct pic *
+msipic_construct_msi_pic(struct pci_attach_args *pa)
+{
+	struct pic *msi_pic;
+	char pic_name_buf[MSIPICNAMEBUF];
+
+	msi_pic = msipic_construct_common_msi_pic(pa, &msi_pic_tmpl);
+	if (msi_pic == NULL) {
+		DPRINTF(("cannot allocate MSI pic.\n"));
+		return NULL;
+	}
+
+	/* The name lives in the msipic so that pic_name stays valid. */
+	memset(pic_name_buf, 0, MSIPICNAMEBUF);
+	snprintf(pic_name_buf, MSIPICNAMEBUF, "msi%d",
+	    msi_pic->pic_msipic->mp_devid);
+	strncpy(msi_pic->pic_msipic->mp_pic_name, pic_name_buf,
+	    MSIPICNAMEBUF - 1);
+	msi_pic->pic_name = msi_pic->pic_msipic->mp_pic_name;
+
+	return msi_pic;
+}
+
+/*
+ * Delete pseudo pic for a MSI device.
+ */
+void
+msipic_destruct_msi_pic(struct pic *msi_pic)
+{
+
+	msipic_destruct_common_msi_pic(msi_pic);
+}
+
+#define MSIX_VECCTL_HWMASK	1
+#define MSIX_VECCTL_HWUNMASK	0
+/*
+ * Set (flag == MSIX_VECCTL_HWMASK) or clear (any other flag) the mask bit
+ * in the vector control word of MSI-X table entry "msix_vec".
+ * The table is accessed through the bus_space mapping saved in the msipic.
+ * A negative "msix_vec" is reported through DPRINTF and otherwise ignored.
+ */
+static void
+msix_set_vecctl_mask(struct pic *pic, int msix_vec, int flag)
+{
+	bus_space_tag_t bstag;
+	bus_space_handle_t bshandle;
+	uint64_t entry_base;
+	uint32_t vecctl;
+
+	if (msix_vec < 0) {
+		DPRINTF(("%s: invalid MSI-X table index, devid=%d vecid=%d\n",
+		    __func__, msipic_get_devid(pic), msix_vec));
+		return;
+	}
+
+	/* Byte offset of the entry inside the mapped MSI-X table. */
+	entry_base = PCI_MSIX_TABLE_ENTRY_SIZE * msix_vec;
+
+	bstag = pic->pic_msipic->mp_bstag;
+	bshandle = pic->pic_msipic->mp_bshandle;
+	vecctl = bus_space_read_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL);
+	if (flag == MSIX_VECCTL_HWMASK)
+		vecctl |= PCI_MSIX_VECTCTL_HWMASK_MASK;
+	else
+		vecctl &= ~PCI_MSIX_VECTCTL_HWMASK_MASK;
+
+	bus_space_write_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL, vecctl);
+	BUS_SPACE_WRITE_FLUSH(bstag, bshandle);
+}
+
+/*
+ * pic_hwmask callback: mask MSI-X table entry "msix_vec".
+ */
+static void
+msix_hwmask(struct pic *pic, int msix_vec)
+{
+
+	msix_set_vecctl_mask(pic, msix_vec, MSIX_VECCTL_HWMASK);
+}
+
+/*
+ * Do not use pic->hwunmask() immediately after pic->delroute().
+ * It is required to use pic->addroute() before pic->hwunmask().
+ */
+static void
+msix_hwunmask(struct pic *pic, int msix_vec)
+{
+
+	msix_set_vecctl_mask(pic, msix_vec, MSIX_VECCTL_HWUNMASK);
+}
+
+/*
+ * pic_addroute callback: fill MSI-X table entry "msix_vec" so that it
+ * signals "idt_vec" on the CPU "ci", clear the entry's vector control
+ * word, then set the MSI-X enable bit of the device.  "type" is not used.
+ * A negative "msix_vec" is reported through DPRINTF and otherwise ignored.
+ */
+static void
+msix_addroute(struct pic *pic, struct cpu_info *ci,
+    int msix_vec, int idt_vec, int type)
+{
+	pci_chipset_tag_t pc;
+	struct pci_attach_args *pa;
+	pcitag_t tag;
+	bus_space_tag_t bstag;
+	bus_space_handle_t bshandle;
+	uint64_t entry_base;
+	pcireg_t addr, data, ctl;
+	int off, found;
+
+	if (msix_vec < 0) {
+		DPRINTF(("%s: invalid MSI-X table index, devid=%d vecid=%d\n",
+		    __func__, msipic_get_devid(pic), msix_vec));
+		return;
+	}
+
+	pa = &pic->pic_msipic->mp_pa;
+	pc = pa->pa_pc;
+	tag = pa->pa_tag;
+	/*
+	 * The lookup must not live inside KASSERT(): the assertion is compiled
+	 * out of kernels built without DIAGNOSTIC, which would leave "off"
+	 * uninitialized.
+	 */
+	found = pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL);
+	KASSERT(found != 0);
+	if (found == 0)
+		return;
+
+	/* Byte offset of the entry inside the mapped MSI-X table. */
+	entry_base = PCI_MSIX_TABLE_ENTRY_SIZE * msix_vec;
+
+	/*
+	 * See Intel 64 and IA-32 Architectures Software Developer's Manual
+	 * Volume 3 10.11 Message Signalled Interrupts.
+	 */
+	/*
+	 * "cpuid" for MSI-X address is local APIC ID. In NetBSD, the ID is
+	 * the same as ci->ci_cpuid.
+	 */
+	addr = LAPIC_MSIADDR_BASE | __SHIFTIN(ci->ci_cpuid,
+	    LAPIC_MSIADDR_DSTID_MASK);
+	/* With edge trigger mode, the level bit is a don't-care. */
+	data = __SHIFTIN(idt_vec, LAPIC_MSIDATA_VECTOR_MASK)
+	    | LAPIC_MSIDATA_TRGMODE_EDGE | LAPIC_MSIDATA_DM_FIXED;
+
+	bstag = pic->pic_msipic->mp_bstag;
+	bshandle = pic->pic_msipic->mp_bshandle;
+	bus_space_write_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_ADDR_LO, addr);
+	bus_space_write_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_ADDR_HI, 0);
+	bus_space_write_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_DATA, data);
+	bus_space_write_4(bstag, bshandle,
+	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL, 0);
+	BUS_SPACE_WRITE_FLUSH(bstag, bshandle);
+
+	ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL);
+	ctl |= PCI_MSIX_CTL_ENABLE;
+	pci_conf_write(pc, tag, off + PCI_MSIX_CTL, ctl);
+}
+
+/*
+ * Do not use pic->hwunmask() immediately after pic->delroute().
+ * It is required to use pic->addroute() before pic->hwunmask().
+ */
+static void
+msix_delroute(struct pic *pic, struct cpu_info *ci,
+    int msix_vec, int vec, int type)
+{
+
+	msix_hwmask(pic, msix_vec);
+}
+
+/*
+ * Template for MSI-X pic.
+ * .pic_msipic is set later, when msipic_construct_common_msi_pic() copies
+ * this template into a freshly allocated pic.
+ */
+static struct pic msix_pic_tmpl = {
+	.pic_type = PIC_MSIX,
+	.pic_vecbase = 0,
+	.pic_apicid = 0,
+	.pic_lock = __SIMPLELOCK_UNLOCKED, /* not used for msix_pic */
+	.pic_hwmask = msix_hwmask,
+	.pic_hwunmask = msix_hwunmask,
+	.pic_addroute = msix_addroute,
+	.pic_delroute = msix_delroute,
+	.pic_edge_stubs = ioapic_edge_stubs,
+};
+
+/*
+ * Create pseudo pic for a MSI-X device and map its MSI-X table.
+ * The pic is named "msix<devid>".  The bus_space tag, handle and size of
+ * the mapping are saved in the msipic for the route/mask callbacks and for
+ * msipic_destruct_msix_pic().
+ * Return NULL if the device reports no MSI-X vectors or capability, if the
+ * common construction fails, if the table BIR is a reserved value, or if
+ * the table cannot be mapped.
+ */
+struct pic *
+msipic_construct_msix_pic(struct pci_attach_args *pa)
+{
+	struct pic *msix_pic;
+	pci_chipset_tag_t pc;
+	pcitag_t tag;
+	pcireg_t tbl;
+	bus_space_tag_t bstag;
+	bus_space_handle_t bshandle;
+	bus_size_t bssize;
+	size_t table_size;
+	uint32_t table_offset;
+	u_int memtype;
+	int bir, bar, err, off, table_nentry;
+	char pic_name_buf[MSIPICNAMEBUF];
+
+	table_nentry = pci_msix_count(pa);
+	if (table_nentry == 0) {
+		DPRINTF(("MSI-X table entry is 0.\n"));
+		return NULL;
+	}
+
+	pc = pa->pa_pc;
+	tag = pa->pa_tag;
+	if (pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL) == 0) {
+		DPRINTF(("%s: no msix capability\n", __func__));
+		return NULL;
+	}
+
+	msix_pic = msipic_construct_common_msi_pic(pa, &msix_pic_tmpl);
+	if (msix_pic == NULL) {
+		DPRINTF(("cannot allocate MSI-X pic.\n"));
+		return NULL;
+	}
+
+	/* The name lives in the msipic so that pic_name stays valid. */
+	memset(pic_name_buf, 0, MSIPICNAMEBUF);
+	snprintf(pic_name_buf, MSIPICNAMEBUF, "msix%d",
+	    msix_pic->pic_msipic->mp_devid);
+	strncpy(msix_pic->pic_msipic->mp_pic_name, pic_name_buf,
+	    MSIPICNAMEBUF - 1);
+	msix_pic->pic_name = msix_pic->pic_msipic->mp_pic_name;
+
+	/*
+	 * The table offset register holds both the offset of the table and
+	 * the index (BIR) of the BAR that contains it.
+	 *
+	 * NOTE(review): the BIR of the *table* register is extracted with the
+	 * PBA mask (PCI_MSIX_PBABIR_MASK).  Confirm that it equals the table
+	 * BIR mask, or switch to a table-specific mask if pcireg.h has one.
+	 */
+	tbl = pci_conf_read(pc, tag, off + PCI_MSIX_TBLOFFSET);
+	table_offset = tbl & PCI_MSIX_TBLOFFSET_MASK;
+	bir = tbl & PCI_MSIX_PBABIR_MASK;
+	switch(bir) {
+	case 0:
+		bar = PCI_BAR0;
+		break;
+	case 1:
+		bar = PCI_BAR1;
+		break;
+	case 2:
+		bar = PCI_BAR2;
+		break;
+	case 3:
+		bar = PCI_BAR3;
+		break;
+	case 4:
+		bar = PCI_BAR4;
+		break;
+	case 5:
+		bar = PCI_BAR5;
+		break;
+	default:
+		aprint_error("detect an illegal device! The device use reserved BIR values.\n");
+		msipic_destruct_common_msi_pic(msix_pic);
+		return NULL;
+	}
+	memtype = pci_mapreg_type(pc, tag, bar);
+	/*
+	 * PCI_MSIX_TABLE_ENTRY_SIZE consists below
+	 *     - Vector Control (32bit)
+	 *     - Message Data (32bit)
+	 *     - Message Upper Address (32bit)
+	 *     - Message Lower Address (32bit)
+	 */
+	table_size = table_nentry * PCI_MSIX_TABLE_ENTRY_SIZE;
+	err = pci_mapreg_submap(pa, bar, memtype, BUS_SPACE_MAP_LINEAR,
+	    roundup(table_size, PAGE_SIZE), table_offset,
+	    &bstag, &bshandle, NULL, &bssize);
+	if (err) {
+		DPRINTF(("cannot map msix table.\n"));
+		/* Nothing is mapped yet, so the common destructor suffices. */
+		msipic_destruct_common_msi_pic(msix_pic);
+		return NULL;
+	}
+	msix_pic->pic_msipic->mp_bstag = bstag;
+	msix_pic->pic_msipic->mp_bshandle = bshandle;
+	msix_pic->pic_msipic->mp_bssize = bssize;
+
+	return msix_pic;
+}
+
+/*
+ * Delete pseudo pic for a MSI-X device.
+ * The MSI-X table mapping is unmapped before the pic is freed.
+ * A NULL "msix_pic" is accepted and ignored, like msipic_destruct_msi_pic().
+ */
+void
+msipic_destruct_msix_pic(struct pic *msix_pic)
+{
+	struct msipic *msipic;
+
+	if (msix_pic == NULL)
+		return;
+
+	KASSERT(msipic_is_msi_pic(msix_pic));
+	KASSERT(msix_pic->pic_type == PIC_MSIX);
+
+	msipic = msix_pic->pic_msipic;
+	bus_space_unmap(msipic->mp_bstag, msipic->mp_bshandle,
+	    msipic->mp_bssize);
+
+	msipic_destruct_common_msi_pic(msix_pic);
+}
+
+/*
+ * Set the number of MSI vectors for pseudo MSI pic.
+ * Only "count" is recorded (in mp_veccnt); "pihs" is not used.
+ * Always return 0.
+ */
+int
+msipic_set_msi_vectors(struct pic *msi_pic, pci_intr_handle_t *pihs,
+    int count)
+{
+
+	KASSERT(msipic_is_msi_pic(msi_pic));
+
+	msi_pic->pic_msipic->mp_veccnt = count;
+	return 0;
+}
+
+/*
+ * Initialize the system to use MSI/MSI-X.
+ */ +void +msipic_init(void) +{ + + mutex_init(&msipic_list_lock, MUTEX_DEFAULT, IPL_NONE); +} diff --git a/sys/arch/x86/pci/msipic.h b/sys/arch/x86/pci/msipic.h new file mode 100644 index 0000000..91f142f --- /dev/null +++ b/sys/arch/x86/pci/msipic.h @@ -0,0 +1,46 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 2014 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _X86_PCI_MSIPIC_H_ +#define _X86_PCI_MSIPIC_H_ + +#include + +struct pic *msipic_construct_msi_pic(struct pci_attach_args *); +void msipic_destruct_msi_pic(struct pic *); +struct pic *msipic_construct_msix_pic(struct pci_attach_args *); +void msipic_destruct_msix_pic(struct pic *); +struct pic *msipic_find_msi_pic(int); +int msipic_set_msi_vectors(struct pic *, pci_intr_handle_t *, int); + +bool msipic_is_msi_pic(struct pic *); +int msipic_get_devid(struct pic *); + +void msipic_init(void); + +#endif /* _X86_PCI_MSIPIC_H_ */ diff --git a/sys/arch/x86/pci/pci_intr_machdep.c b/sys/arch/x86/pci/pci_intr_machdep.c index 52ead8f..c8da3e2 100644 --- a/sys/arch/x86/pci/pci_intr_machdep.c +++ b/sys/arch/x86/pci/pci_intr_machdep.c @@ -79,9 +79,11 @@ __KERNEL_RCSID(0, "$NetBSD: pci_intr_machdep.c,v 1.27 2014/03/29 19:28:30 christ #include #include #include +#include #include #include #include +#include #include #include @@ -91,6 +93,7 @@ __KERNEL_RCSID(0, "$NetBSD: pci_intr_machdep.c,v 1.27 2014/03/29 19:28:30 christ #include "acpica.h" #include "opt_mpbios.h" #include "opt_acpi.h" +#include "opt_pci_msi_msix.h" #include @@ -110,16 +113,14 @@ __KERNEL_RCSID(0, "$NetBSD: pci_intr_machdep.c,v 1.27 2014/03/29 19:28:30 christ #include #endif -#define MPSAFE_MASK 0x80000000 - int pci_intr_map(const struct pci_attach_args *pa, pci_intr_handle_t *ihp) { - int pin = pa->pa_intrpin; - int line = pa->pa_intrline; + pci_intr_pin_t pin = pa->pa_intrpin; + pci_intr_line_t line = pa->pa_intrline; pci_chipset_tag_t ipc, pc = pa->pa_pc; #if NIOAPIC > 0 || NACPICA > 0 - int rawpin = pa->pa_rawintrpin; + pci_intr_pin_t rawpin = pa->pa_rawintrpin; int bus, dev, func; #endif @@ -225,6 +226,11 @@ pci_intr_string(pci_chipset_tag_t pc, pci_intr_handle_t ih, char *buf, { pci_chipset_tag_t ipc; +#ifdef PCI_MSI_MSIX + if (INT_VIA_MSI(ih)) + return pci_msi_string(pc, ih, buf, len); +#endif + for (ipc = pc; ipc != NULL; ipc = ipc->pc_super) { if ((ipc->pc_present & 
PCI_OVERRIDE_INTR_STRING) == 0) continue; @@ -290,7 +296,7 @@ pci_intr_establish(pci_chipset_tag_t pc, pci_intr_handle_t ih, } pic = &i8259_pic; - pin = irq = (ih & ~MPSAFE_MASK); + pin = irq = APIC_IRQ_LEGACY_IRQ(ih); mpsafe = ((ih & MPSAFE_MASK) != 0); #if NIOAPIC > 0 @@ -329,105 +335,60 @@ pci_intr_disestablish(pci_chipset_tag_t pc, void *cookie) } #if NIOAPIC > 0 -/* - * experimental support for MSI, does support a single vector, - * no MSI-X, 8-bit APIC IDs - * (while it doesn't need the ioapic technically, it borrows - * from its kernel support) - */ - -/* dummies, needed by common intr_establish code */ -static void -msipic_hwmask(struct pic *pic, int pin) -{ -} -static void -msipic_addroute(struct pic *pic, struct cpu_info *ci, - int pin, int vec, int type) +int +pci_intx_alloc(const struct pci_attach_args *pa, pci_intr_handle_t **pih) { -} + struct intrsource *isp; + pci_intr_handle_t *handle; + int error; + char intrstr_buf[INTRIDBUF]; + const char *intrstr; + + handle = kmem_zalloc(sizeof(*handle), KM_SLEEP); + if (handle == NULL) { + aprint_normal("cannot allocate pci_intr_handle_t\n"); + return ENOMEM; + } -static struct pic msi_pic = { - .pic_name = "msi", - .pic_type = PIC_SOFT, - .pic_vecbase = 0, - .pic_apicid = 0, - .pic_lock = __SIMPLELOCK_UNLOCKED, - .pic_hwmask = msipic_hwmask, - .pic_hwunmask = msipic_hwmask, - .pic_addroute = msipic_addroute, - .pic_delroute = msipic_addroute, - .pic_edge_stubs = ioapic_edge_stubs, -}; - -struct msi_hdl { - struct intrhand *ih; - pci_chipset_tag_t pc; - pcitag_t tag; - int co; -}; + if (pci_intr_map(pa, handle) != 0) { + aprint_normal("cannot set up pci_intr_handle_t\n"); + error = EINVAL; + goto error; + } -void * -pci_msi_establish(struct pci_attach_args *pa, int level, - int (*func)(void *), void *arg) -{ - int co; - struct intrhand *ih; - struct msi_hdl *msih; - struct cpu_info *ci; - struct intrsource *is; - pcireg_t reg; - - if (!pci_get_capability(pa->pa_pc, pa->pa_tag, PCI_CAP_MSI, &co, 0)) - return 
NULL; - - ih = intr_establish(-1, &msi_pic, -1, IST_EDGE, level, func, arg, 0); - if (ih == NULL) - return NULL; - - msih = malloc(sizeof(*msih), M_DEVBUF, M_WAITOK); - msih->ih = ih; - msih->pc = pa->pa_pc; - msih->tag = pa->pa_tag; - msih->co = co; - - ci = ih->ih_cpu; - is = ci->ci_isources[ih->ih_slot]; - reg = pci_conf_read(pa->pa_pc, pa->pa_tag, co + PCI_MSI_CTL); - pci_conf_write(pa->pa_pc, pa->pa_tag, co + PCI_MSI_MADDR64_LO, - LAPIC_MSIADDR_BASE | - __SHIFTIN(ci->ci_cpuid, LAPIC_MSIADDR_DSTID_MASK)); - if (reg & PCI_MSI_CTL_64BIT_ADDR) { - pci_conf_write(pa->pa_pc, pa->pa_tag, co + PCI_MSI_MADDR64_HI, - 0); - /* XXX according to the manual, ASSERT is unnecessary if - * EDGE - */ - pci_conf_write(pa->pa_pc, pa->pa_tag, co + PCI_MSI_MDATA64, - __SHIFTIN(is->is_idtvec, LAPIC_MSIDATA_VECTOR_MASK) | - LAPIC_MSIDATA_TRGMODE_EDGE | LAPIC_MSIDATA_LEVEL_ASSERT | - LAPIC_MSIDATA_DM_FIXED); - } else { - /* XXX according to the manual, ASSERT is unnecessary if - * EDGE - */ - pci_conf_write(pa->pa_pc, pa->pa_tag, co + PCI_MSI_MDATA, - __SHIFTIN(is->is_idtvec, LAPIC_MSIDATA_VECTOR_MASK) | - LAPIC_MSIDATA_TRGMODE_EDGE | LAPIC_MSIDATA_LEVEL_ASSERT | - LAPIC_MSIDATA_DM_FIXED); + intrstr = pci_intr_string(pa->pa_pc, *handle, + intrstr_buf, sizeof(intrstr_buf)); + mutex_enter(&cpu_lock); + isp = intr_allocate_io_intrsource(intrstr); + mutex_exit(&cpu_lock); + if (isp == NULL) { + aprint_normal("can't allocate io_intersource\n"); + error = ENOMEM; + goto error; } - pci_conf_write(pa->pa_pc, pa->pa_tag, co + PCI_MSI_CTL, - PCI_MSI_CTL_MSI_ENABLE); - return msih; + + *pih = handle; + return 0; + +error: + kmem_free(handle, sizeof(*handle)); + return error; } void -pci_msi_disestablish(void *ih) +pci_intx_release(pci_chipset_tag_t pc, pci_intr_handle_t *pih) { - struct msi_hdl *msih = ih; + char intrstr_buf[INTRIDBUF]; + const char *intrstr; + + if (pih == NULL) + return; + + intrstr = pci_intr_string(NULL, *pih, intrstr_buf, sizeof(intrstr_buf)); + mutex_enter(&cpu_lock); + 
intr_free_io_intrsource(intrstr); + mutex_exit(&cpu_lock); - pci_conf_write(msih->pc, msih->tag, msih->co + PCI_MSI_CTL, 0); - intr_disestablish(msih->ih); - free(msih, M_DEVBUF); + kmem_free(pih, sizeof(*pih)); } #endif diff --git a/sys/arch/x86/pci/pci_machdep.c b/sys/arch/x86/pci/pci_machdep.c index 27f5490..22a6d84 100644 --- a/sys/arch/x86/pci/pci_machdep.c +++ b/sys/arch/x86/pci/pci_machdep.c @@ -110,6 +110,7 @@ __KERNEL_RCSID(0, "$NetBSD: pci_machdep.c,v 1.69 2014/11/07 12:48:21 christos Ex #include "opt_acpi.h" #include "opt_ddb.h" #include "opt_mpbios.h" +#include "opt_pci_msi_msix.h" #include "opt_puc.h" #include "opt_vga.h" #include "pci.h" @@ -126,6 +127,8 @@ __KERNEL_RCSID(0, "$NetBSD: pci_machdep.c,v 1.69 2014/11/07 12:48:21 christos Ex #include #endif +#include + #include #include @@ -210,6 +213,57 @@ const struct { #undef _tag #undef _qe +#ifdef PCI_MSI_MSIX +#define PCI_QUIRK_DISABLE_MSI 1 /* Neigher MSI nor MSI-X work */ +#define PCI_QUIRK_DISABLE_MSIX 2 /* MSI-X does not work */ +#define PCI_QUIRK_ENABLE_MSI_VM 3 /* Older chipset in VM where MSI and MSI-X works */ + +#define _dme(vend, prod) \ + { PCI_QUIRK_DISABLE_MSI, PCI_ID_CODE(vend, prod) } +#define _dmxe(vend, prod) \ + { PCI_QUIRK_DISABLE_MSIX, PCI_ID_CODE(vend, prod) } +#define _emve(vend, prod) \ + { PCI_QUIRK_ENABLE_MSI_VM, PCI_ID_CODE(vend, prod) } +const struct { + int type; + pcireg_t id; +} pci_msi_quirk_tbl[] = { + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_PCMC), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82437FX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82437MX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82437VX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82439HX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82439TX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443GX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443GX_AGP), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82440MX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82441FX), + _dme(PCI_VENDOR_INTEL, 
PCI_PRODUCT_INTEL_82443BX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443BX_AGP), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443BX_NOAGP), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443GX_NOAGP), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443LX), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443LX_AGP), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82810_MCH), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82810E_MCH), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82815_FULL_HUB), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82820_MCH), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82830MP_IO_1), + _dme(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82840_HB), + _dme(PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE_PCHB), + _dme(PCI_VENDOR_NVIDIA, PCI_PRODUCT_NVIDIA_NFORCE2_PCHB), + _dme(PCI_VENDOR_AMD, PCI_PRODUCT_AMD_SC751_SC), + _dme(PCI_VENDOR_AMD, PCI_PRODUCT_AMD_SC761_SC), + _dme(PCI_VENDOR_AMD, PCI_PRODUCT_AMD_SC762_NB), + + _emve(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82441FX), /* QEMU */ + _emve(PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_82443BX), /* VMWare */ +}; +#undef _dme +#undef _dmxe +#undef _emve +#endif /* PCI_MSI_MSIX */ + /* * PCI doesn't have any special needs; just use the generic versions * of these functions. 
@@ -370,9 +424,30 @@ pci_conf_select(uint32_t sel) } } +#ifdef PCI_MSI_MSIX +static int +pci_has_msi_quirk(pcireg_t id, int type) +{ + int i; + + for (i = 0; i < __arraycount(pci_msi_quirk_tbl); i++) { + if (id == pci_msi_quirk_tbl[i].id && + type == pci_msi_quirk_tbl[i].type) + return 1; + } + + return 0; +} +#endif + void pci_attach_hook(device_t parent, device_t self, struct pcibus_attach_args *pba) { +#ifdef PCI_MSI_MSIX + pci_chipset_tag_t pc = pba->pba_pc; + pcitag_t tag; + pcireg_t id, class; +#endif if (pba->pba_bus == 0) aprint_normal(": configuration mode %d", pci_mode); @@ -382,6 +457,58 @@ pci_attach_hook(device_t parent, device_t self, struct pcibus_attach_args *pba) #if NACPICA > 0 mpacpi_pci_attach_hook(parent, self, pba); #endif + +#ifdef PCI_MSI_MSIX + /* + * In order to decide whether the system supports MSI we look + * at the host bridge, which should be device 0 function 0 on + * bus 0. It is better to not enable MSI on systems that + * support it than the other way around, so be conservative + * here. So we don't enable MSI if we don't find a host + * bridge there. We also deliberately don't enable MSI on + * chipsets from low-end manifacturers like VIA and SiS. 
+ */ + tag = pci_make_tag(pc, 0, 0, 0); + id = pci_conf_read(pc, tag, PCI_ID_REG); + class = pci_conf_read(pc, tag, PCI_CLASS_REG); + + if (PCI_CLASS(class) != PCI_CLASS_BRIDGE || + PCI_SUBCLASS(class) != PCI_SUBCLASS_BRIDGE_HOST) + return; + + if (pci_has_msi_quirk(id, PCI_QUIRK_DISABLE_MSI)) { + pba->pba_flags &= ~PCI_FLAGS_MSI_OKAY; + pba->pba_flags &= ~PCI_FLAGS_MSIX_OKAY; + } else if (pci_has_msi_quirk(id, PCI_QUIRK_DISABLE_MSIX)) { + pba->pba_flags |= PCI_FLAGS_MSI_OKAY; + pba->pba_flags &= ~PCI_FLAGS_MSIX_OKAY; + } else { + pba->pba_flags |= PCI_FLAGS_MSI_OKAY; + pba->pba_flags |= PCI_FLAGS_MSIX_OKAY; + } + + /* VMware and KVM use old chipset, but they can use MSI/MSI-X */ + if (cpu_feature[1] & CPUID2_RAZ) { + if (pci_has_msi_quirk(id, PCI_QUIRK_ENABLE_MSI_VM)) { + pba->pba_flags |= PCI_FLAGS_MSI_OKAY; + pba->pba_flags |= PCI_FLAGS_MSIX_OKAY; + } + } + + /* + * Don't enable MSI on a HyperTransport bus. In order to + * determine that bus 0 is a HyperTransport bus, we look at + * device 24 function 0, which is the HyperTransport + * host/primary interface integrated on most 64-bit AMD CPUs. + * If that device has a HyperTransport capability, bus 0 must + * be a HyperTransport bus and we disable MSI. + */ + tag = pci_make_tag(pc, 0, 24, 0); + if (pci_get_capability(pc, tag, PCI_CAP_LDT, NULL, NULL)) { + pba->pba_flags &= ~PCI_FLAGS_MSI_OKAY; + pba->pba_flags &= ~PCI_FLAGS_MSIX_OKAY; + } +#endif /* PCI_MSI_MSIX */ } int diff --git a/sys/arch/x86/pci/pci_msi_machdep.c b/sys/arch/x86/pci/pci_msi_machdep.c new file mode 100644 index 0000000..de65942 --- /dev/null +++ b/sys/arch/x86/pci/pci_msi_machdep.c @@ -0,0 +1,686 @@ +/* $NetBSD$ */ + +/* + * Copyright (c) 2014 Internet Initiative Japan Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * TODO + * + * - PBA (Pending Bit Array) support + * - HyperTransport mapping support + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +#ifdef INTRDEBUG +#define MSIDEBUG +#endif + +#ifdef MSIDEBUG +#define DPRINTF(msg) printf msg +#else +#define DPRINTF(msg) +#endif + +/* + * Return intrid for a MSI/MSI-X device. + * "buf" must be allocated by caller. 
+ */
+const char *
+pci_msi_string(pci_chipset_tag_t pc, pci_intr_handle_t ih, char *buf,
+    size_t len)
+{
+	int dev, vec;
+
+	KASSERT(INT_VIA_MSI(ih));
+
+	/* Only the devid, the vector and the MSI/MSI-X kind go in the name. */
+	dev = MSI_INT_DEV(ih);
+	vec = MSI_INT_VEC(ih);
+	if (MSI_INT_IS_MSIX(ih))
+		snprintf(buf, len, "msix%d vec %d", dev, vec);
+	else
+		snprintf(buf, len, "msi%d vec %d", dev, vec);
+
+	return buf;
+}
+
+/*
+ * Build the interrupt handle for vector "vector" of "msi_pic": the devid
+ * of the pic, the vector number, the "via MSI" flag and the MSI or MSI-X
+ * marker chosen from pic_type.
+ * Panic if the pic is neither PIC_MSI nor PIC_MSIX.
+ */
+static pci_intr_handle_t
+pci_msi_calculate_handle(struct pic *msi_pic, int vector)
+{
+	pci_intr_handle_t pih;
+
+	KASSERT(msipic_is_msi_pic(msi_pic));
+
+	pih = __SHIFTIN((uint64_t)msipic_get_devid(msi_pic), MSI_INT_DEV_MASK)
+	    | __SHIFTIN((uint64_t)vector, MSI_INT_VEC_MASK)
+	    | APIC_INT_VIA_MSI;
+	if (msi_pic->pic_type == PIC_MSI)
+		MSI_INT_MAKE_MSI(pih);
+	else if (msi_pic->pic_type == PIC_MSIX)
+		MSI_INT_MAKE_MSIX(pih);
+	else
+		panic("%s: Unexpected pic_type: %d\n", __func__,
+		    msi_pic->pic_type);
+
+	return pih;
+}
+
+/*
+ * Allocate an array of "*count" interrupt handles for "msi_pic" and an
+ * io intrsource for each of them.
+ * If "table_indexes" is NULL, handle i uses vector number i; otherwise it
+ * uses table_indexes[i].
+ * Return the array, or NULL on failure.  On failure every intrsource
+ * allocated by this call has been freed again.
+ */
+static pci_intr_handle_t *
+pci_msi_alloc_vectors(struct pic *msi_pic, uint *table_indexes, int *count)
+{
+	struct intrsource *isp;
+	pci_intr_handle_t *vectors, pih;
+	int i;
+	const char *intrstr;
+	char intrstr_buf[INTRIDBUF];
+
+	vectors = kmem_zalloc(sizeof(vectors[0]) * (*count), KM_SLEEP);
+	if (vectors == NULL) {
+		DPRINTF(("cannot allocate vectors\n"));
+		return NULL;
+	}
+
+	mutex_enter(&cpu_lock);
+	for (i = 0; i < *count; i++) {
+		u_int table_index;
+
+		if (table_indexes == NULL)
+			table_index = i;
+		else
+			table_index = table_indexes[i];
+
+		pih = pci_msi_calculate_handle(msi_pic, table_index);
+
+		intrstr = pci_msi_string(NULL, pih, intrstr_buf,
+		    sizeof(intrstr_buf));
+		isp = intr_allocate_io_intrsource(intrstr);
+		if (isp == NULL) {
+			/*
+			 * Give back the intrsources of vectors[0..i-1];
+			 * otherwise they would stay allocated although the
+			 * caller gets no handle for them.
+			 */
+			while (--i >= 0) {
+				intrstr = pci_msi_string(NULL, vectors[i],
+				    intrstr_buf, sizeof(intrstr_buf));
+				intr_free_io_intrsource(intrstr);
+			}
+			mutex_exit(&cpu_lock);
+			DPRINTF(("can't allocate io_intersource\n"));
+			kmem_free(vectors, sizeof(vectors[0]) * (*count));
+			return NULL;
+		}
+
+		vectors[i] = pih;
+	}
+	mutex_exit(&cpu_lock);
+
+	return vectors;
+}
+
+/*
+ * Free the io intrsources of the "count" handles in "pihs", then free the
+ * array itself.  "count" must be the count the array was allocated with.
+ */
+static void
+pci_msi_free_vectors(struct pic *msi_pic, pci_intr_handle_t *pihs, int count)
+{
+	int i;
+	const char *intrstr;
+	char intrstr_buf[INTRIDBUF];
+
+	KASSERT(msipic_is_msi_pic(msi_pic));
+
+	mutex_enter(&cpu_lock);
+	for (i = 0; i < count; i++) {
+		/*
+		 * Use the stored handle, not a handle rebuilt from "i": with
+		 * a table_indexes map the vector number in the handle differs
+		 * from its position in the array, and the intrsource was
+		 * allocated under the name made from the stored handle.
+		 */
+		intrstr = pci_msi_string(NULL, pihs[i], intrstr_buf,
+		    sizeof(intrstr_buf));
+		intr_free_io_intrsource(intrstr);
+	}
+	mutex_exit(&cpu_lock);
+
+	kmem_free(pihs, sizeof(pihs[0]) * count);
+}
+
+/*
+ * Common part of MSI allocation.
+ * Construct the MSI pic and allocate "*count" handles.  Unless "exact" is
+ * true, "*count" is halved after each failed attempt until an allocation
+ * succeeds or the count reaches 0.
+ * On success store the handle array in "*ihps" and return 0.
+ * Return ENODEV if PCI_FLAGS_MSI_OKAY is not set, EINVAL if the pic cannot
+ * be constructed, and ENOMEM if no handles can be allocated.
+ */
+static int
+pci_msi_alloc_md_common(pci_intr_handle_t **ihps, int *count,
+    struct pci_attach_args *pa, bool exact)
+{
+	struct pic *msi_pic;
+	pci_intr_handle_t *vectors;
+	int error, i;
+
+	if ((pa->pa_flags & PCI_FLAGS_MSI_OKAY) == 0) {
+		DPRINTF(("PCI host bridge does not support MSI.\n"));
+		return ENODEV;
+	}
+
+	msi_pic = msipic_construct_msi_pic(pa);
+	if (msi_pic == NULL) {
+		DPRINTF(("cannot allocate MSI pic.\n"));
+		return EINVAL;
+	}
+
+	vectors = NULL;
+	while (*count > 0) {
+		vectors = pci_msi_alloc_vectors(msi_pic, NULL, count);
+		if (vectors != NULL)
+			break;
+
+		if (exact) {
+			DPRINTF(("cannot allocate MSI vectors.\n"));
+			msipic_destruct_msi_pic(msi_pic);
+			return ENOMEM;
+		} else {
+			(*count) >>= 1; /* must be power of 2. */
+			continue;
+		}
+	}
+	if (vectors == NULL) {
+		DPRINTF(("cannot allocate MSI vectors.\n"));
+		msipic_destruct_msi_pic(msi_pic);
+		return ENOMEM;
+	}
+
+	for (i = 0; i < *count; i++) {
+		MSI_INT_MAKE_MSI(vectors[i]);
+	}
+
+	error = msipic_set_msi_vectors(msi_pic, NULL, *count);
+	if (error) {
+		pci_msi_free_vectors(msi_pic, vectors, *count);
+		msipic_destruct_msi_pic(msi_pic);
+		return error;
+	}
+
+	*ihps = vectors;
+	return 0;
+}
+
+/* Allocate up to "*count" MSI handles; "*count" may be reduced. */
+static int
+pci_msi_alloc_md(pci_intr_handle_t **ihps, int *count,
+    struct pci_attach_args *pa)
+{
+
+	return pci_msi_alloc_md_common(ihps, count, pa, false);
+}
+
+/* Allocate exactly "count" MSI handles or fail. */
+static int
+pci_msi_alloc_exact_md(pci_intr_handle_t **ihps, int count,
+    struct pci_attach_args *pa)
+{
+
+	return pci_msi_alloc_md_common(ihps, &count, pa, true);
+}
+
+/*
+ * Release the "count" MSI handles in "*pihs" and destroy their pic.
+ * The pic is looked up by the devid stored in the first handle; nothing is
+ * released if no pic is registered for that devid.
+ */
+static void
+pci_msi_release_md(pci_intr_handle_t **pihs, int count)
+{
+	struct pic *pic;
+	pci_intr_handle_t *vectors;
+
+	vectors = *pihs;
+	pic = msipic_find_msi_pic(MSI_INT_DEV(vectors[0]));
+	if (pic == NULL)
+		return;
+
+	pci_msi_free_vectors(pic, vectors, count);
+	msipic_destruct_msi_pic(pic);
+}
+
+/*
+ * Establish the handler "func" for the MSI/MSI-X handle "ih" on "pic".
+ * The vector number of the handle is passed as the pin, the irq is -1 and
+ * the trigger type is IST_EDGE.  "pc" is not used.
+ * Return what intr_establish() returns.
+ */
+static void *
+pci_msi_common_establish(pci_chipset_tag_t pc, pci_intr_handle_t ih,
+    int level, int (*func)(void *), void *arg, struct pic *pic)
+{
+	int irq, pin;
+	bool mpsafe;
+
+	KASSERT(INT_VIA_MSI(ih));
+
+	irq = -1;
+	pin = MSI_INT_VEC(ih);
+	mpsafe = ((ih & MPSAFE_MASK) != 0);
+
+	return intr_establish(irq, pic, pin, IST_EDGE, level, func, arg,
+	    mpsafe);
+}
+
+/* Disestablish a handler established by pci_msi_common_establish(). */
+static void
+pci_msi_common_disestablish(pci_chipset_tag_t pc, void *cookie)
+{
+
+	intr_disestablish(cookie);
+}
+
+/*
+ * Common part of MSI-X allocation.
+ * Construct the MSI-X pic and allocate "*count" handles, using
+ * "table_indexes" (may be NULL) to choose the table entry of each handle.
+ * Unless "exact" is true, "*count" is decremented after each failed
+ * attempt until an allocation succeeds or the count reaches 0.
+ * On success store the handle array in "*ihps" and return 0.
+ * Return ENODEV if PCI_FLAGS_MSI_OKAY or PCI_FLAGS_MSIX_OKAY is not set,
+ * EINVAL if the pic cannot be constructed, and ENOMEM if no handles can be
+ * allocated.
+ */
+static int
+pci_msix_alloc_md_common(pci_intr_handle_t **ihps, u_int *table_indexes,
+    int *count, struct pci_attach_args *pa, bool exact)
+{
+	struct pic *msix_pic;
+	pci_intr_handle_t *vectors;
+	int error, i;
+
+	if (((pa->pa_flags & PCI_FLAGS_MSI_OKAY) == 0)
+	    || ((pa->pa_flags & PCI_FLAGS_MSIX_OKAY) == 0)) {
+		DPRINTF(("PCI host bridge does not support MSI-X.\n"));
+		return ENODEV;
+	}
+
+	msix_pic = msipic_construct_msix_pic(pa);
+	if (msix_pic == NULL)
+		return EINVAL;
+
+	vectors = NULL;
+	while (*count > 0) {
+		vectors = pci_msi_alloc_vectors(msix_pic, table_indexes, count);
+		if (vectors != NULL)
+			break;
+
+		if (exact) {
+			DPRINTF(("cannot allocate MSI-X vectors.\n"));
+			msipic_destruct_msix_pic(msix_pic);
+			return ENOMEM;
+		} else {
+			(*count)--;
+			continue;
+		}
+	}
+	if (vectors == NULL) {
+		DPRINTF(("cannot allocate MSI-X vectors.\n"));
+		msipic_destruct_msix_pic(msix_pic);
+		return ENOMEM;
+	}
+
+	for (i = 0; i < *count; i++) {
+		MSI_INT_MAKE_MSIX(vectors[i]);
+	}
+
+	error = msipic_set_msi_vectors(msix_pic, vectors, *count);
+	if (error) {
+		pci_msi_free_vectors(msix_pic, vectors, *count);
+		msipic_destruct_msix_pic(msix_pic);
+		return error;
+	}
+
+	*ihps = vectors;
+	return 0;
+}
+
+/* Allocate up to "*count" MSI-X handles; "*count" may be reduced. */
+static int
+pci_msix_alloc_md(pci_intr_handle_t **ihps, int *count,
+    struct pci_attach_args *pa)
+{
+
+	return pci_msix_alloc_md_common(ihps, NULL, count, pa, false);
+}
+
+/* Allocate exactly "count" MSI-X handles or fail. */
+static int
+pci_msix_alloc_exact_md(pci_intr_handle_t **ihps, int count,
+    struct pci_attach_args *pa)
+{
+
+	return pci_msix_alloc_md_common(ihps, NULL, &count, pa, true);
+}
+
+/*
+ * Allocate exactly "count" MSI-X handles, handle i using the table entry
+ * table_indexes[i], or fail.
+ */
+static int
+pci_msix_alloc_map_md(pci_intr_handle_t **ihps, u_int *table_indexes, int count,
+    struct pci_attach_args *pa)
+{
+
+	return pci_msix_alloc_md_common(ihps, table_indexes, &count, pa, true);
+}
+
+/*
+ * Release the "count" MSI-X handles in "*pihs" and destroy their pic.
+ * The pic is looked up by the devid stored in the first handle; nothing is
+ * released if no pic is registered for that devid.
+ */
+static void
+pci_msix_release_md(pci_intr_handle_t **pihs, int count)
+{
+	struct pic *pic;
+	pci_intr_handle_t *vectors;
+
+	vectors = *pihs;
+	pic = msipic_find_msi_pic(MSI_INT_DEV(vectors[0]));
+	if (pic == NULL)
+		return;
+
+	pci_msi_free_vectors(pic, vectors, count);
+	msipic_destruct_msix_pic(pic);
+}
+
+/*****************************************************************************/
+/*
+ * these APIs may be MI code.
+ */ + +/* + * return number of the device's MSI vectors + * return 0 if MSI is unsupported or the count exceeds PCI_MSI_MAX_VECTORS + */ +int +pci_msi_count(struct pci_attach_args *pa) +{ + pci_chipset_tag_t pc; + pcitag_t tag; + pcireg_t reg; + uint32_t mmc; + int count, offset; + + pc = pa->pa_pc; + tag = pa->pa_tag; + if (pci_get_capability(pc, tag, PCI_CAP_MSI, &offset, NULL) == 0) + return 0; + + reg = pci_conf_read(pc, tag, offset + PCI_MSI_CTL); + mmc = PCI_MSI_CTL_MMC(reg); + count = 1 << mmc; + if (count > PCI_MSI_MAX_VECTORS) { + aprint_error("detect an illegal device! The device use reserved MMC values.\n"); + return 0; + } + + return count; +} + +/* + * This function is used by device drivers like pci_intr_map(). + * + * "ihps" is the array of vector numbers which MSI uses instead of IRQ numbers. + * "count" must be a power of 2. + * "count" can decrease if struct intrsource cannot be allocated. + * if count == 0, return non-zero value. + */ +int +pci_msi_alloc(struct pci_attach_args *pa, pci_intr_handle_t **ihps, int *count) +{ + int hw_max; + + /* MSI vector count must be a power of 2. */ + KASSERT(*count > 0); + KASSERT(((*count - 1) & *count) == 0); + + hw_max = pci_msi_count(pa); + if (hw_max == 0) + return ENODEV; + + if (*count > hw_max) { + DPRINTF(("cut off MSI count to %d\n", hw_max)); + *count = hw_max; /* clamp to the hardware maximum */ + } + + return pci_msi_alloc_md(ihps, count, pa); +} + +/* + * This function is used by device drivers like pci_intr_map(). + * + * "ihps" is the array of vector numbers which MSI uses instead of IRQ numbers. + * "count" must be a power of 2. + * "count" cannot decrease. + * If "count" struct intrsources cannot be allocated, return non-zero value. + */ +int +pci_msi_alloc_exact(struct pci_attach_args *pa, pci_intr_handle_t **ihps, + int count) +{ + int hw_max; + + /* MSI vector count must be power of 2.
*/ + KASSERT(count > 0); + KASSERT(((count - 1) & count) == 0); + + hw_max = pci_msi_count(pa); + if (hw_max == 0) + return ENODEV; + + if (count > hw_max) { + DPRINTF(("over hardware max MSI count %d\n", hw_max)); + return EINVAL; + } + + return pci_msi_alloc_exact_md(ihps, count, pa); +} + +/* + * Release MSI handles; a count below 1 is a no-op. + */ +void +pci_msi_release(pci_chipset_tag_t pc, pci_intr_handle_t **pihs, int count) +{ + + if (count < 1) + return; + + pci_msi_release_md(pihs, count); +} + +/* + * Establish an MSI handle. + * To establish several MSI handles, the device driver must call + * this function for each handle. + */ +void * +pci_msi_establish(pci_chipset_tag_t pc, pci_intr_handle_t ih, + int level, int (*func)(void *), void *arg) +{ + struct pic *pic; + + pic = msipic_find_msi_pic(MSI_INT_DEV(ih)); + if (pic == NULL) { + DPRINTF(("pci_intr_handler has no msi_pic\n")); + return NULL; + } + + return pci_msi_common_establish(pc, ih, level, func, arg, pic); +} + +/* + * Disestablish an MSI handle. + * To disestablish several MSI handles, the device driver must call + * this function for each handle. + */ +void +pci_msi_disestablish(pci_chipset_tag_t pc, void *cookie) +{ + + pci_msi_common_disestablish(pc, cookie); +} + +/* + * return number of the device's MSI-X vectors + * return 0 if the device does not support MSI-X + */ +int +pci_msix_count(struct pci_attach_args *pa) +{ + pci_chipset_tag_t pc; + pcitag_t tag; + pcireg_t reg; + int offset; + + pc = pa->pa_pc; + tag = pa->pa_tag; + if (pci_get_capability(pc, tag, PCI_CAP_MSIX, &offset, NULL) == 0) + return 0; + + reg = pci_conf_read(pc, tag, offset + PCI_MSIX_CTL); + + return PCI_MSIX_CTL_TBLSIZE(reg); +} + +/* + * This function is used by device drivers like pci_intr_map(). + * + * "ihps" is the array of vector numbers which MSI-X used instead of IRQ number. + * "count" can decrease if enough struct intrsources cannot be allocated. + * if count == 0, return non-zero value.
+ */ +int +pci_msix_alloc(struct pci_attach_args *pa, pci_intr_handle_t **ihps, int *count) +{ + int hw_max; + + KASSERT(*count > 0); + + hw_max = pci_msix_count(pa); + if (hw_max == 0) + return ENODEV; + + if (*count > hw_max) { + DPRINTF(("cut off MSI-X count to %d\n", hw_max)); + *count = hw_max; /* clamp to the hardware maximum */ + } + + return pci_msix_alloc_md(ihps, count, pa); +} + +/* + * This function is used by device drivers like pci_intr_map(). + * + * "ihps" is the array of vector numbers which MSI-X uses instead of IRQ numbers. + * "count" cannot decrease. + * If "count" struct intrsource cannot be allocated, return non-zero value. + */ +int +pci_msix_alloc_exact(struct pci_attach_args *pa, pci_intr_handle_t **ihps, + int count) +{ + int hw_max; + + KASSERT(count > 0); + + hw_max = pci_msix_count(pa); + if (hw_max == 0) + return ENODEV; + + if (count > hw_max) { + DPRINTF(("over hardware max MSI-X count %d\n", hw_max)); + return EINVAL; + } + + return pci_msix_alloc_exact_md(ihps, count, pa); +} + + +/* + * This function is used by device drivers like pci_intr_map(). + * Furthermore, this function can map each handle to an MSI-X table index. + * + * "ihps" is the array of vector numbers which MSI-X uses instead of IRQ numbers. + * "count" cannot decrease. + * "map" (the "table_indexes" argument) size must be equal to "count". + * If "count" struct intrsource cannot be allocated, return non-zero value. + * e.g.
+ * If "map" = { 1, 4, 0 }, + * 1st handle is bound to MSI-X index 1 + * 2nd handle is bound to MSI-X index 4 + * 3rd handle is bound to MSI-X index 0 + */ +int +pci_msix_alloc_map(struct pci_attach_args *pa, pci_intr_handle_t **ihps, + u_int *table_indexes, int count) +{ + int hw_max, i, j; + + KASSERT(count > 0); + + hw_max = pci_msix_count(pa); + if (hw_max == 0) + return ENODEV; + + if (count > hw_max) { + DPRINTF(("over hardware max MSI-X count %d\n", hw_max)); + return EINVAL; + } + + /* reject out-of-range and duplicated table indexes */ + for (i = 0; i < count; i++) { + u_int basing = table_indexes[i]; + + KASSERT(table_indexes[i] < PCI_MSIX_MAX_VECTORS); + if (basing >= hw_max) { + DPRINTF(("table index is over hardware max MSI-X index %d\n", + hw_max - 1)); + return EINVAL; + } + + for (j = i + 1; j < count; j++) { + if (basing == table_indexes[j]) { + DPRINTF(("MSI-X table index duplicated\n")); + return EINVAL; + } + } + } + + return pci_msix_alloc_map_md(ihps, table_indexes, count, pa); +} + +/* + * Release MSI-X handles; a count below 1 is a no-op. + */ +void +pci_msix_release(pci_chipset_tag_t pc, pci_intr_handle_t **pihs, int count) +{ + + if (count < 1) + return; + + pci_msix_release_md(pihs, count); +} + +/* + * Establish an MSI-X handle. + * To establish several MSI-X handles, the device driver must call + * this function for each handle. + */ +void * +pci_msix_establish(pci_chipset_tag_t pc, pci_intr_handle_t ih, + int level, int (*func)(void *), void *arg) +{ + struct pic *pic; + + pic = msipic_find_msi_pic(MSI_INT_DEV(ih)); + if (pic == NULL) { + DPRINTF(("pci_intr_handler has no msi_pic\n")); + return NULL; + } + + return pci_msi_common_establish(pc, ih, level, func, arg, pic); +} + +/* + * Disestablish an MSI-X handle. + * To disestablish several MSI-X handles, the device driver must call + * this function for each handle.
+ */ +void +pci_msix_disestablish(pci_chipset_tag_t pc, void *cookie) +{ + + pci_msi_common_disestablish(pc, cookie); +} diff --git a/sys/arch/x86/pci/pciide_machdep.c b/sys/arch/x86/pci/pciide_machdep.c index 28a9449..46c0be4 100644 --- a/sys/arch/x86/pci/pciide_machdep.c +++ b/sys/arch/x86/pci/pciide_machdep.c @@ -69,7 +69,7 @@ pciide_machdep_compat_intr_establish(device_t dev, int irq; void *cookie; #if NIOAPIC > 0 - int mpih; + intr_handle_t mpih; char buf[PCI_INTRSTR_LEN]; #endif diff --git a/sys/arch/x86/x86/intr.c b/sys/arch/x86/x86/intr.c index 63c7ceb..0155bc6 100644 --- a/sys/arch/x86/x86/intr.c +++ b/sys/arch/x86/x86/intr.c @@ -138,6 +138,7 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.78 2015/04/08 05:52:41 knakahara Exp $"); #include "opt_intrdebug.h" #include "opt_multiprocessor.h" #include "opt_acpi.h" +#include "opt_pci_msi_msix.h" #include #include @@ -176,10 +177,20 @@ __KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.78 2015/04/08 05:52:41 knakahara Exp $"); #include #endif +#ifdef PCI_MSI_MSIX +#include +#endif + #ifdef DDB #include #endif +#ifdef INTRDEBUG +#define DPRINTF(msg) printf msg +#else +#define DPRINTF(msg) +#endif + struct pic softintr_pic = { .pic_name = "softintr_fakepic", .pic_type = PIC_SOFT, @@ -188,24 +199,37 @@ struct pic softintr_pic = { .pic_lock = __SIMPLELOCK_UNLOCKED, }; +static SIMPLEQ_HEAD(, intrsource) io_interrupt_sources = + SIMPLEQ_HEAD_INITIALIZER(io_interrupt_sources); + static void intr_calculatemasks(struct cpu_info *); #if NIOAPIC > 0 || NACPICA > 0 -static int intr_scan_bus(int, int, int *); +static int intr_scan_bus(int, int, intr_handle_t *); #if NPCI > 0 static int intr_find_pcibridge(int, pcitag_t *, pci_chipset_tag_t *); #endif #endif -static int intr_allocate_slot_cpu(struct cpu_info *, struct pic *, int, int *); +static const char *create_intrid(int, struct pic *, int, char *, size_t); + +static struct intrsource *intr_get_io_intrsource(const char *); +static void intr_free_io_intrsource_direct(struct intrsource *); 
+static int intr_num_handlers(struct intrsource *); + +static int intr_allocate_slot_cpu(struct cpu_info *, struct pic *, int, int *, + struct intrsource *); static int __noinline intr_allocate_slot(struct pic *, int, int, - struct cpu_info **, int *, int *); + struct cpu_info **, int *, int *, + struct intrsource *); static void intr_source_free(struct cpu_info *, int, struct pic *, int); static void intr_establish_xcall(void *, void *); static void intr_disestablish_xcall(void *, void *); +static const char *legacy_intr_string(int, char *, size_t, struct pic *); + static inline bool redzone_const_or_false(bool); static inline int redzone_const_or_zero(int); @@ -382,7 +406,7 @@ intr_find_pcibridge(int bus, pcitag_t *pci_bridge_tag, * 'pin' argument pci bus_pin encoding of a device/pin combination. */ int -intr_find_mpmapping(int bus, int pin, int *handle) +intr_find_mpmapping(int bus, int pin, intr_handle_t *handle) { #if NPCI > 0 @@ -406,7 +430,7 @@ intr_find_mpmapping(int bus, int pin, int *handle) } static int -intr_scan_bus(int bus, int pin, int *handle) +intr_scan_bus(int bus, int pin, intr_handle_t *handle) { struct mp_intr_map *mip, *intrs; @@ -432,9 +456,154 @@ intr_scan_bus(int bus, int pin, int *handle) } #endif +/* + * Create an interrupt id such as "ioapic0 pin 9". This interrupt id is used + * by MI code and intrctl(8). 
+ */ +static const char * +create_intrid(int legacy_irq, struct pic *pic, int pin, char *buf, size_t len) +{ + int ih; + +#ifdef PCI_MSI_MSIX + if ((pic->pic_type == PIC_MSI) || (pic->pic_type == PIC_MSIX)) { + uint64_t pih; + int dev, vec; + + dev = msipic_get_devid(pic); + vec = pin; + pih = __SHIFTIN((uint64_t)dev, MSI_INT_DEV_MASK) + | __SHIFTIN((uint64_t)vec, MSI_INT_VEC_MASK) + | APIC_INT_VIA_MSI; + if (pic->pic_type == PIC_MSI) + MSI_INT_MAKE_MSI(pih); + else if (pic->pic_type == PIC_MSIX) + MSI_INT_MAKE_MSIX(pih); + + return pci_msi_string(NULL, pih, buf, len); + } +#endif + + /* + * If the device is pci, "legacy_irq" is alway -1. Least 8 bit of "ih" + * is only used in intr_string() to show the irq number. + * If the device is "legacy"(such as floppy), it should not use + * intr_string(). + */ + if (pic->pic_type == PIC_I8259) { + ih = legacy_irq; + return legacy_intr_string(ih, buf, len, pic); + } + + ih = ((pic->pic_apicid << APIC_INT_APIC_SHIFT) & APIC_INT_APIC_MASK) + | ((pin << APIC_INT_PIN_SHIFT) & APIC_INT_PIN_MASK); + if (pic->pic_type == PIC_IOAPIC) { + ih |= APIC_INT_VIA_APIC; + } + ih |= pin; + return intr_string(ih, buf, len); +} + +/* + * Find intrsource from io_interrupt_sources list. + */ +static struct intrsource * +intr_get_io_intrsource(const char *intrid) +{ + struct intrsource *isp; + + KASSERT(mutex_owned(&cpu_lock)); + + SIMPLEQ_FOREACH(isp, &io_interrupt_sources, is_list) { + KASSERT(isp->is_intrid != NULL); + if (strncmp(intrid, isp->is_intrid, INTRIDBUF - 1) == 0) + return isp; + } + return NULL; +} + +/* + * Allocate intrsource and add to io_interrupt_sources list. 
+ */ +struct intrsource * +intr_allocate_io_intrsource(const char *intrid) +{ + CPU_INFO_ITERATOR cii; + struct cpu_info *ci; + struct intrsource *isp; + struct percpu_evcnt *pep; + + KASSERT(mutex_owned(&cpu_lock)); + + if (intrid == NULL) + return NULL; + + isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); + if (isp == NULL) { + return NULL; + } + + pep = kmem_zalloc(sizeof(*pep) * ncpu, KM_SLEEP); + if (pep == NULL) { + kmem_free(isp, sizeof(*isp)); + return NULL; + } + isp->is_saved_evcnt = pep; + for (CPU_INFO_FOREACH(cii, ci)) { + pep->cpuid = ci->ci_cpuid; + pep++; + } + strncpy(isp->is_intrid, intrid, sizeof(isp->is_intrid) - 1); /* zeroed last byte stays as the NUL */ + + SIMPLEQ_INSERT_TAIL(&io_interrupt_sources, isp, is_list); + + return isp; +} + +/* + * Remove from io_interrupt_sources list and free by the intrsource pointer. + */ +static void +intr_free_io_intrsource_direct(struct intrsource *isp) +{ + KASSERT(mutex_owned(&cpu_lock)); + + SIMPLEQ_REMOVE(&io_interrupt_sources, isp, intrsource, is_list); + + /* Detach the event counter only if it was named, i.e. attached. */ + if (isp->is_evname[0] != '\0') + evcnt_detach(&isp->is_evcnt); + + kmem_free(isp->is_saved_evcnt, + sizeof(*(isp->is_saved_evcnt)) * ncpu); + + kmem_free(isp, sizeof(*isp)); +} + +/* + * Remove from io_interrupt_sources list and free by the interrupt id. + * This function can be used by MI code.
+ */ +void +intr_free_io_intrsource(const char *intrid) +{ + struct intrsource *isp; + + KASSERT(mutex_owned(&cpu_lock)); + + if (intrid == NULL) + return; + + if ((isp = intr_get_io_intrsource(intrid)) == NULL) { + return; + } + + intr_free_io_intrsource_direct(isp); +} + static int intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin, - int *index) + int *index, struct intrsource *chained) { int slot, i; struct intrsource *isp; @@ -445,13 +614,19 @@ intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin, KASSERT(CPU_IS_PRIMARY(ci)); slot = pin; } else { + int start = 0; slot = -1; +#ifdef PCI_MSI_MSIX + /* avoid reserved slots for legacy interrupts. */ + if (CPU_IS_PRIMARY(ci) && msipic_is_msi_pic(pic)) + start = NUM_LEGACY_IRQS; +#endif /* * intr_allocate_slot has checked for an existing mapping. * Now look for a free slot. */ - for (i = 0; i < MAX_INTR_SOURCES ; i++) { + for (i = start; i < MAX_INTR_SOURCES ; i++) { if (ci->ci_isources[i] == NULL) { slot = i; break; @@ -464,14 +639,21 @@ intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin, isp = ci->ci_isources[slot]; if (isp == NULL) { - isp = kmem_zalloc(sizeof(*isp), KM_SLEEP); - if (isp == NULL) { - return ENOMEM; - } + const char *via; + + isp = chained; + KASSERT(isp != NULL); +#ifdef PCI_MSI_MSIX + if (pic->pic_type == PIC_MSI || pic->pic_type == PIC_MSIX) + via = "vec"; + else +#endif + via = "pin"; snprintf(isp->is_evname, sizeof (isp->is_evname), - "pin %d", pin); + "%s %d", via, pin); evcnt_attach_dynamic(&isp->is_evcnt, EVCNT_TYPE_INTR, NULL, pic->pic_name, isp->is_evname); + isp->is_active_cpu = ci->ci_cpuid; ci->ci_isources[slot] = isp; } @@ -484,7 +666,8 @@ intr_allocate_slot_cpu(struct cpu_info *ci, struct pic *pic, int pin, */ static int __noinline intr_allocate_slot(struct pic *pic, int pin, int level, - struct cpu_info **cip, int *index, int *idt_slot) + struct cpu_info **cip, int *index, int *idt_slot, + struct intrsource *chained) { 
CPU_INFO_ITERATOR cii; struct cpu_info *ci, *lci; @@ -523,7 +706,7 @@ intr_allocate_slot(struct pic *pic, int pin, int level, * Must be directed to BP. */ ci = &cpu_info_primary; - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained); } else { /* * Find least loaded AP/BP and try to allocate there. @@ -543,7 +726,7 @@ intr_allocate_slot(struct pic *pic, int pin, int level, #endif } KASSERT(ci != NULL); - error = intr_allocate_slot_cpu(ci, pic, pin, &slot); + error = intr_allocate_slot_cpu(ci, pic, pin, &slot, chained); /* * If that did not work, allocate anywhere. @@ -555,7 +738,7 @@ intr_allocate_slot(struct pic *pic, int pin, int level, continue; } error = intr_allocate_slot_cpu(ci, pic, - pin, &slot); + pin, &slot, chained); if (error == 0) { break; } @@ -578,7 +761,6 @@ intr_allocate_slot(struct pic *pic, int pin, int level, } if (idtvec == 0) { evcnt_detach(&ci->ci_isources[slot]->is_evcnt); - kmem_free(ci->ci_isources[slot], sizeof(*(ci->ci_isources[slot]))); ci->ci_isources[slot] = NULL; return EBUSY; } @@ -599,9 +781,6 @@ intr_source_free(struct cpu_info *ci, int slot, struct pic *pic, int idtvec) if (isp->is_handlers != NULL) return; ci->ci_isources[slot] = NULL; - evcnt_detach(&isp->is_evcnt); - kmem_free(isp, sizeof(*isp)); - ci->ci_isources[slot] = NULL; if (pic != &i8259_pic) idt_vec_free(idtvec); } @@ -705,11 +884,13 @@ intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, struct intrhand **p, *q, *ih; struct cpu_info *ci; int slot, error, idt_vec; - struct intrsource *source; + struct intrsource *chained, *source; #ifdef MULTIPROCESSOR bool mpsafe = (known_mpsafe || level != IPL_VM); #endif /* MULTIPROCESSOR */ uint64_t where; + const char *intrstr; + char intrstr_buf[INTRIDBUF]; #ifdef DIAGNOSTIC if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15)) @@ -725,9 +906,34 @@ intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, return 
NULL; } + intrstr = create_intrid(legacy_irq, pic, pin, intrstr_buf, + sizeof(intrstr_buf)); + KASSERT(intrstr != NULL); + mutex_enter(&cpu_lock); - error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec); + + /* allocate intrsource pool, if not yet. */ + chained = intr_get_io_intrsource(intrstr); + if (chained == NULL) { +#ifdef PCI_MSI_MSIX + if (msipic_is_msi_pic(pic)) { + mutex_exit(&cpu_lock); + printf("%s: %s has no intrsource\n", __func__, intrstr); + return NULL; + } +#endif + chained = intr_allocate_io_intrsource(intrstr); + if (chained == NULL) { + mutex_exit(&cpu_lock); + printf("%s: can't allocate io_intersource\n", __func__); + return NULL; + } + } + + error = intr_allocate_slot(pic, pin, level, &ci, &slot, &idt_vec, + chained); if (error != 0) { + intr_free_io_intrsource_direct(chained); mutex_exit(&cpu_lock); kmem_free(ih, sizeof(*ih)); printf("failed to allocate interrupt slot for PIC %s pin %d\n", @@ -739,6 +945,7 @@ intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, if (source->is_handlers != NULL && source->is_pic->pic_type != pic->pic_type) { + intr_free_io_intrsource_direct(chained); mutex_exit(&cpu_lock); kmem_free(ih, sizeof(*ih)); printf("%s: can't share intr source between " @@ -761,9 +968,10 @@ intr_establish(int legacy_irq, struct pic *pic, int pin, int type, int level, /* FALLTHROUGH */ case IST_PULSE: if (type != IST_NONE) { + intr_source_free(ci, slot, pic, idt_vec); + intr_free_io_intrsource_direct(chained); mutex_exit(&cpu_lock); kmem_free(ih, sizeof(*ih)); - intr_source_free(ci, slot, pic, idt_vec); printf("%s: pic %s pin %d: can't share " "type %d with %d\n", __func__, pic->pic_name, pin, @@ -902,6 +1110,19 @@ intr_disestablish_xcall(void *arg1, void *arg2) #endif } +static int +intr_num_handlers(struct intrsource *isp) +{ + struct intrhand *ih; + int num; + + num = 0; + for (ih = isp->is_handlers; ih != NULL; ih = ih->ih_next) + num++; + + return num; +} + /* * Deregister an interrupt handler. 
*/ @@ -909,6 +1130,7 @@ void intr_disestablish(struct intrhand *ih) { struct cpu_info *ci; + struct intrsource *isp; uint64_t where; /* @@ -920,26 +1142,51 @@ intr_disestablish(struct intrhand *ih) ci = ih->ih_cpu; (ci->ci_nintrhand)--; KASSERT(ci->ci_nintrhand >= 0); + isp = ci->ci_isources[ih->ih_slot]; if (ci == curcpu() || !mp_online) { intr_disestablish_xcall(ih, NULL); } else { where = xc_unicast(0, intr_disestablish_xcall, ih, NULL, ci); xc_wait(where); } +#ifndef PCI_MSI_MSIX + if (intr_num_handlers(isp) < 1) +#else + if (!msipic_is_msi_pic(isp->is_pic) && intr_num_handlers(isp) < 1) +#endif + { + intr_free_io_intrsource_direct(isp); + } mutex_exit(&cpu_lock); + kmem_free(ih, sizeof(*ih)); } +static const char * +legacy_intr_string(int ih, char *buf, size_t len, struct pic *pic) +{ + int legacy_irq; + + KASSERT(pic->pic_type == PIC_I8259); + KASSERT(APIC_IRQ_ISLEGACY(ih)); + + legacy_irq = APIC_IRQ_LEGACY_IRQ(ih); + KASSERT(legacy_irq >= 0 && legacy_irq < 16); + + snprintf(buf, len, "%s pin %d", pic->pic_name, legacy_irq); + + return buf; +} + const char * -intr_string(int ih, char *buf, size_t len) +intr_string(intr_handle_t ih, char *buf, size_t len) { #if NIOAPIC > 0 struct ioapic_softc *pic; #endif if (ih == 0) - panic("%s: bogus handle 0x%x", __func__, ih); - + panic("%s: bogus handle 0x%" PRIx64, __func__, ih); #if NIOAPIC > 0 if (ih & APIC_INT_VIA_APIC) { @@ -952,13 +1199,13 @@ intr_string(int ih, char *buf, size_t len) "apic %d int %d (irq %d)", APIC_IRQ_APIC(ih), APIC_IRQ_PIN(ih), - ih&0xff); + APIC_IRQ_LEGACY_IRQ(ih)); } } else - snprintf(buf, len, "irq %d", ih&0xff); + snprintf(buf, len, "irq %d", APIC_IRQ_LEGACY_IRQ(ih)); #else - snprintf(buf, len, "irq %d", ih&0xff); + snprintf(buf, len, "irq %d", APIC_IRQ_LEGACY_IRQ(ih)); #endif return buf; @@ -1179,6 +1426,46 @@ softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep) intr_calculatemasks(ci); } +/* + * Save current affinitied cpu's interrupt count.
+ */ +static void +intr_save_evcnt(struct intrsource *source, cpuid_t cpuid) +{ + struct percpu_evcnt *pep; + uint64_t curcnt; + int i; + + curcnt = source->is_evcnt.ev_count; + pep = source->is_saved_evcnt; + + for (i = 0; i < ncpu; i++) { + if (pep[i].cpuid == cpuid) { + pep[i].count = curcnt; + break; + } + } +} + +/* + * Restore current affinitied cpu's interrupt count. + */ +static void +intr_restore_evcnt(struct intrsource *source, cpuid_t cpuid) +{ + struct percpu_evcnt *pep; + int i; + + pep = source->is_saved_evcnt; + + for (i = 0; i < ncpu; i++) { + if (pep[i].cpuid == cpuid) { + source->is_evcnt.ev_count = pep[i].count; + break; + } + } +} + static void intr_redistribute_xc_t(void *arg1, void *arg2) { @@ -1352,6 +1639,9 @@ intr_redistribute(struct cpu_info *oci) nci->ci_nintrhand++; ih->ih_cpu = nci; } + intr_save_evcnt(isp, oci->ci_cpuid); + intr_restore_evcnt(isp, nci->ci_cpuid); + isp->is_active_cpu = nci->ci_cpuid; return true; } @@ -1386,3 +1676,259 @@ cpu_intr_count(struct cpu_info *ci) return ci->ci_nintrhand; } + +static int +intr_find_unused_slot(struct cpu_info *ci, int *index) +{ + int slot, i; + + KASSERT(mutex_owned(&cpu_lock)); + + slot = -1; + for (i = 0; i < MAX_INTR_SOURCES ; i++) { + if (ci->ci_isources[i] == NULL) { + slot = i; + break; + } + } + if (slot == -1) { + DPRINTF(("cannot allocate ci_isources\n")); + return EBUSY; + } + + *index = slot; + return 0; +} + +/* + * Let cpu_info ready to accept the interrupt. 
+ */ +static void +intr_activate_xcall(void *arg1, void *arg2) +{ + struct cpu_info *ci; + struct intrsource *source; + struct intrstub *stubp; + struct intrhand *ih; + u_long psl; + int idt_vec; + int slot; + + ih = arg1; + + kpreempt_disable(); + + KASSERT(ih->ih_cpu == curcpu() || !mp_online); + + ci = ih->ih_cpu; + slot = ih->ih_slot; + source = ci->ci_isources[slot]; + idt_vec = source->is_idtvec; + + psl = x86_read_psl(); + x86_disable_intr(); + + intr_calculatemasks(ci); + + if (source->is_type == IST_LEVEL) { + stubp = &source->is_pic->pic_level_stubs[slot]; + } else { + stubp = &source->is_pic->pic_edge_stubs[slot]; + } + source->is_resume = stubp->ist_resume; + source->is_recurse = stubp->ist_recurse; + setgate(&idt[idt_vec], stubp->ist_entry, 0, SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + x86_write_psl(psl); + + kpreempt_enable(); +} + +/* + * Make the handler's cpu_info stop accepting the interrupt ("arg1" is the intrhand). + */ +static void +intr_deactivate_xcall(void *arg1, void *arg2) +{ + struct cpu_info *ci; + struct intrhand *ih, *lih; + u_long psl; + int slot; + + ih = arg1; + + kpreempt_disable(); + + KASSERT(ih->ih_cpu == curcpu() || !mp_online); + + ci = ih->ih_cpu; + slot = ih->ih_slot; + + psl = x86_read_psl(); + x86_disable_intr(); + + /* Clear the slot and uncount every handler chained from "ih". */ + ci->ci_isources[slot] = NULL; + for (lih = ih; lih != NULL; lih = lih->ih_next) { + ci->ci_nintrhand--; + } + + intr_calculatemasks(ci); + + /* + * Skip unsetgate(), because the same idt[] entry is overwritten in + * intr_activate_xcall().
+ */ + + x86_write_psl(psl); + + kpreempt_enable(); +} + +static void +intr_get_affinity(void *ich, kcpuset_t *cpuset) +{ + struct cpu_info *ci; + struct intrsource *isp; + + if (ich == NULL) { + kcpuset_zero(cpuset); + return; + } + + isp = ich; + ci = isp->is_handlers->ih_cpu; + if (ci == NULL) { + kcpuset_zero(cpuset); + return; + } + + kcpuset_set(cpuset, cpu_index(ci)); + return; +} + +static int +intr_set_affinity(void *ich, const kcpuset_t *cpuset) +{ + struct cpu_info *oldci, *newci; + struct intrsource *isp; + struct intrhand *ih, *lih; + struct pic *pic; + u_int cpu_idx; + int idt_vec; + int oldslot, newslot; + int err; + int pin; + + KASSERT(mutex_owned(&cpu_lock)); + + /* XXX + * logical destination mode is not supported, use lowest index cpu. + */ + cpu_idx = kcpuset_ffs(cpuset) - 1; + newci = cpu_lookup(cpu_idx); + if (newci == NULL) { + DPRINTF(("invalid cpu index: %u\n", cpu_idx)); + return EINVAL; + } + if ((newci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0) { + DPRINTF(("the cpu is set nointr shield. index:%u\n", cpu_idx)); + return EINVAL; + } + + isp = ich; + if (isp == NULL) { + DPRINTF(("invalid intrctl handler\n")); + return EINVAL; + } + + /* i8259_pic supports only primary cpu, see i8259.c. 
*/ + pic = isp->is_pic; + if (pic == &i8259_pic) { + DPRINTF(("i8259 pic does not support set_affinity\n")); + return ENOTSUP; + } + + ih = isp->is_handlers; + oldci = ih->ih_cpu; + if (newci == oldci) /* nothing to do */ + return 0; + + oldslot = ih->ih_slot; + idt_vec = isp->is_idtvec; + + err = intr_find_unused_slot(newci, &newslot); + if (err) { + DPRINTF(("failed to allocate interrupt slot for PIC %s intrid %s\n", + isp->is_pic->pic_name, isp->is_intrid)); + return err; + } + + pin = isp->is_pin; + (*pic->pic_hwmask)(pic, pin); /* for ci_ipending check */ + if (oldci->ci_ipending & (1 << oldslot)) { + (*pic->pic_hwunmask)(pic, pin); + DPRINTF(("pin %d on cpuid %ld has pending interrupts.\n", + pin, oldci->ci_cpuid)); + return EBUSY; + } + + kpreempt_disable(); + + /* deactivate old interrupt setting */ + if (oldci == curcpu() || !mp_online) { + intr_deactivate_xcall(ih, NULL); + } else { + uint64_t where; + where = xc_unicast(0, intr_deactivate_xcall, ih, + NULL, oldci); + xc_wait(where); + } + intr_save_evcnt(isp, oldci->ci_cpuid); + (*pic->pic_delroute)(pic, oldci, pin, idt_vec, isp->is_type); + + /* activate new interrupt setting */ + newci->ci_isources[newslot] = isp; + for (lih = ih; lih != NULL; lih = lih->ih_next) { + newci->ci_nintrhand++; + lih->ih_cpu = newci; + lih->ih_slot = newslot; + } + if (newci == curcpu() || !mp_online) { + intr_activate_xcall(ih, NULL); + } else { + uint64_t where; + where = xc_unicast(0, intr_activate_xcall, ih, + NULL, newci); + xc_wait(where); + } + intr_restore_evcnt(isp, newci->ci_cpuid); + isp->is_active_cpu = newci->ci_cpuid; + (*pic->pic_addroute)(pic, newci, pin, idt_vec, isp->is_type); + + kpreempt_enable(); + + (*pic->pic_hwunmask)(pic, pin); + + return err; +} + +int +intr_distribute(void *ich, const kcpuset_t *newset, kcpuset_t *oldset) +{ + struct intrsource *isp; + struct intrhand *ih; + int slot; + + KASSERT(mutex_owned(&cpu_lock)); + + ih = ich; + slot = ih->ih_slot; + isp = ih->ih_cpu->ci_isources[slot]; + 
+ if (oldset != NULL) + intr_get_affinity(isp, oldset); + + return intr_set_affinity(isp, newset); +} diff --git a/sys/arch/xen/include/intr.h b/sys/arch/xen/include/intr.h index eb283cb0..7fee6ae 100644 --- a/sys/arch/xen/include/intr.h +++ b/sys/arch/xen/include/intr.h @@ -154,6 +154,8 @@ splraiseipl(ipl_cookie_t icookie) struct pcibus_attach_args; +typedef int intr_handle_t; + #ifdef MULTIPROCESSOR int intr_biglock_wrapper(void *); #endif @@ -163,7 +165,7 @@ int x86_nmi(void); void *intr_establish(int, struct pic *, int, int, int, int (*)(void *), void *, bool); void intr_disestablish(struct intrhand *); -const char *intr_string(int); +const char *intr_string(intr_handle_t); void cpu_intr_init(struct cpu_info *); int xen_intr_map(int *, int); #ifdef INTRDEBUG diff --git a/sys/arch/xen/x86/mainbus.c b/sys/arch/xen/x86/mainbus.c index 26a7ca8..c683d3d 100644 --- a/sys/arch/xen/x86/mainbus.c +++ b/sys/arch/xen/x86/mainbus.c @@ -46,6 +46,7 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.18 2014/03/03 22:09:32 dsl Exp $"); #include "opt_xen.h" #include "opt_mpbios.h" #include "opt_pcifixup.h" +#include "opt_pci_msi_msix.h" #include "acpica.h" #include "ioapic.h" @@ -76,6 +77,9 @@ __KERNEL_RCSID(0, "$NetBSD: mainbus.c,v 1.18 2014/03/03 22:09:32 dsl Exp $"); #ifdef PCI_ADDR_FIXUP #include #endif +#ifdef PCI_MSI_MSIX +#include +#endif #endif #if defined(MPBIOS) || NACPICA > 0 @@ -153,6 +157,10 @@ mainbus_attach(device_t parent, device_t self, void *aux) mpbios_present = mpbios_probe(self); #endif #if NPCI > 0 +#ifdef PCI_MSI_MSIX + msipic_init(); +#endif + /* ACPI needs to be able to access PCI configuration space. 
*/ pci_mode_detect(); #ifdef PCI_BUS_FIXUP diff --git a/sys/dev/pci/files.pci b/sys/dev/pci/files.pci index da1f97b..0333d3b 100644 --- a/sys/dev/pci/files.pci +++ b/sys/dev/pci/files.pci @@ -6,7 +6,6 @@ defflag opt_pci.h PCIVERBOSE PCI_CONFIG_DUMP PCI_NETBSD_CONFIGURE defparam opt_pci.h PCI_NETBSD_ENABLE_IDE - defflag opt_bktr.h BKTR_430_FX_MODE BKTR_GPIO_ACCESS BKTR_NO_MSP_RESET BKTR_REVERSE_MUTE BKTR_SIS_VIA_MODE BKTR_USE_PLL defparam opt_bktr.h BKTR_OVERRIDE_CARD BKTR_OVERRIDE_TUNER BKTR_OVERRIDE_DBX @@ -14,6 +13,7 @@ defparam opt_bktr.h BKTR_OVERRIDE_CARD BKTR_OVERRIDE_TUNER BKTR_OVERRIDE_DBX defflag opt_pciide.h PCIIDE_CMD064x_DISABLE PCIIDE_AMD756_ENABLEDMA PCIIDE_CMD0646U_ENABLEUDMA PCIIDE_I31244_DISABLEDMA +defflag opt_pci_msi_msix.h PCI_MSI_MSIX device pci {[dev = -1], [function = -1]} attach pci at pcibus diff --git a/sys/dev/pci/if_wm.c b/sys/dev/pci/if_wm.c index 8373810..8c3e097 100644 --- a/sys/dev/pci/if_wm.c +++ b/sys/dev/pci/if_wm.c @@ -85,6 +85,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.314 2015/03/28 17:35:59 msaitoh Exp $"); #ifdef _KERNEL_OPT #include "opt_net_mpsafe.h" +#include "opt_pci_msi_msix.h" #endif #include @@ -99,6 +100,7 @@ __KERNEL_RCSID(0, "$NetBSD: if_wm.c,v 1.314 2015/03/28 17:35:59 msaitoh Exp $"); #include #include #include +#include /* for cpu_lock */ #include @@ -154,6 +156,26 @@ int wm_debug = WM_DEBUG_TX | WM_DEBUG_RX | WM_DEBUG_LINK | WM_DEBUG_GMII #define WM_MPSAFE 1 #endif +#ifdef PCI_MSI_MSIX +/* + * This device driver divides interrupt to TX, RX and link state. + * Each MSI-X vector indexes are below. + */ +#define WM_NINTR 3 +#define WM_TX_INTR_INDEX 0 +#define WM_RX_INTR_INDEX 1 +#define WM_LINK_INTR_INDEX 2 + +/* + * This device driver set affinity to each interrupts like below (round-robin). + * If the number CPUs is less than the number of interrupts, this driver usase + * the same CPU for multiple interrupts. 
+ */ +#define WM_TX_INTR_CPUID 0 +#define WM_RX_INTR_CPUID 1 +#define WM_LINK_INTR_CPUID 2 +#endif + /* * Transmit descriptor list size. Due to errata, we can only have * 256 hardware descriptors in the ring on < 82544, but we use 4096 @@ -291,7 +313,9 @@ struct wm_softc { int sc_flowflags; /* 802.3x flow control flags */ int sc_align_tweak; +#ifndef PCI_MSI_MSIX void *sc_ih; /* interrupt cookie */ +#endif callout_t sc_tick_ch; /* tick callout */ bool sc_stopping; @@ -397,6 +421,16 @@ struct wm_softc { kmutex_t *sc_tx_lock; /* lock for tx operations */ kmutex_t *sc_rx_lock; /* lock for rx operations */ + +#ifdef PCI_MSI_MSIX + int sc_intr_type; +#define INTR_TYPE_INTX 0 +#define INTR_TYPE_MSI 1 +#define INTR_TYPE_MSIX 2 + + pci_intr_handle_t *sc_intrs; /* legacy and msi use sc_intrs[0] */ + void *sc_ihs[WM_NINTR]; /* interrupt cookie */ +#endif }; #define WM_TX_LOCK(_sc) if ((_sc)->sc_tx_lock) mutex_enter((_sc)->sc_tx_lock) @@ -592,6 +626,11 @@ static void wm_linkintr_gmii(struct wm_softc *, uint32_t); static void wm_linkintr_tbi(struct wm_softc *, uint32_t); static void wm_linkintr(struct wm_softc *, uint32_t); static int wm_intr(void *); +#ifdef PCI_MSI_MSIX +static int wm_tx_intr(void *); +static int wm_rx_intr(void *); +static int wm_link_intr(void *); +#endif /* * Media related. @@ -1345,7 +1384,9 @@ wm_attach(device_t parent, device_t self, void *aux) prop_dictionary_t dict; struct ifnet *ifp = &sc->sc_ethercom.ec_if; pci_chipset_tag_t pc = pa->pa_pc; +#ifndef PCI_MSI_MSIX pci_intr_handle_t ih; +#endif const char *intrstr = NULL; const char *eetype, *xname; bus_space_tag_t memt; @@ -1494,6 +1535,7 @@ wm_attach(device_t parent, device_t self, void *aux) return; } +#ifndef PCI_MSI_MSIX /* * Map and establish our interrupt. 
*/ @@ -1514,6 +1556,155 @@ wm_attach(device_t parent, device_t self, void *aux) return; } aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); +#else /* PCI_MSI_MSIX */ + if (pci_msix_alloc_exact(pa, &sc->sc_intrs, WM_NINTR) == 0) { + /* 1st, try to use MSI-X */ + void *vih; + kcpuset_t *affinity; + + kcpuset_create(&affinity, false); + + /* + * for TX + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_TX_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_TX_INTR_INDEX], PCI_INTR_MPSAFE, + true); +#endif + vih = pci_msix_establish(pc, sc->sc_intrs[WM_TX_INTR_INDEX], + IPL_NET, wm_tx_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for TX)%s%s\n", + intrstr ? " at " : "", intrstr ? intrstr : ""); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + /* Round-robin affinity */ + kcpuset_set(affinity, WM_TX_INTR_CPUID % ncpu); + mutex_enter(&cpu_lock); + error = intr_distribute(vih, affinity, NULL); + mutex_exit(&cpu_lock); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for TX interrupting at %s affinity to %u\n", + intrstr, WM_TX_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for TX interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_TX_INTR_INDEX] = vih; + + /* + * for RX + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_RX_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_RX_INTR_INDEX], + PCI_INTR_MPSAFE, true); +#endif + vih = pci_msix_establish(pc, sc->sc_intrs[WM_RX_INTR_INDEX], + IPL_NET, wm_rx_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for RX)%s%s\n", + intrstr ? " at " : "", intrstr ? 
intrstr : ""); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + kcpuset_set(affinity, WM_RX_INTR_CPUID % ncpu); + mutex_enter(&cpu_lock); + error = intr_distribute(vih, affinity, NULL); + mutex_exit(&cpu_lock); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for RX interrupting at %s affinity to %u\n", + intrstr, WM_RX_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for RX interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_RX_INTR_INDEX] = vih; + + /* + * for link state changing + */ + intrstr = pci_intr_string(pc, sc->sc_intrs[WM_LINK_INTR_INDEX], + intrbuf, sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[WM_LINK_INTR_INDEX], + PCI_INTR_MPSAFE, true); +#endif + vih = pci_msix_establish(pc, sc->sc_intrs[WM_LINK_INTR_INDEX], + IPL_NET, wm_link_intr, sc); + if (vih == NULL) { + aprint_error_dev(sc->sc_dev, + "unable to establish MSI-X(for LINK)%s%s\n", + intrstr ? " at " : "", intrstr ? intrstr : ""); + error = EBUSY; + return; + } + kcpuset_zero(affinity); + kcpuset_set(affinity, WM_LINK_INTR_CPUID % ncpu); + mutex_enter(&cpu_lock); + error = intr_distribute(vih, affinity, NULL); + mutex_exit(&cpu_lock); + if (error == 0) { + aprint_normal_dev(sc->sc_dev, + "for LINK interrupting at %s affinity to %u\n", + intrstr, WM_LINK_INTR_CPUID % ncpu); + } else { + aprint_normal_dev(sc->sc_dev, + "for LINK interrupting at %s\n", + intrstr); + } + sc->sc_ihs[WM_LINK_INTR_INDEX] = vih; + + sc->sc_intr_type = INTR_TYPE_MSIX; + kcpuset_destroy(affinity); /* XXX skipped by the early returns above */ + } else if (pci_msi_alloc_exact(pa, &sc->sc_intrs, 1) == 0) { + /* 2nd, try to use MSI (wm_detach tears it down with pci_msi_*()) */ + intrstr = pci_intr_string(pc, sc->sc_intrs[0], intrbuf, + sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[0], PCI_INTR_MPSAFE, true); +#endif + sc->sc_ihs[0] = pci_msi_establish(pc, sc->sc_intrs[0], + IPL_NET, wm_intr, sc); + if (sc->sc_ihs[0] == NULL) { + aprint_error_dev(sc->sc_dev, "unable to establish MSI\n"); + error = EBUSY; + return; + } +
aprint_normal_dev(sc->sc_dev, "MSI at %s\n", intrstr); + + sc->sc_intr_type = INTR_TYPE_MSI; + } else if (pci_intx_alloc(pa, &sc->sc_intrs) == 0) { + /* Last, try to use INTx (0 means success, as for the MSI/MSI-X allocators) */ + intrstr = pci_intr_string(pc, sc->sc_intrs[0], intrbuf, + sizeof(intrbuf)); +#ifdef WM_MPSAFE + pci_intr_setattr(pc, &sc->sc_intrs[0], PCI_INTR_MPSAFE, true); +#endif + sc->sc_ihs[0] = pci_intr_establish(pc, sc->sc_intrs[0], + IPL_NET, wm_intr, sc); + if (sc->sc_ihs[0] == NULL) { + aprint_error_dev(sc->sc_dev, "unable to establish INTx\n"); + error = EBUSY; + return; + } + aprint_normal_dev(sc->sc_dev, "interrupting at %s\n", intrstr); + + sc->sc_intr_type = INTR_TYPE_INTX; + } +#endif /* PCI_MSI_MSIX */ /* * Check the function ID (unit number of the chip). @@ -2492,11 +2683,25 @@ wm_detach(device_t self, int flags __unused) sc->sc_cd_size); bus_dmamem_free(sc->sc_dmat, &sc->sc_cd_seg, sc->sc_cd_rseg); +#ifndef PCI_MSI_MSIX /* Disestablish the interrupt handler */ if (sc->sc_ih != NULL) { pci_intr_disestablish(sc->sc_pc, sc->sc_ih); sc->sc_ih = NULL; } +#else /* PCI_MSI_MSIX */ + if (sc->sc_intr_type == INTR_TYPE_MSIX) { + for (i = 0; i < WM_NINTR; i++) + pci_msix_disestablish(sc->sc_pc, sc->sc_ihs[i]); + pci_msix_release(sc->sc_pc, &sc->sc_intrs, WM_NINTR); + } else if (sc->sc_intr_type == INTR_TYPE_MSI) { + pci_msi_disestablish(sc->sc_pc, sc->sc_ihs[0]); + pci_msi_release(sc->sc_pc, &sc->sc_intrs, 1); + } else if (sc->sc_intr_type == INTR_TYPE_INTX) { + pci_intr_disestablish(sc->sc_pc, sc->sc_ihs[0]); + pci_intx_release(sc->sc_pc, &sc->sc_intrs[0]); + } +#endif /* PCI_MSI_MSIX */ /* Unmap the registers */ if (sc->sc_ss) { @@ -3523,6 +3728,12 @@ wm_reset(struct wm_softc *sc) /* Clear interrupt */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); +#ifdef PCI_MSI_MSIX + if (sc->sc_intr_type == INTR_TYPE_MSIX) { + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } +#endif /* Stop the transmit and receive processes.
*/ CSR_WRITE(sc, WMREG_RCTL, 0); @@ -3767,6 +3978,12 @@ wm_reset(struct wm_softc *sc) /* Clear any pending interrupt events. */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); reg = CSR_READ(sc, WMREG_ICR); +#ifdef PCI_MSI_MSIX + if (sc->sc_intr_type == INTR_TYPE_MSIX) { + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } +#endif /* reload sc_ctrl */ sc->sc_ctrl = CSR_READ(sc, WMREG_CTRL); @@ -4164,11 +4381,50 @@ wm_init_locked(struct ifnet *ifp) reg |= RXCSUM_IPV6OFL | RXCSUM_TUOFL; CSR_WRITE(sc, WMREG_RXCSUM, reg); +#ifdef PCI_MSI_MSIX + /* Set up MSI-X */ + if (sc->sc_intr_type == INTR_TYPE_MSIX) { + uint32_t ivar; + + CSR_WRITE(sc, WMREG_GPIE, WMREG_GPIE_NSICR | WMREG_GPIE_MSIX_MODE | + WMREG_GPIE_EIAME | WMREG_GPIE_PBA); + + /* TX */ + ivar = CSR_READ(sc, WMREG_IVAR0); + ivar &= 0xFFFF00FF; + ivar |= (WM_TX_INTR_INDEX | WMREG_IVAR_VALID) << 8; + CSR_WRITE(sc, WMREG_IVAR0, ivar); + + /* RX */ + ivar = CSR_READ(sc, WMREG_IVAR0); + ivar &= 0xFFFFFF00; + ivar |= WM_RX_INTR_INDEX | WMREG_IVAR_VALID; + CSR_WRITE(sc, WMREG_IVAR0, ivar); + + /* LINK */ + ivar = (WM_LINK_INTR_INDEX | WMREG_IVAR_VALID) << 8; + CSR_WRITE(sc, WMREG_IVAR_MISC, ivar); + } +#endif /* PCI_MSI_MSIX */ + /* Set up the interrupt registers. 
*/ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); sc->sc_icr = ICR_TXDW | ICR_LSC | ICR_RXSEQ | ICR_RXDMT0 | ICR_RXO | ICR_RXT0; +#ifndef PCI_MSI_MSIX CSR_WRITE(sc, WMREG_IMS, sc->sc_icr); +#else /* PCI_MSI_MSIX */ + if (sc->sc_intr_type == INTR_TYPE_MSIX) { + uint32_t mask = (1 << WM_RX_INTR_INDEX) | (1 << WM_TX_INTR_INDEX) | + (1 << WM_LINK_INTR_INDEX); + CSR_WRITE(sc, WMREG_EIAC, mask); + CSR_WRITE(sc, WMREG_EIAM, mask); + CSR_WRITE(sc, WMREG_EIMS, mask); + CSR_WRITE(sc, WMREG_IMS, ICR_LSC); + } else { + CSR_WRITE(sc, WMREG_IMS, sc->sc_icr); + } +#endif /* PCI_MSI_MSIX */ if ((sc->sc_type == WM_T_ICH8) || (sc->sc_type == WM_T_ICH9) || (sc->sc_type == WM_T_ICH10) || (sc->sc_type == WM_T_PCH) @@ -4389,6 +4645,12 @@ wm_stop_locked(struct ifnet *ifp, int disable) */ CSR_WRITE(sc, WMREG_IMC, 0xffffffffU); sc->sc_icr = 0; +#ifdef PCI_MSI_MSIX + if (sc->sc_intr_type == INTR_TYPE_MSIX) { /* same test as in wm_reset() */ + CSR_WRITE(sc, WMREG_EIMC, 0xffffffffU); + CSR_WRITE(sc, WMREG_EIAC, 0); + } +#endif /* Release any queued transmit buffers. */ for (i = 0; i < WM_TXQUEUELEN(sc); i++) { @@ -6061,6 +6323,145 @@ wm_intr(void *arg) return handled; } +#ifdef PCI_MSI_MSIX +/* + * wm_tx_intr: + * + * Interrupt service routine for the MSI-X TX vector (WM_TX_INTR_INDEX). + */ +static int +wm_tx_intr(void *arg) +{ + struct wm_softc *sc = arg; + struct ifnet *ifp = &sc->sc_ethercom.ec_if; + uint32_t icr; + int handled = 0; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_TX_INTR_INDEX); + + WM_TX_LOCK(sc); + + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); /* XXXX no need?
*/ + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + + handled = 1; + + +#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS) + if (icr & ICR_TXDW) { + DPRINTF(WM_DEBUG_TX, + ("%s: TX: got TXDW interrupt\n", + device_xname(sc->sc_dev))); + WM_EVCNT_INCR(&sc->sc_ev_txdw); + } +#endif + + wm_txintr(sc); + } +out: + WM_TX_UNLOCK(sc); + + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_TX_INTR_INDEX); + + if (handled) { + /* Try to get more packets going. */ + ifp->if_start(ifp); + } + + return handled; +} + +/* + * wm_rx_intr: + * + * Interrupt service routine for the MSI-X RX vector (WM_RX_INTR_INDEX). + */ +static int +wm_rx_intr(void *arg) +{ + struct wm_softc *sc = arg; + uint32_t icr; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_RX_INTR_INDEX); + WM_RX_LOCK(sc); + + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + +#if defined(WM_DEBUG) || defined(WM_EVENT_COUNTERS) + if (icr & (ICR_RXDMT0|ICR_RXT0)) { + DPRINTF(WM_DEBUG_RX, + ("%s: RX: got Rx intr 0x%08x\n", + device_xname(sc->sc_dev), + icr & (ICR_RXDMT0|ICR_RXT0))); + WM_EVCNT_INCR(&sc->sc_ev_rxintr); + } +#endif + wm_rxintr(sc); + + if (icr & ICR_RXO) { +#if defined(WM_DEBUG) + log(LOG_WARNING, "%s: Receive overrun\n", + device_xname(sc->sc_dev)); +#endif /* defined(WM_DEBUG) */ + } + } +out: + WM_RX_UNLOCK(sc); + + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_RX_INTR_INDEX); + + return 1; /* always 1, even when sc_stopping is set */ +} + +/* + * wm_link_intr: + * + * Interrupt service routine for the MSI-X link state vector.
+ */ +static int +wm_link_intr(void *arg) +{ + struct wm_softc *sc = arg; + uint32_t icr; + + CSR_WRITE(sc, WMREG_EIMC, 1 << WM_LINK_INTR_INDEX); + WM_TX_LOCK(sc); + if (sc->sc_stopping) + goto out; + + while (1 /* CONSTCOND */) { + icr = CSR_READ(sc, WMREG_ICR); + if ((icr & sc->sc_icr) == 0) + break; + rnd_add_uint32(&sc->rnd_source, icr); + + if (icr & (ICR_LSC|ICR_RXSEQ)) { + WM_EVCNT_INCR(&sc->sc_ev_linkintr); + wm_linkintr(sc, icr); + } + } + +out: + WM_TX_UNLOCK(sc); + CSR_WRITE(sc, WMREG_EIMS, 1 << WM_LINK_INTR_INDEX); + + return 1; +} +#endif /* PCI_MSI_MSIX */ + /* * Media related. * GMII, SGMII, TBI (and SERDES) diff --git a/sys/dev/pci/if_wmreg.h b/sys/dev/pci/if_wmreg.h index fa44a14..cfe4ae2 100644 --- a/sys/dev/pci/if_wmreg.h +++ b/sys/dev/pci/if_wmreg.h @@ -438,6 +438,18 @@ struct livengood_tcpip_ctxdesc { #define WMREG_ICS 0x00c8 /* Interrupt Cause Set Register */ /* See ICR bits. */ +#define WMREG_IVAL 0x00e4 /* Interrupt Vector Allocation Register */ +#define WMREG_IVAR0 0x01700 /* Interrupt Vector Allocation */ +#define WMREG_IVAR_MISC 0x01740 /* IVAR for other causes */ + +#define WMREG_GPIE 0x01514 /* General Purpose Interrupt Enable */ +#define WMREG_GPIE_NSICR 0x00000001 +#define WMREG_GPIE_MSIX_MODE 0x00000010 +#define WMREG_GPIE_EIAME 0x40000000 +#define WMREG_GPIE_PBA 0x80000000 + +#define WMREG_IVAR_VALID 0x80 + #define WMREG_IMS 0x00d0 /* Interrupt Mask Set Register */ /* See ICR bits. 
*/ diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index b8e2ece..af50dde 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -275,6 +275,10 @@ pci_probe_device(struct pci_softc *sc, pcitag_t tag, pci_chipset_tag_t pc = sc->sc_pc; struct pci_attach_args pa; pcireg_t id, /* csr, */ pciclass, intr, bhlcr, bar, endbar; +#ifdef PCI_MSI_MSIX /* defined in */ + pcireg_t cap; + int off; +#endif int ret, pin, bus, device, function, i, width; int locs[PCICF_NLOCS]; @@ -406,6 +410,34 @@ pci_probe_device(struct pci_softc *sc, pcitag_t tag, } pa.pa_intrline = PCI_INTERRUPT_LINE(intr); +#ifdef PCI_MSI_MSIX + if (pci_get_ht_capability(pc, tag, PCI_HT_CAP_MSIMAP, &off, &cap)) { + /* + * XXX Should we enable MSI mapping ourselves on + * systems that have it disabled? + */ + if (cap & PCI_HT_MSI_ENABLED) { + uint64_t addr; + if ((cap & PCI_HT_MSI_FIXED) == 0) { + addr = pci_conf_read(pc, tag, + off + PCI_HT_MSI_ADDR_LO); + addr |= (uint64_t)pci_conf_read(pc, tag, + off + PCI_HT_MSI_ADDR_HI) << 32; + } else + addr = PCI_HT_MSI_FIXED_ADDR; + + /* + * XXX This will fail to enable MSI on systems + * that don't use the canonical address. 
+ */ + if (addr == PCI_HT_MSI_FIXED_ADDR) { + pa.pa_flags |= PCI_FLAGS_MSI_OKAY; + pa.pa_flags |= PCI_FLAGS_MSIX_OKAY; + } + } + } +#endif + if (match != NULL) { ret = (*match)(&pa); if (ret != 0 && pap != NULL) @@ -508,6 +540,35 @@ pci_get_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid, } int +pci_get_ht_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid, + int *offset, pcireg_t *value) +{ + pcireg_t reg; + int ofs; /* int, to match pci_get_capability() and *offset */ + + if (pci_get_capability(pc, tag, PCI_CAP_LDT, &ofs, NULL) == 0) + return 0; /* device has no PCI_CAP_LDT capability */ + + while (ofs != 0) { +#ifdef DIAGNOSTIC + if ((ofs & 3) || (ofs < 0x40)) + panic("pci_get_ht_capability"); +#endif + reg = pci_conf_read(pc, tag, ofs); + if (PCI_HT_CAP(reg) == capid) { + if (offset) + *offset = ofs; + if (value) + *value = reg; + return 1; /* found; outputs filled in when non-NULL */ + } + ofs = PCI_CAPLIST_NEXT(reg); + } + + return 0; /* list exhausted without a match for capid */ +} + +int pci_find_device(struct pci_attach_args *pa, int (*match)(const struct pci_attach_args *)) { diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h index 621598a..7ee5df7 100644 --- a/sys/dev/pci/pcireg.h +++ b/sys/dev/pci/pcireg.h @@ -653,6 +653,9 @@ typedef u_int8_t pci_revision_t; * MSI Pending Bits (32 bit field) */ + /* Max number of MSI vectors. See PCI-SIG specification. */ +#define PCI_MSI_MAX_VECTORS 32 + /* * Capability ID: 0x07 * PCI-X capability. @@ -1069,6 +1072,9 @@ struct pci_msix_table_entry { }; #define PCI_MSIX_VECTCTL_HWMASK_MASK 0x00000001 + /* Max number of MSI-X vectors. See PCI-SIG specification.
*/ +#define PCI_MSIX_MAX_VECTORS 2048 + /* * Capability ID: 0x12 * SATA diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index 3b58a67..a175b78 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -279,6 +279,7 @@ int pci_find_rom(const struct pci_attach_args *, bus_space_tag_t, int, bus_space_handle_t *, bus_size_t *); int pci_get_capability(pci_chipset_tag_t, pcitag_t, int, int *, pcireg_t *); +int pci_get_ht_capability(pci_chipset_tag_t, pcitag_t, int, int *, pcireg_t *); /* * Helper functions for autoconfiguration. diff --git a/sys/kern/kern_stub.c b/sys/kern/kern_stub.c index 79dbaa6..d675829 100644 --- a/sys/kern/kern_stub.c +++ b/sys/kern/kern_stub.c @@ -147,6 +147,8 @@ __weak_alias(userconf_prompt, voidop); __weak_alias(kobj_renamespace, nullop); +__weak_alias(intr_distribute, eopnotsupp); + /* * Scheduler activations system calls. These need to remain until libc's * major version is bumped. diff --git a/sys/sys/intr.h b/sys/sys/intr.h index 3904fdd..52f8285 100644 --- a/sys/sys/intr.h +++ b/sys/sys/intr.h @@ -32,11 +32,15 @@ #ifndef _SYS_INTR_H_ #define _SYS_INTR_H_ +#define INTRIDBUF 64 + #ifdef _KERNEL #include struct cpu_info; +struct kcpuset; +typedef struct kcpuset kcpuset_t; /* Public interface. */ void *softint_establish(u_int, void (*)(void *), void *); @@ -57,6 +61,8 @@ void softint_trigger(uintptr_t); #endif void softint_dispatch(lwp_t *, int); +int intr_distribute(void *, const kcpuset_t *, kcpuset_t *); + /* Flags for softint_establish(). */ #define SOFTINT_BIO 0x0000 #define SOFTINT_CLOCK 0x0001