390 lines
11 KiB
C
390 lines
11 KiB
C
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved.
 */
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/pci.h>
|
|
#include <asm/sn/addrs.h>
|
|
#include <asm/sn/geo.h>
|
|
#include <asm/sn/pcibr_provider.h>
|
|
#include <asm/sn/pcibus_provider_defs.h>
|
|
#include <asm/sn/pcidev.h>
|
|
#include <asm/sn/pic.h>
|
|
#include <asm/sn/sn_sal.h>
|
|
#include <asm/sn/tiocp.h>
|
|
#include "tio.h"
|
|
#include "xtalk/xwidgetdev.h"
|
|
#include "xtalk/hubdev.h"
|
|
|
|
/*
 * Defined outside this file.  sn_dma_flush() returns immediately, without
 * touching any hardware, while this is zero.
 */
extern int sn_ioif_inited;
|
|
|
|
/* =====================================================================
 *    DMA MANAGEMENT
 *
 *      The Bridge ASIC provides three methods of doing DMA: via a "direct map"
 *      register available in 32-bit PCI space (which selects a contiguous 2G
 *      address space on some other widget), via "direct" addressing via 64-bit
 *      PCI space (all destination information comes from the PCI address,
 *      including transfer attributes), and via a "mapped" region that allows
 *      a bunch of different small mappings to be established with the PMU.
 *
 *      For efficiency, we most prefer to use the 32-bit direct mapping
 *      facility, since it requires no resource allocations.  The advantage of
 *      using the PMU over the 64-bit direct is that single-cycle PCI
 *      addressing can be used; the advantage of using 64-bit direct over PMU
 *      addressing is that we do not have to allocate entries in the PMU.
 */
|
|
|
|
static dma_addr_t
|
|
pcibr_dmamap_ate32(struct pcidev_info *info,
|
|
uint64_t paddr, size_t req_size, uint64_t flags)
|
|
{
|
|
|
|
struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
|
|
struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
|
|
pdi_pcibus_info;
|
|
uint8_t internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info->
|
|
pdi_linux_pcidev->devfn)) - 1;
|
|
int ate_count;
|
|
int ate_index;
|
|
uint64_t ate_flags = flags | PCI32_ATE_V;
|
|
uint64_t ate;
|
|
uint64_t pci_addr;
|
|
uint64_t xio_addr;
|
|
uint64_t offset;
|
|
|
|
/* PIC in PCI-X mode does not supports 32bit PageMap mode */
|
|
if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) {
|
|
return 0;
|
|
}
|
|
|
|
/* Calculate the number of ATEs needed. */
|
|
if (!(MINIMAL_ATE_FLAG(paddr, req_size))) {
|
|
ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */
|
|
+req_size /* max mapping bytes */
|
|
- 1) + 1; /* round UP */
|
|
} else { /* assume requested target is page aligned */
|
|
ate_count = IOPG(req_size /* max mapping bytes */
|
|
- 1) + 1; /* round UP */
|
|
}
|
|
|
|
/* Get the number of ATEs required. */
|
|
ate_index = pcibr_ate_alloc(pcibus_info, ate_count);
|
|
if (ate_index < 0)
|
|
return 0;
|
|
|
|
/* In PCI-X mode, Prefetch not supported */
|
|
if (IS_PCIX(pcibus_info))
|
|
ate_flags &= ~(PCI32_ATE_PREF);
|
|
|
|
xio_addr =
|
|
IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
|
|
PHYS_TO_TIODMA(paddr);
|
|
offset = IOPGOFF(xio_addr);
|
|
ate = ate_flags | (xio_addr - offset);
|
|
|
|
/* If PIC, put the targetid in the ATE */
|
|
if (IS_PIC_SOFT(pcibus_info)) {
|
|
ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT);
|
|
}
|
|
ate_write(pcibus_info, ate_index, ate_count, ate);
|
|
|
|
/*
|
|
* Set up the DMA mapped Address.
|
|
*/
|
|
pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index;
|
|
|
|
/*
|
|
* If swap was set in device in pcibr_endian_set()
|
|
* we need to turn swapping on.
|
|
*/
|
|
if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR)
|
|
ATE_SWAP_ON(pci_addr);
|
|
|
|
return pci_addr;
|
|
}
|
|
|
|
static dma_addr_t
|
|
pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
|
|
uint64_t dma_attributes)
|
|
{
|
|
struct pcibus_info *pcibus_info = (struct pcibus_info *)
|
|
((info->pdi_host_pcidev_info)->pdi_pcibus_info);
|
|
uint64_t pci_addr;
|
|
|
|
/* Translate to Crosstalk View of Physical Address */
|
|
pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
|
|
PHYS_TO_TIODMA(paddr)) | dma_attributes;
|
|
|
|
/* Handle Bus mode */
|
|
if (IS_PCIX(pcibus_info))
|
|
pci_addr &= ~PCI64_ATTR_PREF;
|
|
|
|
/* Handle Bridge Chipset differences */
|
|
if (IS_PIC_SOFT(pcibus_info)) {
|
|
pci_addr |=
|
|
((uint64_t) pcibus_info->
|
|
pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT);
|
|
} else
|
|
pci_addr |= TIOCP_PCI64_CMDTYPE_MEM;
|
|
|
|
/* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */
|
|
if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn))
|
|
pci_addr |= PCI64_ATTR_VIRTUAL;
|
|
|
|
return pci_addr;
|
|
|
|
}
|
|
|
|
static dma_addr_t
|
|
pcibr_dmatrans_direct32(struct pcidev_info * info,
|
|
uint64_t paddr, size_t req_size, uint64_t flags)
|
|
{
|
|
|
|
struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
|
|
struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
|
|
pdi_pcibus_info;
|
|
uint64_t xio_addr;
|
|
|
|
uint64_t xio_base;
|
|
uint64_t offset;
|
|
uint64_t endoff;
|
|
|
|
if (IS_PCIX(pcibus_info)) {
|
|
return 0;
|
|
}
|
|
|
|
xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
|
|
PHYS_TO_TIODMA(paddr);
|
|
|
|
xio_base = pcibus_info->pbi_dir_xbase;
|
|
offset = xio_addr - xio_base;
|
|
endoff = req_size + offset;
|
|
if ((req_size > (1ULL << 31)) || /* Too Big */
|
|
(xio_addr < xio_base) || /* Out of range for mappings */
|
|
(endoff > (1ULL << 31))) { /* Too Big */
|
|
return 0;
|
|
}
|
|
|
|
return PCI32_DIRECT_BASE | offset;
|
|
|
|
}
|
|
|
|
/*
|
|
* Wrapper routine for free'ing DMA maps
|
|
* DMA mappings for Direct 64 and 32 do not have any DMA maps.
|
|
*/
|
|
void
|
|
pcibr_dma_unmap(struct pci_dev *hwdev, dma_addr_t dma_handle, int direction)
|
|
{
|
|
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
|
|
struct pcibus_info *pcibus_info =
|
|
(struct pcibus_info *)pcidev_info->pdi_pcibus_info;
|
|
|
|
if (IS_PCI32_MAPPED(dma_handle)) {
|
|
int ate_index;
|
|
|
|
ate_index =
|
|
IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE));
|
|
pcibr_ate_free(pcibus_info, ate_index);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* On SN systems there is a race condition between a PIO read response and
|
|
* DMA's. In rare cases, the read response may beat the DMA, causing the
|
|
* driver to think that data in memory is complete and meaningful. This code
|
|
* eliminates that race. This routine is called by the PIO read routines
|
|
* after doing the read. For PIC this routine then forces a fake interrupt
|
|
* on another line, which is logically associated with the slot that the PIO
|
|
* is addressed to. It then spins while watching the memory location that
|
|
* the interrupt is targetted to. When the interrupt response arrives, we
|
|
* are sure that the DMA has landed in memory and it is safe for the driver
|
|
* to proceed. For TIOCP use the Device(x) Write Request Buffer Flush
|
|
* Bridge register since it ensures the data has entered the coherence domain,
|
|
* unlike the PIC Device(x) Write Request Buffer Flush register.
|
|
*/
|
|
|
|
void sn_dma_flush(uint64_t addr)
|
|
{
|
|
nasid_t nasid;
|
|
int is_tio;
|
|
int wid_num;
|
|
int i, j;
|
|
uint64_t flags;
|
|
uint64_t itte;
|
|
struct hubdev_info *hubinfo;
|
|
volatile struct sn_flush_device_list *p;
|
|
struct sn_flush_nasid_entry *flush_nasid_list;
|
|
|
|
if (!sn_ioif_inited)
|
|
return;
|
|
|
|
nasid = NASID_GET(addr);
|
|
if (-1 == nasid_to_cnodeid(nasid))
|
|
return;
|
|
|
|
hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
|
|
|
|
if (!hubinfo) {
|
|
BUG();
|
|
}
|
|
|
|
flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
|
|
if (flush_nasid_list->widget_p == NULL)
|
|
return;
|
|
|
|
is_tio = (nasid & 1);
|
|
if (is_tio) {
|
|
int itte_index;
|
|
|
|
if (TIO_HWIN(addr))
|
|
itte_index = 0;
|
|
else if (TIO_BWIN_WINDOWNUM(addr))
|
|
itte_index = TIO_BWIN_WINDOWNUM(addr);
|
|
else
|
|
itte_index = -1;
|
|
|
|
if (itte_index >= 0) {
|
|
itte = flush_nasid_list->iio_itte[itte_index];
|
|
if (! TIO_ITTE_VALID(itte))
|
|
return;
|
|
wid_num = TIO_ITTE_WIDGET(itte);
|
|
} else
|
|
wid_num = TIO_SWIN_WIDGETNUM(addr);
|
|
} else {
|
|
if (BWIN_WINDOWNUM(addr)) {
|
|
itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)];
|
|
wid_num = IIO_ITTE_WIDGET(itte);
|
|
} else
|
|
wid_num = SWIN_WIDGETNUM(addr);
|
|
}
|
|
if (flush_nasid_list->widget_p[wid_num] == NULL)
|
|
return;
|
|
p = &flush_nasid_list->widget_p[wid_num][0];
|
|
|
|
/* find a matching BAR */
|
|
for (i = 0; i < DEV_PER_WIDGET; i++) {
|
|
for (j = 0; j < PCI_ROM_RESOURCE; j++) {
|
|
if (p->sfdl_bar_list[j].start == 0)
|
|
break;
|
|
if (addr >= p->sfdl_bar_list[j].start
|
|
&& addr <= p->sfdl_bar_list[j].end)
|
|
break;
|
|
}
|
|
if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0)
|
|
break;
|
|
p++;
|
|
}
|
|
|
|
/* if no matching BAR, return without doing anything. */
|
|
if (i == DEV_PER_WIDGET)
|
|
return;
|
|
|
|
/*
|
|
* For TIOCP use the Device(x) Write Request Buffer Flush Bridge
|
|
* register since it ensures the data has entered the coherence
|
|
* domain, unlike PIC.
|
|
*/
|
|
if (is_tio) {
|
|
/*
|
|
* Note: devices behind TIOCE should never be matched in the
|
|
* above code, and so the following code is PIC/CP centric.
|
|
* If CE ever needs the sn_dma_flush mechanism, we will have
|
|
* to account for that here and in tioce_bus_fixup().
|
|
*/
|
|
uint32_t tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID));
|
|
uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id);
|
|
|
|
/* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */
|
|
if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) {
|
|
return;
|
|
} else {
|
|
pcireg_wrb_flush_get(p->sfdl_pcibus_info,
|
|
(p->sfdl_slot - 1));
|
|
}
|
|
} else {
|
|
spin_lock_irqsave(&((struct sn_flush_device_list *)p)->
|
|
sfdl_flush_lock, flags);
|
|
|
|
*p->sfdl_flush_addr = 0;
|
|
|
|
/* force an interrupt. */
|
|
*(volatile uint32_t *)(p->sfdl_force_int_addr) = 1;
|
|
|
|
/* wait for the interrupt to come back. */
|
|
while (*(p->sfdl_flush_addr) != 0x10f)
|
|
cpu_relax();
|
|
|
|
/* okay, everything is synched up. */
|
|
spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags);
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* DMA interfaces. Called from pci_dma.c routines.
|
|
*/
|
|
|
|
dma_addr_t
|
|
pcibr_dma_map(struct pci_dev * hwdev, unsigned long phys_addr, size_t size)
|
|
{
|
|
dma_addr_t dma_handle;
|
|
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
|
|
|
|
/* SN cannot support DMA addresses smaller than 32 bits. */
|
|
if (hwdev->dma_mask < 0x7fffffff) {
|
|
return 0;
|
|
}
|
|
|
|
if (hwdev->dma_mask == ~0UL) {
|
|
/*
|
|
* Handle the most common case: 64 bit cards. This
|
|
* call should always succeed.
|
|
*/
|
|
|
|
dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
|
|
PCI64_ATTR_PREF);
|
|
} else {
|
|
/* Handle 32-63 bit cards via direct mapping */
|
|
dma_handle = pcibr_dmatrans_direct32(pcidev_info, phys_addr,
|
|
size, 0);
|
|
if (!dma_handle) {
|
|
/*
|
|
* It is a 32 bit card and we cannot do direct mapping,
|
|
* so we use an ATE.
|
|
*/
|
|
|
|
dma_handle = pcibr_dmamap_ate32(pcidev_info, phys_addr,
|
|
size, PCI32_ATE_PREF);
|
|
}
|
|
}
|
|
|
|
return dma_handle;
|
|
}
|
|
|
|
dma_addr_t
|
|
pcibr_dma_map_consistent(struct pci_dev * hwdev, unsigned long phys_addr,
|
|
size_t size)
|
|
{
|
|
dma_addr_t dma_handle;
|
|
struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(hwdev);
|
|
|
|
if (hwdev->dev.coherent_dma_mask == ~0UL) {
|
|
dma_handle = pcibr_dmatrans_direct64(pcidev_info, phys_addr,
|
|
PCI64_ATTR_BAR);
|
|
} else {
|
|
dma_handle = (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
|
|
phys_addr, size,
|
|
PCI32_ATE_BAR);
|
|
}
|
|
|
|
return dma_handle;
|
|
}
|
|
|
|
/* Export sn_dma_flush() through the kernel's EXPORT_SYMBOL mechanism. */
EXPORT_SYMBOL(sn_dma_flush);
|