2 * Driver for Altera PCIe core chaining DMA reference design.
4 * Copyright (C) 2008 Leon Woestenberg <leon.woestenberg@axon.tv>
5 * Copyright (C) 2008 Nickolas Heppermann <heppermannwdt@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * Rationale: This driver exercises the chaining DMA read and write engine
23 * in the reference design. It is meant as a complementary reference
24 * driver that can be used for testing early designs as well as a basis to
25 * write your custom driver.
27 * Status: Test results from Leon Woestenberg <leon.woestenberg@axon.tv>:
29 * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a
30 * Dell Precision 370 PC, x86, kernel 2.6.20 from Ubuntu 7.04.
32 * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a
33 * Freescale MPC8313E-RDB board, PowerPC, 2.6.24 w/ Freescale patches.
35 * Driver tests passed with PCIe Compiler 8.1. With PCIe 8.0 the DMA
36 * loopback test had reproducible compare errors. I assume a change
37 * in the compiler or reference design, but could not find evidence nor
38 * documentation on a change or fix in that direction.
40 * The reference design does not have readable locations and thus a
41 * dummy read, used to flush PCI posted writes, cannot be performed.
45 #include <linux/kernel.h>
46 #include <linux/cdev.h>
47 #include <linux/delay.h>
48 #include <linux/dma-mapping.h>
49 #include <linux/delay.h>
50 #include <linux/init.h>
51 #include <linux/interrupt.h>
53 #include <linux/jiffies.h>
54 #include <linux/module.h>
55 #include <linux/pci.h>
/* by default do not build the character device interface */
/* XXX It is non-functional yet */
#ifndef ALTPCIECHDMA_CDEV
#  define ALTPCIECHDMA_CDEV 0
#endif

/* build the character device interface? */
#if ALTPCIECHDMA_CDEV
/* maximum size of a single chained-DMA user transfer through the cdev */
#  define MAX_CHDMA_SIZE (8 * 1024 * 1024)
#  include "mapper_user_to_sg.h"
#endif
/** driver name, mimics Altera naming of the reference design */
#define DRV_NAME "altpciechdma"
/** number of BARs on the device */
#define APE_BAR_NUM (6)
/** BAR number where the RCSLAVE memory sits */
#define APE_BAR_RCSLAVE (0)
/** BAR number where the Descriptor Header sits */
#define APE_BAR_HEADER (2)

/** maximum size in bytes of the descriptor table, chdma logic limit */
#define APE_CHDMA_TABLE_SIZE (4096)
/* A single transfer must not exceed 255 table entries. Worst case this can
 * be achieved by 255 scattered pages, with only a single byte in the head
 * and tail pages. 253 * PAGE_SIZE is a safe upper bound for the transfer
 * size.
 */
#define APE_CHDMA_MAX_TRANSFER_LEN (253 * PAGE_SIZE)
88 * Specifies those BARs to be mapped and the length of each mapping.
90 * Zero (0) means do not map, otherwise specifies the BAR lengths to be mapped.
91 * If the actual BAR length is less, this is considered an error; then
92 * reconfigure your PCIe core.
94 * @see ug_pci_express 8.0, table 7-2 at page 7-13.
96 static const unsigned long bar_min_len
[APE_BAR_NUM
] =
97 { 32768, 0, 256, 0, 32768, 0 };
100 * Descriptor Header, controls the DMA read engine or write engine.
102 * The descriptor header is the main data structure for starting DMA transfers.
104 * It sits in End Point (FPGA) memory BAR[2] for 32-bit or BAR[3:2] for 64-bit.
105 * It references a descriptor table which exists in Root Complex (PC) memory.
106 * Writing the rclast field starts the DMA operation, thus all other structures
107 * and fields must be setup before doing so.
109 * @see ug_pci_express 8.0, tables 7-3, 7-4 and 7-5 at page 7-14.
110 * @note This header must be written in four 32-bit (PCI DWORD) writes.
112 struct ape_chdma_header
{
114 * w0 consists of two 16-bit fields:
115 * lsb u16 number; number of descriptors in ape_chdma_table
116 * msb u16 control; global control flags
119 /* bus address to ape_chdma_table in Root Complex memory */
123 * w3 consists of two 16-bit fields:
124 * - lsb u16 rclast; last descriptor number available in Root Complex
125 * - zero (0) means the first descriptor is ready,
126 * - one (1) means two descriptors are ready, etc.
127 * - msb u16 reserved;
129 * @note writing to this memory location starts the DMA operation!
132 } __attribute__ ((packed
));
135 * Descriptor Entry, describing a (non-scattered) single memory block transfer.
137 * There is one descriptor for each memory block involved in the transfer, a
138 * block being a contiguous address range on the bus.
140 * Multiple descriptors are chained by means of the ape_chdma_table data
143 * @see ug_pci_express 8.0, tables 7-6, 7-7 and 7-8 at page 7-14 and page 7-15.
145 struct ape_chdma_desc
{
147 * w0 consists of two 16-bit fields:
148 * number of DWORDS to transfer
154 /* address of memory in the End Point */
156 /* bus address of source or destination memory in the Root Complex */
159 } __attribute__ ((packed
));
162 * Descriptor Table, an array of descriptors describing a chained transfer.
164 * An array of descriptors, preceded by workspace for the End Point.
165 * It exists in Root Complex memory.
167 * The End Point can update its last completed descriptor number in the
168 * eplast field if requested by setting the EPLAST_ENA bit either
169 * globally in the header's or locally in any descriptor's control field.
171 * @note this structure may not exceed 4096 bytes. This results in a
172 * maximum of 4096 / (4 * 4) - 1 = 255 descriptors per chained transfer.
174 * @see ug_pci_express 8.0, tables 7-9, 7-10 and 7-11 at page 7-17 and page 7-18.
176 struct ape_chdma_table
{
177 /* workspace 0x00-0x0b, reserved */
179 /* workspace 0x0c-0x0f, last descriptor handled by End Point */
181 /* the actual array of descriptors
182 * 0x10-0x1f, 0x20-0x2f, ... 0xff0-0xfff (255 entries)
184 struct ape_chdma_desc desc
[255];
185 } __attribute__ ((packed
));
188 * Altera PCI Express ('ape') board specific book keeping data
190 * Keeps state of the PCIe core and the Chaining DMA controller
194 /** the kernel pci device data structure provided by probe() */
195 struct pci_dev
*pci_dev
;
197 * kernel virtual address of the mapped BAR memory and IO regions of
198 * the End Point. Used by map_bars()/unmap_bars().
200 void * __iomem bar
[APE_BAR_NUM
];
201 /** kernel virtual address for Descriptor Table in Root Complex memory */
202 struct ape_chdma_table
*table_virt
;
204 * bus address for the Descriptor Table in Root Complex memory, in
205 * CPU-native endianess
207 dma_addr_t table_bus
;
208 /* if the device regions could not be allocated, assume and remember it
209 * is in use by another driver; this driver must not disable the device.
212 /* whether this driver enabled msi for the device */
214 /* whether this driver could obtain the regions */
216 /* irq line succesfully requested by this driver, -1 otherwise */
220 /* interrupt count, incremented by the interrupt handler */
222 #if ALTPCIECHDMA_CDEV
223 /* character device */
226 /* user space scatter gather mapper */
227 struct sg_mapping_t
*sgm
;
232 * Using the subsystem vendor id and subsystem id, it is possible to
233 * distinguish between different cards bases around the same
234 * (third-party) logic core.
236 * Default Altera vendor and device ID's, and some (non-reserved)
237 * ID's are now used here that are used amongst the testers/developers.
239 static const struct pci_device_id ids
[] = {
240 { PCI_DEVICE(0x1172, 0xE001), },
241 { PCI_DEVICE(0x2071, 0x2071), },
244 MODULE_DEVICE_TABLE(pci
, ids
);
#if ALTPCIECHDMA_CDEV
/* prototypes for character device */
static int sg_init(struct ape_dev *ape);
static void sg_exit(struct ape_dev *ape);
#endif
253 * altpciechdma_isr() - Interrupt handler
256 static irqreturn_t
altpciechdma_isr(int irq
, void *dev_id
)
258 struct ape_dev
*ape
= (struct ape_dev
*)dev_id
;
265 static int __devinit
scan_bars(struct ape_dev
*ape
, struct pci_dev
*dev
)
268 for (i
= 0; i
< APE_BAR_NUM
; i
++) {
269 unsigned long bar_start
= pci_resource_start(dev
, i
);
271 unsigned long bar_end
= pci_resource_end(dev
, i
);
272 unsigned long bar_flags
= pci_resource_flags(dev
, i
);
273 printk(KERN_DEBUG
"BAR%d 0x%08lx-0x%08lx flags 0x%08lx\n",
274 i
, bar_start
, bar_end
, bar_flags
);
281 * Unmap the BAR regions that had been mapped earlier using map_bars()
283 static void unmap_bars(struct ape_dev
*ape
, struct pci_dev
*dev
)
286 for (i
= 0; i
< APE_BAR_NUM
; i
++) {
287 /* is this BAR mapped? */
290 pci_iounmap(dev
, ape
->bar
[i
]);
297 * Map the device memory regions into kernel virtual address space after
298 * verifying their sizes respect the minimum sizes needed, given by the
299 * bar_min_len[] array.
301 static int __devinit
map_bars(struct ape_dev
*ape
, struct pci_dev
*dev
)
305 /* iterate through all the BARs */
306 for (i
= 0; i
< APE_BAR_NUM
; i
++) {
307 unsigned long bar_start
= pci_resource_start(dev
, i
);
308 unsigned long bar_end
= pci_resource_end(dev
, i
);
309 unsigned long bar_length
= bar_end
- bar_start
+ 1;
311 /* do not map, and skip, BARs with length 0 */
314 /* do not map BARs with address 0 */
315 if (!bar_start
|| !bar_end
) {
316 printk(KERN_DEBUG
"BAR #%d is not present?!\n", i
);
320 bar_length
= bar_end
- bar_start
+ 1;
321 /* BAR length is less than driver requires? */
322 if (bar_length
< bar_min_len
[i
]) {
323 printk(KERN_DEBUG
"BAR #%d length = %lu bytes but driver "
324 "requires at least %lu bytes\n", i
, bar_length
, bar_min_len
[i
]);
328 /* map the device memory or IO region into kernel virtual
330 ape
->bar
[i
] = pci_iomap(dev
, i
, bar_min_len
[i
]);
332 printk(KERN_DEBUG
"Could not map BAR #%d.\n", i
);
336 printk(KERN_DEBUG
"BAR[%d] mapped at 0x%p with length %lu(/%lu).\n", i
,
337 ape
->bar
[i
], bar_min_len
[i
], bar_length
);
339 /* succesfully mapped all required BAR regions */
343 /* unmap any BARs that we did map */
344 unmap_bars(ape
, dev
);
#if 0 /* not yet implemented fully FIXME add opcode */
/* rcslave_test() - Write a rolling pattern to RCSLAVE memory and read it
 * back, reporting the first mismatch.
 *
 * NOTE(review): compiled out (#if 0); the loop variables and pattern
 * increment were lost in the source dump and are reconstructed — verify
 * before enabling.
 */
static void __devinit rcslave_test(struct ape_dev *ape, struct pci_dev *dev)
{
	u32 *rcslave_mem = (u32 *)ape->bar[APE_BAR_RCSLAVE];
	u32 result = 0;
	/** this number is assumed to be different each time this test runs */
	u32 seed = (u32)jiffies;
	u32 value = seed;
	int i;

	/* write a unique value into each RCSLAVE word */
	for (i = 1024; i < 32768 / 4; i++) {
		printk(KERN_DEBUG "Writing 0x%08x to 0x%p.\n",
		       (u32)value, (void *)rcslave_mem + i);
		iowrite32(value, rcslave_mem + i);
		value++;
	}
	/* read back and verify the pattern */
	value = seed;
	for (i = 1024; i < 32768 / 4; i++) {
		result = ioread32(rcslave_mem + i);
		if (result != value) {
			printk(KERN_DEBUG "Wrote 0x%08x to 0x%p, but read back 0x%08x.\n",
			       (u32)value, (void *)rcslave_mem + i, (u32)result);
			break;
		}
		value++;
	}
}
#endif
/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address;
 * the double shift avoids an undefined 32-bit shift on 32-bit dma_addr_t.
 * Arguments are parenthesized so that expression arguments (e.g. a ternary)
 * expand correctly. */
#define pci_dma_h(addr) ((((addr) >> 16) >> 16))
/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */
#define pci_dma_l(addr) ((addr) & 0xffffffffUL)
386 /* ape_fill_chdma_desc() - Fill a Altera PCI Express Chaining DMA descriptor
388 * @desc pointer to descriptor to be filled
389 * @addr root complex address
390 * @ep_addr end point address
391 * @len number of bytes, must be a multiple of 4.
393 static inline void ape_chdma_desc_set(struct ape_chdma_desc
*desc
, dma_addr_t addr
, u32 ep_addr
, int len
)
396 desc
->w0
= cpu_to_le32(len
/ 4);
397 desc
->ep_addr
= cpu_to_le32(ep_addr
);
398 desc
->rc_addr_h
= cpu_to_le32(pci_dma_h(addr
));
399 desc
->rc_addr_l
= cpu_to_le32(pci_dma_l(addr
));
403 * ape_sg_to_chdma_table() - Create a device descriptor table from a scatterlist.
405 * The scatterlist must have been mapped by pci_map_sg(sgm->sgl).
408 * @nents Number of entries in the scatterlist.
409 * @first Start index in the scatterlist sgm->sgl.
410 * @ep_addr End Point address for the scatter/gather transfer.
411 * @desc pointer to first descriptor
413 * Returns Number of entries in the table on success, -1 on error.
415 static int ape_sg_to_chdma_table(struct scatterlist
*sgl
, int nents
, int first
, struct ape_chdma_desc
*desc
, u32 ep_addr
)
417 int i
= first
, j
= 0;
418 /* inspect first entry */
419 dma_addr_t addr
= sg_dma_address(&sgl
[i
]);
420 unsigned int len
= sg_dma_len(&sgl
[i
]);
421 /* contiguous block */
422 dma_addr_t cont_addr
= addr
;
423 unsigned int cont_len
= len
;
424 /* iterate over remaining entries */
425 for (; j
< 25 && i
< nents
- 1; i
++) {
426 /* bus address of next entry i + 1 */
427 dma_addr_t next
= sg_dma_address(&sgl
[i
+ 1]);
428 /* length of this entry i */
429 len
= sg_dma_len(&sgl
[i
]);
430 printk(KERN_DEBUG
"%04d: addr=0x%08x length=0x%08x\n", i
, addr
, len
);
431 /* entry i + 1 is non-contiguous with entry i? */
432 if (next
!= addr
+ len
) {
433 /* TODO create entry here (we could overwrite i) */
434 printk(KERN_DEBUG
"%4d: cont_addr=0x%08x cont_len=0x%08x\n", j
, cont_addr
, cont_len
);
435 /* set descriptor for contiguous transfer */
436 ape_chdma_desc_set(&desc
[j
], cont_addr
, ep_addr
, cont_len
);
437 /* next end point memory address */
439 /* start new contiguous block */
444 /* add entry i + 1 to current contiguous block */
446 /* goto entry i + 1 */
449 /* TODO create entry here (we could overwrite i) */
450 printk(KERN_DEBUG
"%04d: addr=0x%08x length=0x%08x\n", i
, addr
, len
);
451 printk(KERN_DEBUG
"%4d: cont_addr=0x%08x length=0x%08x\n", j
, cont_addr
, cont_len
);
456 /* compare buffers */
457 static inline int compare(u32
*p
, u32
*q
, int len
)
462 for (i
= 0; i
< len
/ 4; i
++) {
464 /* every so many u32 words, show equals */
466 printk(KERN_DEBUG
"[%p] = 0x%08x [%p] = 0x%08x\n", p
, *p
, q
, *q
);
469 /* show the first few miscompares */
471 printk(KERN_DEBUG
"[%p] = 0x%08x != [%p] = 0x%08x ?!\n", p
, *p
, q
, *q
);
472 /* but stop after a while */
473 } else if (fail
== 10) {
474 printk(KERN_DEBUG
"---more errors follow! not printed---\n");
476 /* stop compare after this many errors */
488 /* dma_test() - Perform DMA loop back test to end point and back to root complex.
490 * Allocate a cache-coherent buffer in host memory, consisting of four pages.
492 * Fill the four memory pages such that each 32-bit word contains its own address.
494 * Now perform a loop back test, have the end point device copy the first buffer
495 * half to end point memory, then have it copy back into the second half.
497 * Create a descriptor table to copy the first buffer half into End Point
498 * memory. Instruct the End Point to do a DMA read using that table.
500 * Create a descriptor table to copy End Point memory to the second buffer
501 * half. Instruct the End Point to do a DMA write using that table.
503 * Compare results, fail or pass.
506 static int __devinit
dma_test(struct ape_dev
*ape
, struct pci_dev
*dev
)
508 /* test result; guilty until proven innocent */
510 /* the DMA read header sits at address 0x00 of the DMA engine BAR */
511 struct ape_chdma_header
*write_header
= (struct ape_chdma_header
*)ape
->bar
[APE_BAR_HEADER
];
512 /* the write DMA header sits after the read header at address 0x10 */
513 struct ape_chdma_header
*read_header
= write_header
+ 1;
514 /* virtual address of the allocated buffer */
516 /* bus address of the allocated buffer */
517 dma_addr_t buffer_bus
= 0;
518 int i
, n
= 0, irq_count
;
520 /* temporary value used to construct 32-bit data words */
523 printk(KERN_DEBUG
"bar_tests(), PAGE_SIZE = 0x%0x\n", (int)PAGE_SIZE
);
524 printk(KERN_DEBUG
"write_header = 0x%p.\n", write_header
);
525 printk(KERN_DEBUG
"read_header = 0x%p.\n", read_header
);
526 printk(KERN_DEBUG
"&write_header->w3 = 0x%p\n", &write_header
->w3
);
527 printk(KERN_DEBUG
"&read_header->w3 = 0x%p\n", &read_header
->w3
);
528 printk(KERN_DEBUG
"ape->table_virt = 0x%p.\n", ape
->table_virt
);
530 if (!write_header
|| !read_header
|| !ape
->table_virt
)
533 /* allocate and map coherently-cached memory for a DMA-able buffer */
534 /* @see Documentation/PCI/PCI-DMA-mapping.txt, near line 318 */
535 buffer_virt
= (u8
*)pci_alloc_consistent(dev
, PAGE_SIZE
* 4, &buffer_bus
);
537 printk(KERN_DEBUG
"Could not allocate coherent DMA buffer.\n");
540 printk(KERN_DEBUG
"Allocated cache-coherent DMA buffer (virtual address = 0x%016llx, bus address = 0x%016llx).\n",
541 (u64
)buffer_virt
, (u64
)buffer_bus
);
543 /* fill first half of buffer with its virtual address as data */
544 for (i
= 0; i
< 4 * PAGE_SIZE
; i
+= 4)
546 *(u32
*)(buffer_virt
+ i
) = i
/ PAGE_SIZE
+ 1;
548 *(u32
*)(buffer_virt
+ i
) = (buffer_virt
+ i
);
551 compare((u32
*)buffer_virt
, (u32
*)(buffer_virt
+ 2 * PAGE_SIZE
), 8192);
555 /* fill second half of buffer with zeroes */
556 for (i
= 2 * PAGE_SIZE
; i
< 4 * PAGE_SIZE
; i
+= 4)
557 *(u32
*)(buffer_virt
+ i
) = 0;
560 /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
561 ape
->table_virt
->w3
= cpu_to_le32(0x0000FADE);
563 /* fill in first descriptor */
565 /* read 8192 bytes from RC buffer to EP address 4096 */
566 ape_chdma_desc_set(&ape
->table_virt
->desc
[n
], buffer_bus
, 4096, 2 * PAGE_SIZE
);
568 for (i
= 0; i
< 255; i
++) {
569 ape_chdma_desc_set(&ape
->table_virt
->desc
[i
], buffer_bus
, 4096, 2 * PAGE_SIZE
);
571 /* index of last descriptor */
575 /* fill in next descriptor */
577 /* read 1024 bytes from RC buffer to EP address 4096 + 1024 */
578 ape_chdma_desc_set(&ape
->table_virt
->desc
[n
], buffer_bus
+ 1024, 4096 + 1024, 1024);
582 /* enable MSI after the last descriptor is completed */
583 if (ape
->msi_enabled
)
584 ape
->table_virt
->desc
[n
].w0
|= cpu_to_le32(1UL << 16)/*local MSI*/;
587 /* dump descriptor table for debugging */
588 printk(KERN_DEBUG
"Descriptor Table (Read, in Root Complex Memory, # = %d)\n", n
+ 1);
589 for (i
= 0; i
< 4 + (n
+ 1) * 4; i
+= 4) {
590 u32
*p
= (u32
*)ape
->table_virt
;
592 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, 4 * le32_to_cpu(*p
));
594 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
596 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
598 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
601 /* set available number of descriptors in table */
603 w
|= (1UL << 18)/*global EPLAST_EN*/;
605 if (ape
->msi_enabled
)
606 w
|= (1UL << 17)/*global MSI*/;
608 printk(KERN_DEBUG
"writing 0x%08x to 0x%p\n", w
, (void *)&read_header
->w0
);
609 iowrite32(w
, &read_header
->w0
);
611 /* write table address (higher 32-bits) */
612 printk(KERN_DEBUG
"writing 0x%08x to 0x%p\n", (u32
)((ape
->table_bus
>> 16) >> 16), (void *)&read_header
->bdt_addr_h
);
613 iowrite32(pci_dma_h(ape
->table_bus
), &read_header
->bdt_addr_h
);
615 /* write table address (lower 32-bits) */
616 printk(KERN_DEBUG
"writing 0x%08x to 0x%p\n", (u32
)(ape
->table_bus
& 0xffffffffUL
), (void *)&read_header
->bdt_addr_l
);
617 iowrite32(pci_dma_l(ape
->table_bus
), &read_header
->bdt_addr_l
);
619 /* memory write barrier */
621 printk(KERN_DEBUG
"Flush posted writes\n");
622 /** FIXME Add dummy read to flush posted writes but need a readable location! */
627 /* remember IRQ count before the transfer */
628 irq_count
= ape
->irq_count
;
629 /* write number of descriptors - this starts the DMA */
630 printk(KERN_DEBUG
"\nStart DMA read\n");
631 printk(KERN_DEBUG
"writing 0x%08x to 0x%p\n", (u32
)n
, (void *)&read_header
->w3
);
632 iowrite32(n
, &read_header
->w3
);
633 printk(KERN_DEBUG
"EPLAST = %lu\n", le32_to_cpu(*(u32
*)&ape
->table_virt
->w3
) & 0xffffUL
);
635 /** memory write barrier */
637 /* dummy read to flush posted writes */
638 /* FIXME Need a readable location! */
642 printk(KERN_DEBUG
"POLL FOR READ:\n");
643 /* poll for chain completion, 1000 times 1 millisecond */
644 for (i
= 0; i
< 100; i
++) {
645 volatile u32
*p
= &ape
->table_virt
->w3
;
646 u32 eplast
= le32_to_cpu(*p
) & 0xffffUL
;
647 printk(KERN_DEBUG
"EPLAST = %u, n = %d\n", eplast
, n
);
649 printk(KERN_DEBUG
"DONE\n");
650 /* print IRQ count before the transfer */
651 printk(KERN_DEBUG
"#IRQs during transfer: %d\n", ape
->irq_count
- irq_count
);
657 /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
658 ape
->table_virt
->w3
= cpu_to_le32(0x0000FADE);
660 /* setup first descriptor */
662 ape_chdma_desc_set(&ape
->table_virt
->desc
[n
], buffer_bus
+ 8192, 4096, 2 * PAGE_SIZE
);
664 for (i
= 0; i
< 255; i
++) {
665 ape_chdma_desc_set(&ape
->table_virt
->desc
[i
], buffer_bus
+ 8192, 4096, 2 * PAGE_SIZE
);
667 /* index of last descriptor */
670 #if 1 /* test variable, make a module option later */
671 if (ape
->msi_enabled
)
672 ape
->table_virt
->desc
[n
].w0
|= cpu_to_le32(1UL << 16)/*local MSI*/;
675 /* dump descriptor table for debugging */
676 printk(KERN_DEBUG
"Descriptor Table (Write, in Root Complex Memory, # = %d)\n", n
+ 1);
677 for (i
= 0; i
< 4 + (n
+ 1) * 4; i
+= 4) {
678 u32
*p
= (u32
*)ape
->table_virt
;
680 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, 4 * le32_to_cpu(*p
));
682 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
684 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
686 printk(KERN_DEBUG
"0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32
)p
, (u32
)p
& 15, *p
, le32_to_cpu(*p
));
690 /* set number of available descriptors in the table */
692 /* enable updates of eplast for each descriptor completion */
693 w
|= (u32
)(1UL << 18)/*global EPLAST_EN*/;
694 #if 0 // test variable, make a module option later
695 /* enable MSI for each descriptor completion */
696 if (ape
->msi_enabled
)
697 w
|= (1UL << 17)/*global MSI*/;
699 iowrite32(w
, &write_header
->w0
);
700 iowrite32(pci_dma_h(ape
->table_bus
), &write_header
->bdt_addr_h
);
701 iowrite32(pci_dma_l(ape
->table_bus
), &write_header
->bdt_addr_l
);
703 /** memory write barrier and flush posted writes */
705 /* dummy read to flush posted writes */
706 /* FIXME Need a readable location! */
710 irq_count
= ape
->irq_count
;
712 printk(KERN_DEBUG
"\nStart DMA write\n");
713 iowrite32(n
, &write_header
->w3
);
715 /** memory write barrier */
717 /** dummy read to flush posted writes */
720 printk(KERN_DEBUG
"POLL FOR WRITE:\n");
721 /* poll for completion, 1000 times 1 millisecond */
722 for (i
= 0; i
< 100; i
++) {
723 volatile u32
*p
= &ape
->table_virt
->w3
;
724 u32 eplast
= le32_to_cpu(*p
) & 0xffffUL
;
725 printk(KERN_DEBUG
"EPLAST = %u, n = %d\n", eplast
, n
);
727 printk(KERN_DEBUG
"DONE\n");
728 /* print IRQ count before the transfer */
729 printk(KERN_DEBUG
"#IRQs during transfer: %d\n", ape
->irq_count
- irq_count
);
734 /* soft-reset DMA write engine */
735 iowrite32(0x0000ffffUL
, &write_header
->w0
);
736 /* soft-reset DMA read engine */
737 iowrite32(0x0000ffffUL
, &read_header
->w0
);
739 /** memory write barrier */
741 /* dummy read to flush posted writes */
742 /* FIXME Need a readable location! */
746 /* compare first half of buffer with second half, should be identical */
747 result
= compare((u32
*)buffer_virt
, (u32
*)(buffer_virt
+ 2 * PAGE_SIZE
), 8192);
748 printk(KERN_DEBUG
"DMA loop back test %s.\n", result
? "FAILED" : "PASSED");
750 pci_free_consistent(dev
, 4 * PAGE_SIZE
, buffer_virt
, buffer_bus
);
752 printk(KERN_DEBUG
"bar_tests() end, result %d\n", result
);
756 /* Called when the PCI sub system thinks we can control the given device.
757 * Inspect if we can support the device and if so take control of it.
759 * Return 0 when we have taken control of the given device.
761 * - allocate board specific bookkeeping
762 * - allocate coherently-mapped memory for the descriptor table
764 * - verify board revision
767 * - obtain and request irq
768 * - map regions into kernel address space
770 static int __devinit
probe(struct pci_dev
*dev
, const struct pci_device_id
*id
)
773 struct ape_dev
*ape
= NULL
;
774 u8 irq_pin
, irq_line
;
775 printk(KERN_DEBUG
"probe(dev = 0x%p, pciid = 0x%p)\n", dev
, id
);
777 /* allocate memory for per-board book keeping */
778 ape
= kzalloc(sizeof(struct ape_dev
), GFP_KERNEL
);
780 printk(KERN_DEBUG
"Could not kzalloc()ate memory.\n");
784 dev
->dev
.driver_data
= (void *)ape
;
785 printk(KERN_DEBUG
"probe() ape = 0x%p\n", ape
);
787 printk(KERN_DEBUG
"sizeof(struct ape_chdma_table) = %d.\n",
788 (int)sizeof(struct ape_chdma_table
));
789 /* the reference design has a size restriction on the table size */
790 BUG_ON(sizeof(struct ape_chdma_table
) > APE_CHDMA_TABLE_SIZE
);
792 /* allocate and map coherently-cached memory for a descriptor table */
793 /* @see LDD3 page 446 */
794 ape
->table_virt
= (struct ape_chdma_table
*)pci_alloc_consistent(dev
,
795 APE_CHDMA_TABLE_SIZE
, &ape
->table_bus
);
796 /* could not allocate table? */
797 if (!ape
->table_virt
) {
798 printk(KERN_DEBUG
"Could not dma_alloc()ate_coherent memory.\n");
802 printk(KERN_DEBUG
"table_virt = 0x%16llx, table_bus = 0x%16llx.\n",
803 (u64
)ape
->table_virt
, (u64
)ape
->table_bus
);
806 rc
= pci_enable_device(dev
);
808 printk(KERN_DEBUG
"pci_enable_device() failed\n");
812 /* enable bus master capability on device */
814 /* enable message signaled interrupts */
815 rc
= pci_enable_msi(dev
);
816 /* could not use MSI? */
818 /* resort to legacy interrupts */
819 printk(KERN_DEBUG
"Could not enable MSI interrupting.\n");
820 ape
->msi_enabled
= 0;
821 /* MSI enabled, remember for cleanup */
823 printk(KERN_DEBUG
"Enabled MSI interrupting.\n");
824 ape
->msi_enabled
= 1;
827 pci_read_config_byte(dev
, PCI_REVISION_ID
, &ape
->revision
);
829 /* (for example) this driver does not support revision 0x42 */
830 if (ape
->revision
== 0x42) {
831 printk(KERN_DEBUG
"Revision 0x42 is not supported by this driver.\n");
836 /** XXX check for native or legacy PCIe endpoint? */
838 rc
= pci_request_regions(dev
, DRV_NAME
);
839 /* could not request all regions? */
841 /* assume device is in use (and do not disable it later!) */
845 ape
->got_regions
= 1;
847 #if 1 // @todo For now, disable 64-bit, because I do not understand the implications (DAC!)
848 /* query for DMA transfer */
849 /* @see Documentation/PCI/PCI-DMA-mapping.txt */
850 if (!pci_set_dma_mask(dev
, DMA_64BIT_MASK
)) {
851 pci_set_consistent_dma_mask(dev
, DMA_64BIT_MASK
);
853 printk(KERN_DEBUG
"Using a 64-bit DMA mask.\n");
856 if (!pci_set_dma_mask(dev
, DMA_32BIT_MASK
)) {
857 printk(KERN_DEBUG
"Could not set 64-bit DMA mask.\n");
858 pci_set_consistent_dma_mask(dev
, DMA_32BIT_MASK
);
860 printk(KERN_DEBUG
"Using a 32-bit DMA mask.\n");
862 printk(KERN_DEBUG
"No suitable DMA possible.\n");
863 /** @todo Choose proper error return code */
868 rc
= pci_read_config_byte(dev
, PCI_INTERRUPT_PIN
, &irq_pin
);
869 /* could not read? */
872 printk(KERN_DEBUG
"IRQ pin #%d (0=none, 1=INTA#...4=INTD#).\n", irq_pin
);
874 /* @see LDD3, page 318 */
875 rc
= pci_read_config_byte(dev
, PCI_INTERRUPT_LINE
, &irq_line
);
876 /* could not read? */
878 printk(KERN_DEBUG
"Could not query PCI_INTERRUPT_LINE, error %d\n", rc
);
881 printk(KERN_DEBUG
"IRQ line #%d.\n", irq_line
);
884 /* @see LDD3, page 259 */
885 rc
= request_irq(irq_line
, altpciechdma_isr
, IRQF_SHARED
, DRV_NAME
, (void *)ape
);
887 printk(KERN_DEBUG
"Could not request IRQ #%d, error %d\n", irq_line
, rc
);
891 /* remember which irq we allocated */
892 ape
->irq_line
= (int)irq_line
;
893 printk(KERN_DEBUG
"Succesfully requested IRQ #%d with dev_id 0x%p\n", irq_line
, ape
);
898 rc
= map_bars(ape
, dev
);
901 #if ALTPCIECHDMA_CDEV
902 /* initialize character device */
907 /* perform DMA engines loop back test */
908 rc
= dma_test(ape
, dev
);
910 /* succesfully took the device */
912 printk(KERN_DEBUG
"probe() successful.\n");
916 unmap_bars(ape
, dev
);
918 /* free allocated irq */
919 if (ape
->irq_line
>= 0)
920 free_irq(ape
->irq_line
, (void *)ape
);
922 if (ape
->msi_enabled
)
923 pci_disable_msi(dev
);
924 /* disable the device iff it is not in use */
926 pci_disable_device(dev
);
927 if (ape
->got_regions
)
928 pci_release_regions(dev
);
932 /* clean up everything before device enable() */
935 pci_free_consistent(dev
, APE_CHDMA_TABLE_SIZE
, ape
->table_virt
, ape
->table_bus
);
936 /* clean up everything before allocating descriptor table */
945 static void __devexit
remove(struct pci_dev
*dev
)
948 printk(KERN_DEBUG
"remove(0x%p)\n", dev
);
949 if ((dev
== 0) || (dev
->dev
.driver_data
== 0)) {
950 printk(KERN_DEBUG
"remove(dev = 0x%p) dev->dev.driver_data = 0x%p\n", dev
, dev
->dev
.driver_data
);
953 ape
= (struct ape_dev
*)dev
->dev
.driver_data
;
954 printk(KERN_DEBUG
"remove(dev = 0x%p) where dev->dev.driver_data = 0x%p\n", dev
, ape
);
955 if (ape
->pci_dev
!= dev
) {
956 printk(KERN_DEBUG
"dev->dev.driver_data->pci_dev (0x%08lx) != dev (0x%08lx)\n",
957 (unsigned long)ape
->pci_dev
, (unsigned long)dev
);
959 /* remove character device */
960 #if ALTPCIECHDMA_CDEV
965 pci_free_consistent(dev
, APE_CHDMA_TABLE_SIZE
, ape
->table_virt
, ape
->table_bus
);
970 if (ape
->irq_line
>= 0) {
971 printk(KERN_DEBUG
"Freeing IRQ #%d for dev_id 0x%08lx.\n",
972 ape
->irq_line
, (unsigned long)ape
);
973 free_irq(ape
->irq_line
, (void *)ape
);
975 /* MSI was enabled? */
976 if (ape
->msi_enabled
) {
977 /* Disable MSI @see Documentation/MSI-HOWTO.txt */
978 pci_disable_msi(dev
);
979 ape
->msi_enabled
= 0;
982 unmap_bars(ape
, dev
);
984 pci_disable_device(dev
);
985 if (ape
->got_regions
)
986 /* to be called after device disable */
987 pci_release_regions(dev
);
990 #if ALTPCIECHDMA_CDEV
993 * Called when the device goes from unused to used.
995 static int sg_open(struct inode
*inode
, struct file
*file
)
998 printk(KERN_DEBUG DRV_NAME
"_open()\n");
999 /* pointer to containing data structure of the character device inode */
1000 ape
= container_of(inode
->i_cdev
, struct ape_dev
, cdev
);
1001 /* create a reference to our device state in the opened file */
1002 file
->private_data
= ape
;
1003 /* create virtual memory mapper */
1004 ape
->sgm
= sg_create_mapper(MAX_CHDMA_SIZE
);
1009 * Called when the device goes from used to unused.
1011 static int sg_close(struct inode
*inode
, struct file
*file
)
1013 /* fetch device specific data stored earlier during open */
1014 struct ape_dev
*ape
= (struct ape_dev
*)file
->private_data
;
1015 printk(KERN_DEBUG DRV_NAME
"_close()\n");
1016 /* destroy virtual memory mapper */
1017 sg_destroy_mapper(ape
->sgm
);
1021 static ssize_t
sg_read(struct file
*file
, char __user
*buf
, size_t count
, loff_t
*pos
)
1023 /* fetch device specific data stored earlier during open */
1024 struct ape_dev
*ape
= (struct ape_dev
*)file
->private_data
;
1026 printk(KERN_DEBUG DRV_NAME
"_read(buf=0x%p, count=%lld, pos=%llu)\n", buf
, (s64
)count
, (u64
)*pos
);
1030 /* sg_write() - Write to the device
1032 * @buf userspace buffer
1033 * @count number of bytes in the userspace buffer
1035 * Iterate over the userspace buffer, taking at most 255 * PAGE_SIZE bytes for
1036 * each DMA transfer.
1037 * For each transfer, get the user pages, build a sglist, map, build a
1038 * descriptor table. submit the transfer. wait for the interrupt handler
1039 * to wake us on completion.
1041 static ssize_t
sg_write(struct file
*file
, const char __user
*buf
, size_t count
, loff_t
*pos
)
1044 size_t transfer_len
, remaining
= count
, done
= 0;
1045 u64 transfer_addr
= (u64
)buf
;
1046 /* fetch device specific data stored earlier during open */
1047 struct ape_dev
*ape
= (struct ape_dev
*)file
->private_data
;
1048 printk(KERN_DEBUG DRV_NAME
"_write(buf=0x%p, count=%lld, pos=%llu)\n",
1049 buf
, (s64
)count
, (u64
)*pos
);
1050 /* TODO transfer boundaries at PAGE_SIZE granularity */
1051 while (remaining
> 0)
1053 /* limit DMA transfer size */
1054 transfer_len
= (remaining
< APE_CHDMA_MAX_TRANSFER_LEN
)? remaining
:
1055 APE_CHDMA_MAX_TRANSFER_LEN
;
1056 /* get all user space buffer pages and create a scattergather list */
1057 sgm_map_user_pages(ape
->sgm
, transfer_addr
, transfer_len
, 0/*read from userspace*/);
1058 printk(KERN_DEBUG DRV_NAME
"mapped_pages=%d\n", ape
->sgm
->mapped_pages
);
1059 /* map all entries in the scattergather list */
1060 hwnents
= pci_map_sg(ape
->pci_dev
, ape
->sgm
->sgl
, ape
->sgm
->mapped_pages
, DMA_TO_DEVICE
);
1061 printk(KERN_DEBUG DRV_NAME
"hwnents=%d\n", hwnents
);
1062 /* build device descriptor tables and submit them to the DMA engine */
1063 tents
= ape_sg_to_chdma_table(ape
->sgm
->sgl
, hwnents
, 0, &ape
->table_virt
->desc
[0], 4096);
1064 printk(KERN_DEBUG DRV_NAME
"tents=%d\n", hwnents
);
1067 /* TODO build table */
1068 /* TODO submit table to the device */
1069 /* if engine stopped and unfinished work then start engine */
1071 put ourselves on wait queue
1074 dma_unmap_sg(NULL
, ape
->sgm
->sgl
, ape
->sgm
->mapped_pages
, DMA_TO_DEVICE
);
1075 /* dirty and free the pages */
1076 sgm_unmap_user_pages(ape
->sgm
, 1/*dirtied*/);
1078 transfer_addr
+= transfer_len
;
1079 remaining
-= transfer_len
;
1080 done
+= transfer_len
;
1086 * character device file operations
1088 static struct file_operations sg_fops
= {
1089 .owner
= THIS_MODULE
,
1091 .release
= sg_close
,
1096 /* sg_init() - Initialize character device
1098 * XXX Should ideally be tied to the device, on device probe, not module init.
1100 static int sg_init(struct ape_dev
*ape
)
1103 printk(KERN_DEBUG DRV_NAME
" sg_init()\n");
1104 /* allocate a dynamically allocated character device node */
1105 rc
= alloc_chrdev_region(&ape
->cdevno
, 0/*requested minor*/, 1/*count*/, DRV_NAME
);
1106 /* allocation failed? */
1108 printk("alloc_chrdev_region() = %d\n", rc
);
1111 /* couple the device file operations to the character device */
1112 cdev_init(&ape
->cdev
, &sg_fops
);
1113 ape
->cdev
.owner
= THIS_MODULE
;
1114 /* bring character device live */
1115 rc
= cdev_add(&ape
->cdev
, ape
->cdevno
, 1/*count*/);
1117 printk("cdev_add() = %d\n", rc
);
1120 printk(KERN_DEBUG
"altpciechdma = %d:%d\n", MAJOR(ape
->cdevno
), MINOR(ape
->cdevno
));
1123 /* free the dynamically allocated character device node */
1124 unregister_chrdev_region(ape
->cdevno
, 1/*count*/);
1129 /* sg_exit() - Cleanup character device
1131 * XXX Should ideally be tied to the device, on device remove, not module exit.
1134 static void sg_exit(struct ape_dev
*ape
)
1136 printk(KERN_DEBUG DRV_NAME
" sg_exit()\n");
1137 /* remove the character device */
1138 cdev_del(&ape
->cdev
);
1139 /* free the dynamically allocated character device node */
1140 unregister_chrdev_region(ape
->cdevno
, 1/*count*/);
1143 #endif /* ALTPCIECHDMA_CDEV */
1145 /* used to register the driver with the PCI kernel sub system
1146 * @see LDD3 page 311
1148 static struct pci_driver pci_driver
= {
1153 /* resume, suspend are optional */
1157 * alterapciechdma_init() - Module initialization, registers devices.
1159 static int __init
alterapciechdma_init(void)
1162 printk(KERN_DEBUG DRV_NAME
" init(), built at " __DATE__
" " __TIME__
"\n");
1163 /* register this driver with the PCI bus driver */
1164 rc
= pci_register_driver(&pci_driver
);
1171 * alterapciechdma_init() - Module cleanup, unregisters devices.
1173 static void __exit
alterapciechdma_exit(void)
1175 printk(KERN_DEBUG DRV_NAME
" exit(), built at " __DATE__
" " __TIME__
"\n");
1176 /* unregister this driver from the PCI bus driver */
1177 pci_unregister_driver(&pci_driver
);
1180 MODULE_LICENSE("GPL");
1182 module_init(alterapciechdma_init
);
1183 module_exit(alterapciechdma_exit
);