Staging: altpciechdma: Add missing __devexit_p()
[linux-2.6/verdex.git] / drivers / staging / altpciechdma / altpciechdma.c
blobac9728e067d45690a039606653d2b5d0ba3a03bb
1 /**
2 * Driver for Altera PCIe core chaining DMA reference design.
4 * Copyright (C) 2008 Leon Woestenberg <leon.woestenberg@axon.tv>
5 * Copyright (C) 2008 Nickolas Heppermann <heppermannwdt@gmail.com>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 * Rationale: This driver exercises the chaining DMA read and write engine
23 * in the reference design. It is meant as a complementary reference
24 * driver that can be used for testing early designs as well as a basis to
25 * write your custom driver.
27 * Status: Test results from Leon Woestenberg <leon.woestenberg@axon.tv>:
29 * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a
30 * Dell Precision 370 PC, x86, kernel 2.6.20 from Ubuntu 7.04.
32 * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a
33 * Freescale MPC8313E-RDB board, PowerPC, 2.6.24 w/ Freescale patches.
35 * Driver tests passed with PCIe Compiler 8.1. With PCIe 8.0 the DMA
36 * loopback test had reproducible compare errors. I assume a change
37 * in the compiler or reference design, but could not find evidence nor
38 * documentation on a change or fix in that direction.
40 * The reference design does not have readable locations and thus a
41 * dummy read, used to flush PCI posted writes, cannot be performed.
45 #include <linux/kernel.h>
46 #include <linux/cdev.h>
47 #include <linux/delay.h>
48 #include <linux/dma-mapping.h>
49 #include <linux/init.h>
50 #include <linux/interrupt.h>
51 #include <linux/io.h>
52 #include <linux/jiffies.h>
53 #include <linux/module.h>
54 #include <linux/pci.h>
/* by default do not build the character device interface */
/* XXX It is non-functional yet */
#ifndef ALTPCIECHDMA_CDEV
# define ALTPCIECHDMA_CDEV 0
#endif

/* build the character device interface? */
#if ALTPCIECHDMA_CDEV
/* size passed to sg_create_mapper() when the character device is opened */
# define MAX_CHDMA_SIZE (8 * 1024 * 1024)
# include "mapper_user_to_sg.h"
#endif
/** driver name, mimics Altera naming of the reference design */
#define DRV_NAME "altpciechdma"
/** number of BARs on the device */
#define APE_BAR_NUM (6)
/** BAR number where the RCSLAVE memory sits */
#define APE_BAR_RCSLAVE (0)
/** BAR number where the Descriptor Header sits */
#define APE_BAR_HEADER (2)

/** maximum size in bytes of the descriptor table, chdma logic limit */
#define APE_CHDMA_TABLE_SIZE (4096)
/* single transfer must not exceed 255 table entries. worst case this can be
 * achieved by 255 scattered pages, with only a single byte in the head and
 * tail pages. 253 * PAGE_SIZE is a safe upper bound for the transfer size.
 */
#define APE_CHDMA_MAX_TRANSFER_LEN (253 * PAGE_SIZE)
/**
 * Specifies those BARs to be mapped and the length of each mapping.
 *
 * Zero (0) means do not map, otherwise specifies the BAR lengths to be mapped.
 * If the actual BAR length is less, this is considered an error; then
 * reconfigure your PCIe core.
 *
 * The array is indexed by BAR number. map_bars() rejects a BAR that is
 * shorter than its entry and maps exactly the length given here.
 *
 * @see ug_pci_express 8.0, table 7-2 at page 7-13.
 */
static const unsigned long bar_min_len[APE_BAR_NUM] =
	{ 32768, 0, 256, 0, 32768, 0 };
/**
 * Descriptor Header, controls the DMA read engine or write engine.
 *
 * The descriptor header is the main data structure for starting DMA transfers.
 *
 * It sits in End Point (FPGA) memory BAR[2] for 32-bit or BAR[3:2] for 64-bit.
 * It references a descriptor table which exists in Root Complex (PC) memory.
 * Writing the rclast field starts the DMA operation, thus all other structures
 * and fields must be setup before doing so.
 *
 * @see ug_pci_express 8.0, tables 7-3, 7-4 and 7-5 at page 7-14.
 * @note This header must be written in four 32-bit (PCI DWORD) writes.
 */
struct ape_chdma_header {
	/**
	 * w0 consists of two 16-bit fields:
	 * lsb u16 number; number of descriptors in ape_chdma_table
	 * msb u16 control; global control flags
	 */
	u32 w0;
	/* bus address to ape_chdma_table in Root Complex memory;
	 * the upper 32 bits come first, then the lower 32 bits */
	u32 bdt_addr_h;
	u32 bdt_addr_l;
	/**
	 * w3 consists of two 16-bit fields:
	 * - lsb u16 rclast; last descriptor number available in Root Complex
	 *    - zero (0) means the first descriptor is ready,
	 *    - one (1) means two descriptors are ready, etc.
	 * - msb u16 reserved;
	 *
	 * @note writing to this memory location starts the DMA operation!
	 */
	u32 w3;
} __attribute__ ((packed));
/**
 * Descriptor Entry, describing a (non-scattered) single memory block transfer.
 *
 * There is one descriptor for each memory block involved in the transfer, a
 * block being a contiguous address range on the bus.
 *
 * Multiple descriptors are chained by means of the ape_chdma_table data
 * structure.
 *
 * @see ug_pci_express 8.0, tables 7-6, 7-7 and 7-8 at page 7-14 and page 7-15.
 */
struct ape_chdma_desc {
	/**
	 * w0 consists of two 16-bit fields:
	 * number of DWORDS to transfer
	 * - lsb u16 length;
	 * global control
	 * - msb u16 control;
	 */
	u32 w0;
	/* address of memory in the End Point */
	u32 ep_addr;
	/* bus address of source or destination memory in the Root Complex;
	 * the upper 32 bits come first, then the lower 32 bits */
	u32 rc_addr_h;
	u32 rc_addr_l;
} __attribute__ ((packed));
/**
 * Descriptor Table, an array of descriptors describing a chained transfer.
 *
 * An array of descriptors, preceded by workspace for the End Point.
 * It exists in Root Complex memory.
 *
 * The End Point can update its last completed descriptor number in the
 * eplast field if requested by setting the EPLAST_ENA bit either
 * globally in the header's or locally in any descriptor's control field.
 *
 * @note this structure may not exceed 4096 bytes. This results in a
 * maximum of 4096 / (4 * 4) - 1 = 255 descriptors per chained transfer.
 *
 * @see ug_pci_express 8.0, tables 7-9, 7-10 and 7-11 at page 7-17 and page 7-18.
 */
struct ape_chdma_table {
	/* workspace 0x00-0x0b, reserved */
	u32 reserved1[3];
	/* workspace 0x0c-0x0f, last descriptor handled by End Point */
	u32 w3;
	/* the actual array of descriptors
	 * 0x10-0x1f, 0x20-0x2f, ... 0xff0-0xfff (255 entries)
	 */
	struct ape_chdma_desc desc[255];
} __attribute__ ((packed));
187 * Altera PCI Express ('ape') board specific book keeping data
189 * Keeps state of the PCIe core and the Chaining DMA controller
190 * application.
192 struct ape_dev {
193 /** the kernel pci device data structure provided by probe() */
194 struct pci_dev *pci_dev;
196 * kernel virtual address of the mapped BAR memory and IO regions of
197 * the End Point. Used by map_bars()/unmap_bars().
199 void * __iomem bar[APE_BAR_NUM];
200 /** kernel virtual address for Descriptor Table in Root Complex memory */
201 struct ape_chdma_table *table_virt;
203 * bus address for the Descriptor Table in Root Complex memory, in
204 * CPU-native endianess
206 dma_addr_t table_bus;
207 /* if the device regions could not be allocated, assume and remember it
208 * is in use by another driver; this driver must not disable the device.
210 int in_use;
211 /* whether this driver enabled msi for the device */
212 int msi_enabled;
213 /* whether this driver could obtain the regions */
214 int got_regions;
215 /* irq line succesfully requested by this driver, -1 otherwise */
216 int irq_line;
217 /* board revision */
218 u8 revision;
219 /* interrupt count, incremented by the interrupt handler */
220 int irq_count;
221 #if ALTPCIECHDMA_CDEV
222 /* character device */
223 dev_t cdevno;
224 struct cdev cdev;
225 /* user space scatter gather mapper */
226 struct sg_mapping_t *sgm;
227 #endif
/**
 * PCI vendor/device id pairs this driver binds to.
 *
 * Using the subsystem vendor id and subsystem id, it is possible to
 * distinguish between different cards based around the same
 * (third-party) logic core.
 *
 * Default Altera vendor and device ID's, and some (non-reserved)
 * ID's are now used here that are used amongst the testers/developers.
 */
static const struct pci_device_id ids[] = {
	{ PCI_DEVICE(0x1172, 0xE001), },
	{ PCI_DEVICE(0x2071, 0x2071), },
	/* terminating all-zero entry */
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
#if ALTPCIECHDMA_CDEV
/* prototypes for character device; probe() calls sg_init() and remove()
 * calls sg_exit() when the character device interface is built */
static int sg_init(struct ape_dev *ape);
static void sg_exit(struct ape_dev *ape);
#endif
252 * altpciechdma_isr() - Interrupt handler
255 static irqreturn_t altpciechdma_isr(int irq, void *dev_id)
257 struct ape_dev *ape = (struct ape_dev *)dev_id;
258 if (!ape)
259 return IRQ_NONE;
260 ape->irq_count++;
261 return IRQ_HANDLED;
264 static int __devinit scan_bars(struct ape_dev *ape, struct pci_dev *dev)
266 int i;
267 for (i = 0; i < APE_BAR_NUM; i++) {
268 unsigned long bar_start = pci_resource_start(dev, i);
269 if (bar_start) {
270 unsigned long bar_end = pci_resource_end(dev, i);
271 unsigned long bar_flags = pci_resource_flags(dev, i);
272 printk(KERN_DEBUG "BAR%d 0x%08lx-0x%08lx flags 0x%08lx\n",
273 i, bar_start, bar_end, bar_flags);
276 return 0;
280 * Unmap the BAR regions that had been mapped earlier using map_bars()
282 static void unmap_bars(struct ape_dev *ape, struct pci_dev *dev)
284 int i;
285 for (i = 0; i < APE_BAR_NUM; i++) {
286 /* is this BAR mapped? */
287 if (ape->bar[i]) {
288 /* unmap BAR */
289 pci_iounmap(dev, ape->bar[i]);
290 ape->bar[i] = NULL;
296 * Map the device memory regions into kernel virtual address space after
297 * verifying their sizes respect the minimum sizes needed, given by the
298 * bar_min_len[] array.
300 static int __devinit map_bars(struct ape_dev *ape, struct pci_dev *dev)
302 int rc;
303 int i;
304 /* iterate through all the BARs */
305 for (i = 0; i < APE_BAR_NUM; i++) {
306 unsigned long bar_start = pci_resource_start(dev, i);
307 unsigned long bar_end = pci_resource_end(dev, i);
308 unsigned long bar_length = bar_end - bar_start + 1;
309 ape->bar[i] = NULL;
310 /* do not map, and skip, BARs with length 0 */
311 if (!bar_min_len[i])
312 continue;
313 /* do not map BARs with address 0 */
314 if (!bar_start || !bar_end) {
315 printk(KERN_DEBUG "BAR #%d is not present?!\n", i);
316 rc = -1;
317 goto fail;
319 bar_length = bar_end - bar_start + 1;
320 /* BAR length is less than driver requires? */
321 if (bar_length < bar_min_len[i]) {
322 printk(KERN_DEBUG "BAR #%d length = %lu bytes but driver "
323 "requires at least %lu bytes\n",
324 i, bar_length, bar_min_len[i]);
325 rc = -1;
326 goto fail;
328 /* map the device memory or IO region into kernel virtual
329 * address space */
330 ape->bar[i] = pci_iomap(dev, i, bar_min_len[i]);
331 if (!ape->bar[i]) {
332 printk(KERN_DEBUG "Could not map BAR #%d.\n", i);
333 rc = -1;
334 goto fail;
336 printk(KERN_DEBUG "BAR[%d] mapped at 0x%p with length %lu(/%lu).\n", i,
337 ape->bar[i], bar_min_len[i], bar_length);
339 /* succesfully mapped all required BAR regions */
340 rc = 0;
341 goto success;
342 fail:
343 /* unmap any BARs that we did map */
344 unmap_bars(ape, dev);
345 success:
346 return rc;
#if 0 /* not yet implemented fully FIXME add opcode */
/*
 * Write an incrementing 32-bit pattern to the upper part of the RCSLAVE BAR
 * and read it back, stopping at the first word that differs.
 *
 * The logged addresses use the same u32 pointer arithmetic as the accesses
 * themselves, so the log shows the location that was really touched.
 */
static void __devinit rcslave_test(struct ape_dev *ape, struct pci_dev *dev)
{
	u32 *rcslave_mem = (u32 *)ape->bar[APE_BAR_RCSLAVE];
	/* word range under test: from word 1024 up to the 32768 byte mark */
	const int first_word = 1024;
	const int end_word = 32768 / 4;
	/** this number is assumed to be different each time this test runs */
	u32 seed = (u32)jiffies;
	u32 value;
	u32 result;
	int i;

	/* write loop */
	value = seed;
	for (i = first_word; i < end_word; i++) {
		printk(KERN_DEBUG "Writing 0x%08x to 0x%p.\n",
			(u32)value, (void *)(rcslave_mem + i));
		iowrite32(value, rcslave_mem + i);
		value++;
	}
	/* read-back loop */
	value = seed;
	for (i = first_word; i < end_word; i++) {
		result = ioread32(rcslave_mem + i);
		if (result != value) {
			printk(KERN_DEBUG "Wrote 0x%08x to 0x%p, but read back 0x%08x.\n",
				(u32)value, (void *)(rcslave_mem + i), (u32)result);
			break;
		}
		value++;
	}
}
#endif
/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address.
 * The shift is done in two steps of 16 so that a 32-bit address type is
 * never shifted by its full width. The argument is parenthesized so that
 * any expression may be passed. */
#define pci_dma_h(addr) (((addr) >> 16) >> 16)
/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */
#define pci_dma_l(addr) ((addr) & 0xffffffffUL)
386 /* ape_fill_chdma_desc() - Fill a Altera PCI Express Chaining DMA descriptor
388 * @desc pointer to descriptor to be filled
389 * @addr root complex address
390 * @ep_addr end point address
391 * @len number of bytes, must be a multiple of 4.
393 static inline void ape_chdma_desc_set(struct ape_chdma_desc *desc, dma_addr_t addr, u32 ep_addr, int len)
395 BUG_ON(len & 3);
396 desc->w0 = cpu_to_le32(len / 4);
397 desc->ep_addr = cpu_to_le32(ep_addr);
398 desc->rc_addr_h = cpu_to_le32(pci_dma_h(addr));
399 desc->rc_addr_l = cpu_to_le32(pci_dma_l(addr));
403 * ape_sg_to_chdma_table() - Create a device descriptor table from a scatterlist.
405 * The scatterlist must have been mapped by pci_map_sg(sgm->sgl).
407 * @sgl scatterlist.
408 * @nents Number of entries in the scatterlist.
409 * @first Start index in the scatterlist sgm->sgl.
410 * @ep_addr End Point address for the scatter/gather transfer.
411 * @desc pointer to first descriptor
413 * Returns Number of entries in the table on success, -1 on error.
415 static int ape_sg_to_chdma_table(struct scatterlist *sgl, int nents, int first, struct ape_chdma_desc *desc, u32 ep_addr)
417 int i = first, j = 0;
418 /* inspect first entry */
419 dma_addr_t addr = sg_dma_address(&sgl[i]);
420 unsigned int len = sg_dma_len(&sgl[i]);
421 /* contiguous block */
422 dma_addr_t cont_addr = addr;
423 unsigned int cont_len = len;
424 /* iterate over remaining entries */
425 for (; j < 25 && i < nents - 1; i++) {
426 /* bus address of next entry i + 1 */
427 dma_addr_t next = sg_dma_address(&sgl[i + 1]);
428 /* length of this entry i */
429 len = sg_dma_len(&sgl[i]);
430 printk(KERN_DEBUG "%04d: addr=0x%Lx length=0x%08x\n", i,
431 (unsigned long long)addr, len);
432 /* entry i + 1 is non-contiguous with entry i? */
433 if (next != addr + len) {
434 /* TODO create entry here (we could overwrite i) */
435 printk(KERN_DEBUG "%4d: cont_addr=0x%Lx cont_len=0x%08x\n", j,
436 (unsigned long long)cont_addr, cont_len);
437 /* set descriptor for contiguous transfer */
438 ape_chdma_desc_set(&desc[j], cont_addr, ep_addr, cont_len);
439 /* next end point memory address */
440 ep_addr += cont_len;
441 /* start new contiguous block */
442 cont_addr = next;
443 cont_len = 0;
444 j++;
446 /* add entry i + 1 to current contiguous block */
447 cont_len += len;
448 /* goto entry i + 1 */
449 addr = next;
451 /* TODO create entry here (we could overwrite i) */
452 printk(KERN_DEBUG "%04d: addr=0x%Lx length=0x%08x\n", i,
453 (unsigned long long)addr, len);
454 printk(KERN_DEBUG "%4d: cont_addr=0x%Lx length=0x%08x\n", j,
455 (unsigned long long)cont_addr, cont_len);
456 j++;
457 return j;
460 /* compare buffers */
461 static inline int compare(u32 *p, u32 *q, int len)
463 int result = -1;
464 int fail = 0;
465 int i;
466 for (i = 0; i < len / 4; i++) {
467 if (*p == *q) {
468 /* every so many u32 words, show equals */
469 if ((i & 255) == 0)
470 printk(KERN_DEBUG "[%p] = 0x%08x [%p] = 0x%08x\n", p, *p, q, *q);
471 } else {
472 fail++;
473 /* show the first few miscompares */
474 if (fail < 10)
475 printk(KERN_DEBUG "[%p] = 0x%08x != [%p] = 0x%08x ?!\n", p, *p, q, *q);
476 /* but stop after a while */
477 else if (fail == 10)
478 printk(KERN_DEBUG "---more errors follow! not printed---\n");
479 else
480 /* stop compare after this many errors */
481 break;
483 p++;
484 q++;
486 if (!fail)
487 result = 0;
488 return result;
491 /* dma_test() - Perform DMA loop back test to end point and back to root complex.
493 * Allocate a cache-coherent buffer in host memory, consisting of four pages.
495 * Fill the four memory pages such that each 32-bit word contains its own address.
497 * Now perform a loop back test, have the end point device copy the first buffer
498 * half to end point memory, then have it copy back into the second half.
500 * Create a descriptor table to copy the first buffer half into End Point
501 * memory. Instruct the End Point to do a DMA read using that table.
503 * Create a descriptor table to copy End Point memory to the second buffer
504 * half. Instruct the End Point to do a DMA write using that table.
506 * Compare results, fail or pass.
509 static int __devinit dma_test(struct ape_dev *ape, struct pci_dev *dev)
511 /* test result; guilty until proven innocent */
512 int result = -1;
513 /* the DMA read header sits at address 0x00 of the DMA engine BAR */
514 struct ape_chdma_header *write_header = (struct ape_chdma_header *)ape->bar[APE_BAR_HEADER];
515 /* the write DMA header sits after the read header at address 0x10 */
516 struct ape_chdma_header *read_header = write_header + 1;
517 /* virtual address of the allocated buffer */
518 u8 *buffer_virt = 0;
519 /* bus address of the allocated buffer */
520 dma_addr_t buffer_bus = 0;
521 int i, n = 0, irq_count;
523 /* temporary value used to construct 32-bit data words */
524 u32 w;
526 printk(KERN_DEBUG "bar_tests(), PAGE_SIZE = 0x%0x\n", (int)PAGE_SIZE);
527 printk(KERN_DEBUG "write_header = 0x%p.\n", write_header);
528 printk(KERN_DEBUG "read_header = 0x%p.\n", read_header);
529 printk(KERN_DEBUG "&write_header->w3 = 0x%p\n", &write_header->w3);
530 printk(KERN_DEBUG "&read_header->w3 = 0x%p\n", &read_header->w3);
531 printk(KERN_DEBUG "ape->table_virt = 0x%p.\n", ape->table_virt);
533 if (!write_header || !read_header || !ape->table_virt)
534 goto fail;
536 /* allocate and map coherently-cached memory for a DMA-able buffer */
537 /* @see Documentation/PCI/PCI-DMA-mapping.txt, near line 318 */
538 buffer_virt = (u8 *)pci_alloc_consistent(dev, PAGE_SIZE * 4, &buffer_bus);
539 if (!buffer_virt) {
540 printk(KERN_DEBUG "Could not allocate coherent DMA buffer.\n");
541 goto fail;
543 printk(KERN_DEBUG "Allocated cache-coherent DMA buffer (virtual address = 0x%016llx, bus address = 0x%016llx).\n",
544 (u64)buffer_virt, (u64)buffer_bus);
546 /* fill first half of buffer with its virtual address as data */
547 for (i = 0; i < 4 * PAGE_SIZE; i += 4)
548 #if 0
549 *(u32 *)(buffer_virt + i) = i / PAGE_SIZE + 1;
550 #else
551 *(u32 *)(buffer_virt + i) = (buffer_virt + i);
552 #endif
553 #if 0
554 compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192);
555 #endif
557 #if 0
558 /* fill second half of buffer with zeroes */
559 for (i = 2 * PAGE_SIZE; i < 4 * PAGE_SIZE; i += 4)
560 *(u32 *)(buffer_virt + i) = 0;
561 #endif
563 /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
564 ape->table_virt->w3 = cpu_to_le32(0x0000FADE);
566 /* fill in first descriptor */
567 n = 0;
568 /* read 8192 bytes from RC buffer to EP address 4096 */
569 ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus, 4096, 2 * PAGE_SIZE);
570 #if 1
571 for (i = 0; i < 255; i++)
572 ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus, 4096, 2 * PAGE_SIZE);
573 /* index of last descriptor */
574 n = i - 1;
575 #endif
576 #if 0
577 /* fill in next descriptor */
578 n++;
579 /* read 1024 bytes from RC buffer to EP address 4096 + 1024 */
580 ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 1024, 4096 + 1024, 1024);
581 #endif
583 #if 1
584 /* enable MSI after the last descriptor is completed */
585 if (ape->msi_enabled)
586 ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/;
587 #endif
588 #if 0
589 /* dump descriptor table for debugging */
590 printk(KERN_DEBUG "Descriptor Table (Read, in Root Complex Memory, # = %d)\n", n + 1);
591 for (i = 0; i < 4 + (n + 1) * 4; i += 4) {
592 u32 *p = (u32 *)ape->table_virt;
593 p += i;
594 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p));
595 p++;
596 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
597 p++;
598 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
599 p++;
600 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
602 #endif
603 /* set available number of descriptors in table */
604 w = (u32)(n + 1);
605 w |= (1UL << 18)/*global EPLAST_EN*/;
606 #if 0
607 if (ape->msi_enabled)
608 w |= (1UL << 17)/*global MSI*/;
609 #endif
610 printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", w, (void *)&read_header->w0);
611 iowrite32(w, &read_header->w0);
613 /* write table address (higher 32-bits) */
614 printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)((ape->table_bus >> 16) >> 16), (void *)&read_header->bdt_addr_h);
615 iowrite32(pci_dma_h(ape->table_bus), &read_header->bdt_addr_h);
617 /* write table address (lower 32-bits) */
618 printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)(ape->table_bus & 0xffffffffUL), (void *)&read_header->bdt_addr_l);
619 iowrite32(pci_dma_l(ape->table_bus), &read_header->bdt_addr_l);
621 /* memory write barrier */
622 wmb();
623 printk(KERN_DEBUG "Flush posted writes\n");
624 /** FIXME Add dummy read to flush posted writes but need a readable location! */
625 #if 0
626 (void)ioread32();
627 #endif
629 /* remember IRQ count before the transfer */
630 irq_count = ape->irq_count;
631 /* write number of descriptors - this starts the DMA */
632 printk(KERN_DEBUG "\nStart DMA read\n");
633 printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)n, (void *)&read_header->w3);
634 iowrite32(n, &read_header->w3);
635 printk(KERN_DEBUG "EPLAST = %lu\n", le32_to_cpu(*(u32 *)&ape->table_virt->w3) & 0xffffUL);
637 /** memory write barrier */
638 wmb();
639 /* dummy read to flush posted writes */
640 /* FIXME Need a readable location! */
641 #if 0
642 (void)ioread32();
643 #endif
644 printk(KERN_DEBUG "POLL FOR READ:\n");
645 /* poll for chain completion, 1000 times 1 millisecond */
646 for (i = 0; i < 100; i++) {
647 volatile u32 *p = &ape->table_virt->w3;
648 u32 eplast = le32_to_cpu(*p) & 0xffffUL;
649 printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n);
650 if (eplast == n) {
651 printk(KERN_DEBUG "DONE\n");
652 /* print IRQ count before the transfer */
653 printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count);
654 break;
656 udelay(100);
659 /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */
660 ape->table_virt->w3 = cpu_to_le32(0x0000FADE);
662 /* setup first descriptor */
663 n = 0;
664 ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 8192, 4096, 2 * PAGE_SIZE);
665 #if 1
666 for (i = 0; i < 255; i++)
667 ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus + 8192, 4096, 2 * PAGE_SIZE);
669 /* index of last descriptor */
670 n = i - 1;
671 #endif
672 #if 1 /* test variable, make a module option later */
673 if (ape->msi_enabled)
674 ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/;
675 #endif
676 #if 0
677 /* dump descriptor table for debugging */
678 printk(KERN_DEBUG "Descriptor Table (Write, in Root Complex Memory, # = %d)\n", n + 1);
679 for (i = 0; i < 4 + (n + 1) * 4; i += 4) {
680 u32 *p = (u32 *)ape->table_virt;
681 p += i;
682 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p));
683 p++;
684 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
685 p++;
686 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
687 p++;
688 printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p));
690 #endif
692 /* set number of available descriptors in the table */
693 w = (u32)(n + 1);
694 /* enable updates of eplast for each descriptor completion */
695 w |= (u32)(1UL << 18)/*global EPLAST_EN*/;
696 #if 0 /* test variable, make a module option later */
697 /* enable MSI for each descriptor completion */
698 if (ape->msi_enabled)
699 w |= (1UL << 17)/*global MSI*/;
700 #endif
701 iowrite32(w, &write_header->w0);
702 iowrite32(pci_dma_h(ape->table_bus), &write_header->bdt_addr_h);
703 iowrite32(pci_dma_l(ape->table_bus), &write_header->bdt_addr_l);
705 /** memory write barrier and flush posted writes */
706 wmb();
707 /* dummy read to flush posted writes */
708 /* FIXME Need a readable location! */
709 #if 0
710 (void)ioread32();
711 #endif
712 irq_count = ape->irq_count;
714 printk(KERN_DEBUG "\nStart DMA write\n");
715 iowrite32(n, &write_header->w3);
717 /** memory write barrier */
718 wmb();
719 /** dummy read to flush posted writes */
720 /* (void) ioread32(); */
722 printk(KERN_DEBUG "POLL FOR WRITE:\n");
723 /* poll for completion, 1000 times 1 millisecond */
724 for (i = 0; i < 100; i++) {
725 volatile u32 *p = &ape->table_virt->w3;
726 u32 eplast = le32_to_cpu(*p) & 0xffffUL;
727 printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n);
728 if (eplast == n) {
729 printk(KERN_DEBUG "DONE\n");
730 /* print IRQ count before the transfer */
731 printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count);
732 break;
734 udelay(100);
736 /* soft-reset DMA write engine */
737 iowrite32(0x0000ffffUL, &write_header->w0);
738 /* soft-reset DMA read engine */
739 iowrite32(0x0000ffffUL, &read_header->w0);
741 /** memory write barrier */
742 wmb();
743 /* dummy read to flush posted writes */
744 /* FIXME Need a readable location! */
745 #if 0
746 (void)ioread32();
747 #endif
748 /* compare first half of buffer with second half, should be identical */
749 result = compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192);
750 printk(KERN_DEBUG "DMA loop back test %s.\n", result ? "FAILED" : "PASSED");
752 pci_free_consistent(dev, 4 * PAGE_SIZE, buffer_virt, buffer_bus);
753 fail:
754 printk(KERN_DEBUG "bar_tests() end, result %d\n", result);
755 return result;
758 /* Called when the PCI sub system thinks we can control the given device.
759 * Inspect if we can support the device and if so take control of it.
761 * Return 0 when we have taken control of the given device.
763 * - allocate board specific bookkeeping
764 * - allocate coherently-mapped memory for the descriptor table
765 * - enable the board
766 * - verify board revision
767 * - request regions
768 * - query DMA mask
769 * - obtain and request irq
770 * - map regions into kernel address space
772 static int __devinit probe(struct pci_dev *dev, const struct pci_device_id *id)
774 int rc = 0;
775 struct ape_dev *ape = NULL;
776 u8 irq_pin, irq_line;
777 printk(KERN_DEBUG "probe(dev = 0x%p, pciid = 0x%p)\n", dev, id);
779 /* allocate memory for per-board book keeping */
780 ape = kzalloc(sizeof(struct ape_dev), GFP_KERNEL);
781 if (!ape) {
782 printk(KERN_DEBUG "Could not kzalloc()ate memory.\n");
783 goto err_ape;
785 ape->pci_dev = dev;
786 dev_set_drvdata(&dev->dev, ape);
787 printk(KERN_DEBUG "probe() ape = 0x%p\n", ape);
789 printk(KERN_DEBUG "sizeof(struct ape_chdma_table) = %d.\n",
790 (int)sizeof(struct ape_chdma_table));
791 /* the reference design has a size restriction on the table size */
792 BUG_ON(sizeof(struct ape_chdma_table) > APE_CHDMA_TABLE_SIZE);
794 /* allocate and map coherently-cached memory for a descriptor table */
795 /* @see LDD3 page 446 */
796 ape->table_virt = (struct ape_chdma_table *)pci_alloc_consistent(dev,
797 APE_CHDMA_TABLE_SIZE, &ape->table_bus);
798 /* could not allocate table? */
799 if (!ape->table_virt) {
800 printk(KERN_DEBUG "Could not dma_alloc()ate_coherent memory.\n");
801 goto err_table;
804 printk(KERN_DEBUG "table_virt = 0x%16llx, table_bus = 0x%16llx.\n",
805 (u64)ape->table_virt, (u64)ape->table_bus);
807 /* enable device */
808 rc = pci_enable_device(dev);
809 if (rc) {
810 printk(KERN_DEBUG "pci_enable_device() failed\n");
811 goto err_enable;
814 /* enable bus master capability on device */
815 pci_set_master(dev);
816 /* enable message signaled interrupts */
817 rc = pci_enable_msi(dev);
818 /* could not use MSI? */
819 if (rc) {
820 /* resort to legacy interrupts */
821 printk(KERN_DEBUG "Could not enable MSI interrupting.\n");
822 ape->msi_enabled = 0;
823 /* MSI enabled, remember for cleanup */
824 } else {
825 printk(KERN_DEBUG "Enabled MSI interrupting.\n");
826 ape->msi_enabled = 1;
829 pci_read_config_byte(dev, PCI_REVISION_ID, &ape->revision);
830 #if 0 /* example */
831 /* (for example) this driver does not support revision 0x42 */
832 if (ape->revision == 0x42) {
833 printk(KERN_DEBUG "Revision 0x42 is not supported by this driver.\n");
834 rc = -ENODEV;
835 goto err_rev;
837 #endif
838 /** XXX check for native or legacy PCIe endpoint? */
840 rc = pci_request_regions(dev, DRV_NAME);
841 /* could not request all regions? */
842 if (rc) {
843 /* assume device is in use (and do not disable it later!) */
844 ape->in_use = 1;
845 goto err_regions;
847 ape->got_regions = 1;
849 #if 1 /* @todo For now, disable 64-bit, because I do not understand the implications (DAC!) */
850 /* query for DMA transfer */
851 /* @see Documentation/PCI/PCI-DMA-mapping.txt */
852 if (!pci_set_dma_mask(dev, DMA_BIT_MASK(64))) {
853 pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
854 /* use 64-bit DMA */
855 printk(KERN_DEBUG "Using a 64-bit DMA mask.\n");
856 } else
857 #endif
858 if (!pci_set_dma_mask(dev, DMA_BIT_MASK(32))) {
859 printk(KERN_DEBUG "Could not set 64-bit DMA mask.\n");
860 pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(32));
861 /* use 32-bit DMA */
862 printk(KERN_DEBUG "Using a 32-bit DMA mask.\n");
863 } else {
864 printk(KERN_DEBUG "No suitable DMA possible.\n");
865 /** @todo Choose proper error return code */
866 rc = -1;
867 goto err_mask;
870 rc = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin);
871 /* could not read? */
872 if (rc)
873 goto err_irq;
874 printk(KERN_DEBUG "IRQ pin #%d (0=none, 1=INTA#...4=INTD#).\n", irq_pin);
876 /* @see LDD3, page 318 */
877 rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq_line);
878 /* could not read? */
879 if (rc) {
880 printk(KERN_DEBUG "Could not query PCI_INTERRUPT_LINE, error %d\n", rc);
881 goto err_irq;
883 printk(KERN_DEBUG "IRQ line #%d.\n", irq_line);
884 #if 1
885 irq_line = dev->irq;
886 /* @see LDD3, page 259 */
887 rc = request_irq(irq_line, altpciechdma_isr, IRQF_SHARED, DRV_NAME, (void *)ape);
888 if (rc) {
889 printk(KERN_DEBUG "Could not request IRQ #%d, error %d\n", irq_line, rc);
890 ape->irq_line = -1;
891 goto err_irq;
893 /* remember which irq we allocated */
894 ape->irq_line = (int)irq_line;
895 printk(KERN_DEBUG "Succesfully requested IRQ #%d with dev_id 0x%p\n", irq_line, ape);
896 #endif
897 /* show BARs */
898 scan_bars(ape, dev);
899 /* map BARs */
900 rc = map_bars(ape, dev);
901 if (rc)
902 goto err_map;
903 #if ALTPCIECHDMA_CDEV
904 /* initialize character device */
905 rc = sg_init(ape);
906 if (rc)
907 goto err_cdev;
908 #endif
909 /* perform DMA engines loop back test */
910 rc = dma_test(ape, dev);
911 (void)rc;
912 /* succesfully took the device */
913 rc = 0;
914 printk(KERN_DEBUG "probe() successful.\n");
915 goto end;
916 err_cdev:
917 /* unmap the BARs */
918 unmap_bars(ape, dev);
919 err_map:
920 /* free allocated irq */
921 if (ape->irq_line >= 0)
922 free_irq(ape->irq_line, (void *)ape);
923 err_irq:
924 if (ape->msi_enabled)
925 pci_disable_msi(dev);
926 /* disable the device iff it is not in use */
927 if (!ape->in_use)
928 pci_disable_device(dev);
929 if (ape->got_regions)
930 pci_release_regions(dev);
931 err_mask:
932 err_regions:
933 err_rev:
934 /* clean up everything before device enable() */
935 err_enable:
936 if (ape->table_virt)
937 pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus);
938 /* clean up everything before allocating descriptor table */
939 err_table:
940 if (ape)
941 kfree(ape);
942 err_ape:
943 end:
944 return rc;
947 static void __devexit remove(struct pci_dev *dev)
949 struct ape_dev *ape = dev_get_drvdata(&dev->dev);
951 printk(KERN_DEBUG "remove(0x%p)\n", dev);
952 printk(KERN_DEBUG "remove(dev = 0x%p) where ape = 0x%p\n", dev, ape);
954 /* remove character device */
955 #if ALTPCIECHDMA_CDEV
956 sg_exit(ape);
957 #endif
959 if (ape->table_virt)
960 pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus);
962 /* free IRQ
963 * @see LDD3 page 279
965 if (ape->irq_line >= 0) {
966 printk(KERN_DEBUG "Freeing IRQ #%d for dev_id 0x%08lx.\n",
967 ape->irq_line, (unsigned long)ape);
968 free_irq(ape->irq_line, (void *)ape);
970 /* MSI was enabled? */
971 if (ape->msi_enabled) {
972 /* Disable MSI @see Documentation/MSI-HOWTO.txt */
973 pci_disable_msi(dev);
974 ape->msi_enabled = 0;
976 /* unmap the BARs */
977 unmap_bars(ape, dev);
978 if (!ape->in_use)
979 pci_disable_device(dev);
980 if (ape->got_regions)
981 /* to be called after device disable */
982 pci_release_regions(dev);
985 #if ALTPCIECHDMA_CDEV
988 * Called when the device goes from unused to used.
990 static int sg_open(struct inode *inode, struct file *file)
992 struct ape_dev *ape;
993 printk(KERN_DEBUG DRV_NAME "_open()\n");
994 /* pointer to containing data structure of the character device inode */
995 ape = container_of(inode->i_cdev, struct ape_dev, cdev);
996 /* create a reference to our device state in the opened file */
997 file->private_data = ape;
998 /* create virtual memory mapper */
999 ape->sgm = sg_create_mapper(MAX_CHDMA_SIZE);
1000 return 0;
1004 * Called when the device goes from used to unused.
1006 static int sg_close(struct inode *inode, struct file *file)
1008 /* fetch device specific data stored earlier during open */
1009 struct ape_dev *ape = (struct ape_dev *)file->private_data;
1010 printk(KERN_DEBUG DRV_NAME "_close()\n");
1011 /* destroy virtual memory mapper */
1012 sg_destroy_mapper(ape->sgm);
1013 return 0;
1016 static ssize_t sg_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
1018 /* fetch device specific data stored earlier during open */
1019 struct ape_dev *ape = (struct ape_dev *)file->private_data;
1020 (void)ape;
1021 printk(KERN_DEBUG DRV_NAME "_read(buf=0x%p, count=%lld, pos=%llu)\n", buf, (s64)count, (u64)*pos);
1022 return count;
1025 /* sg_write() - Write to the device
1027 * @buf userspace buffer
1028 * @count number of bytes in the userspace buffer
1030 * Iterate over the userspace buffer, taking at most 255 * PAGE_SIZE bytes for
1031 * each DMA transfer.
1032 * For each transfer, get the user pages, build a sglist, map, build a
1033 * descriptor table. submit the transfer. wait for the interrupt handler
1034 * to wake us on completion.
1036 static ssize_t sg_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
1038 int hwnents, tents;
1039 size_t transfer_len, remaining = count, done = 0;
1040 u64 transfer_addr = (u64)buf;
1041 /* fetch device specific data stored earlier during open */
1042 struct ape_dev *ape = (struct ape_dev *)file->private_data;
1043 printk(KERN_DEBUG DRV_NAME "_write(buf=0x%p, count=%lld, pos=%llu)\n",
1044 buf, (s64)count, (u64)*pos);
1045 /* TODO transfer boundaries at PAGE_SIZE granularity */
1046 while (remaining > 0) {
1047 /* limit DMA transfer size */
1048 transfer_len = (remaining < APE_CHDMA_MAX_TRANSFER_LEN) ? remaining :
1049 APE_CHDMA_MAX_TRANSFER_LEN;
1050 /* get all user space buffer pages and create a scattergather list */
1051 sgm_map_user_pages(ape->sgm, transfer_addr, transfer_len, 0/*read from userspace*/);
1052 printk(KERN_DEBUG DRV_NAME "mapped_pages=%d\n", ape->sgm->mapped_pages);
1053 /* map all entries in the scattergather list */
1054 hwnents = pci_map_sg(ape->pci_dev, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE);
1055 printk(KERN_DEBUG DRV_NAME "hwnents=%d\n", hwnents);
1056 /* build device descriptor tables and submit them to the DMA engine */
1057 tents = ape_sg_to_chdma_table(ape->sgm->sgl, hwnents, 0, &ape->table_virt->desc[0], 4096);
1058 printk(KERN_DEBUG DRV_NAME "tents=%d\n", hwnents);
1059 #if 0
1060 while (tables) {
1061 /* TODO build table */
1062 /* TODO submit table to the device */
1063 /* if engine stopped and unfinished work then start engine */
1065 put ourselves on wait queue
1066 #endif
1068 dma_unmap_sg(NULL, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE);
1069 /* dirty and free the pages */
1070 sgm_unmap_user_pages(ape->sgm, 1/*dirtied*/);
1071 /* book keeping */
1072 transfer_addr += transfer_len;
1073 remaining -= transfer_len;
1074 done += transfer_len;
1076 return done;
1080 * character device file operations
1082 static const struct file_operations sg_fops = {
1083 .owner = THIS_MODULE,
1084 .open = sg_open,
1085 .release = sg_close,
1086 .read = sg_read,
1087 .write = sg_write,
1090 /* sg_init() - Initialize character device
1092 * XXX Should ideally be tied to the device, on device probe, not module init.
1094 static int sg_init(struct ape_dev *ape)
1096 int rc;
1097 printk(KERN_DEBUG DRV_NAME " sg_init()\n");
1098 /* allocate a dynamically allocated character device node */
1099 rc = alloc_chrdev_region(&ape->cdevno, 0/*requested minor*/, 1/*count*/, DRV_NAME);
1100 /* allocation failed? */
1101 if (rc < 0) {
1102 printk("alloc_chrdev_region() = %d\n", rc);
1103 goto fail_alloc;
1105 /* couple the device file operations to the character device */
1106 cdev_init(&ape->cdev, &sg_fops);
1107 ape->cdev.owner = THIS_MODULE;
1108 /* bring character device live */
1109 rc = cdev_add(&ape->cdev, ape->cdevno, 1/*count*/);
1110 if (rc < 0) {
1111 printk("cdev_add() = %d\n", rc);
1112 goto fail_add;
1114 printk(KERN_DEBUG "altpciechdma = %d:%d\n", MAJOR(ape->cdevno), MINOR(ape->cdevno));
1115 return 0;
1116 fail_add:
1117 /* free the dynamically allocated character device node */
1118 unregister_chrdev_region(ape->cdevno, 1/*count*/);
1119 fail_alloc:
1120 return -1;
1123 /* sg_exit() - Cleanup character device
1125 * XXX Should ideally be tied to the device, on device remove, not module exit.
1128 static void sg_exit(struct ape_dev *ape)
1130 printk(KERN_DEBUG DRV_NAME " sg_exit()\n");
1131 /* remove the character device */
1132 cdev_del(&ape->cdev);
1133 /* free the dynamically allocated character device node */
1134 unregister_chrdev_region(ape->cdevno, 1/*count*/);
1137 #endif /* ALTPCIECHDMA_CDEV */
1139 /* used to register the driver with the PCI kernel sub system
1140 * @see LDD3 page 311
1142 static struct pci_driver pci_driver = {
1143 .name = DRV_NAME,
1144 .id_table = ids,
1145 .probe = probe,
1146 .remove = __devexit_p(remove),
1147 /* resume, suspend are optional */
1151 * alterapciechdma_init() - Module initialization, registers devices.
1153 static int __init alterapciechdma_init(void)
1155 int rc = 0;
1156 printk(KERN_DEBUG DRV_NAME " init(), built at " __DATE__ " " __TIME__ "\n");
1157 /* register this driver with the PCI bus driver */
1158 rc = pci_register_driver(&pci_driver);
1159 if (rc < 0)
1160 return rc;
1161 return 0;
1165 * alterapciechdma_init() - Module cleanup, unregisters devices.
1167 static void __exit alterapciechdma_exit(void)
1169 printk(KERN_DEBUG DRV_NAME " exit(), built at " __DATE__ " " __TIME__ "\n");
1170 /* unregister this driver from the PCI bus driver */
1171 pci_unregister_driver(&pci_driver);
1174 MODULE_LICENSE("GPL");
1176 module_init(alterapciechdma_init);
1177 module_exit(alterapciechdma_exit);