2 Copyright © 2014, The AROS Development Team. All rights reserved.
9 #include <hardware/sun4i/ccm.h>
10 #include <hardware/sun4i/pio.h>
11 #include <hardware/sun4i/uart.h>
12 #include <hardware/sun4i/dram.h>
13 #include <hardware/sun4i/tmr.h>
16 #include <asm/arm/mmu.h>
17 #include <asm/arm/cp15.h>
/*
 * Read-modify-write helpers. Each one reads addr, clears and/or sets the
 * given bit masks and stores the result back into addr. In this file they
 * are applied to lvalues such as PLL5_CFG, AHB_GATE0 and DRAM_MCR.
 *
 * Every argument and the whole expansion are parenthesised so the macros
 * behave as a single expression wherever they are used. Note that addr is
 * evaluated twice (one read, one write), so it must not have side effects.
 */
#define clrbits(addr, clear)            ((addr) = ((addr) & ~(clear)))
#define setbits(addr, set)              ((addr) = ((addr) | (set)))
#define clrsetbits(addr, clear, set)    ((addr) = (((addr) & ~(clear)) | (set)))

/* Iteration count handed to asmdelay() while waiting for the PLLs to settle */
#define PLL_SETTLING_DELAY 100000 /* CPU is running on 24MHz clock when this gets used */
30 * FIXME: Use timer0, it's so simple
/*
 * Busy-wait by counting t down to zero.
 *
 * On ARM each iteration is a "subs"/"bne" instruction pair, so the real
 * time spent depends on the current CPU clock. The loop modifies the
 * condition flags, which is declared to the compiler through the "cc"
 * clobber so that it does not keep a live comparison result across the asm.
 *
 * t == 0 returns immediately; without the guard the counter would wrap
 * around and spin for roughly 2^32 iterations.
 *
 * On non-ARM builds (e.g. host-side tests) a plain volatile countdown is
 * used instead of the inline assembly.
 */
void asmdelay(uint32_t t) {
    if (t == 0) {
        return;
    }
#if defined(__arm__)
    __asm__ __volatile__ (
        "1:                         \n"
        "       subs    %0, %0, #1  \n"
        "       bne     1b          \n"
        : "+r" (t)
        :
        : "cc");
#else
    for (volatile uint32_t remaining = t; remaining != 0; remaining--) {
        /* volatile counter keeps the empty loop from being optimised away */
    }
#endif
}
/* Debug output routine; only declared here. It is called throughout this file with a format string and printf-like conversions such as %u and %x. */
38 void kprintf(const char *format
, ...);
/* Entry symbol; the assembly fragment that follows exports a label of this name which loads the stack pointer. */
40 void bootstrapS(void);
42 " .globl bootstrapS \n"
43 " .type bootstrapS,%function \n"
45 "bootstrapS: ldr sp, =(0x8000-0x1000) \n"
49 void __attribute__((noreturn
)) bootstrapC(void) {
51 uint32_t CPU_CFG_CHIP_REV
;
53 /* Disable MMU, level one data cache and strict alignment fault checking */
54 CP15_C1CR_Clear(C1CRF_C
|C1CRF_A
|C1CRF_M
);
55 /* Enable level one data cache */
56 CP15_C1CR_Set(C1CRF_C
);
57 /* Set cp10 and cp11 for Privileged and User mode access */
58 CP15_C1CACR_All(C1CACRV_CPAP(10)|C1CACRV_CPAP(11));
59 /* Enable VFP (and NEON in our case) */
60 fmxr(cr8
, fmrx(cr8
) | 1 << 30);
63 * PLL1 output for CPU = (24MHz*N*K)/(M*P)
67 * PLL1_P = (0)=1 (1/2/4/8 or the exponent of 2 in other words)
69 PLL1_CFG
= 0xa1005000;
72 * PLL5 output for DDR = (24MHz*N*K)/M
73 * PLL5 output for others = (24MHz*N*K)/P
75 * pcDuino has four(4) Hynix DDR3 chips, U2(d0-d7), U3(d8-d15), U10(d16-d23) and U11(d24-d31) in one rank
76 * (pcDuino 408MHz, H5TQ2G83EFR-PBC 250K 4 x (256M x 8), https://www.skhynix.com/inc/pdfDownload.jsp?path=/datasheet/pdf/dram/Computing_DDR3_H5TQ2G4%288%293EFR%28Rev1.1%29.pdf)
80 * PLL5_N = (17)=17 (408MHz/24MHz)
81 * PLL5_P = (0)=1 (1/2/4/8 or the exponent of 2 in other words)
83 * PLL5 bypass disabled, PLL5 enabled and DDR clk out disabled
85 * Writes over some unknown bits, bad bad... but seems to work :)
89 //PLL5_CFG = 0x91059191; // 408MHz FIXME: document bits that are set and make it a macro
90 //PLL5_CFG = 0x91058091|(17<<8); // 408MHz DRAM N=17
91 //PLL5_CFG = 0x91058091|(18<<8); // 432MHz DRAM N=18
92 //PLL5_CFG = 0x91058091|(19<<8); // 456MHz DRAM N=19
93 //PLL5_CFG = 0x91048091|(20<<8)|(1<<17); // 480MHz DRAM N=20 P=2
94 //PLL5_CFG = 0x91048091|(24<<8)|(1<<17); // 576MHz DRAM N=24 P=2
95 PLL5_CFG
= 0x91048091|(25<<8)|(1<<17); // 600MHz DRAM N=25 P=2
97 * Setup APB1 clock and open the gate for UART0 clock, clear others
99 APB1_CLK_DIV_CFG
= (APB1_CLK_SRC_OSC24M
<< 24 | APB1_FACTOR_N_1
<< 16 | APB1_FACTOR_M_1
<< 0);
100 APB1_GATE
= (0x1<<16);
103 * Setup IO pins for UART0
105 PIO_CFG2_REG(PB
) = (PIO_CFG2_REG(PB
) & ~(0b01110111000000000000000000000000)) | 0b00100010000000000000000000000000;
108 * Setup UART0 (115200, 8bits)
111 while(UART0_USR
& 1);
112 UART0_LCR
= (UART0_LCR
| (1<<7));
113 UART0_DLL
= (13>>0) & 0xff;
114 UART0_DLH
= (13>>8) & 0xff;
115 UART0_LCR
= (UART0_LCR
& ~(1<<7));
123 * PLL clocks need time to lock on (or settle on a frequency), set both PLL clocks and then delay for both of them.
124 * It would be nice if there was a PLL lock bit to monitor.
125 * "Also, once the DLH is set, at least 8 clock cycles of the slowest UART clock should be allowed to pass before transmitting or receiving data."
127 asmdelay(PLL_SETTLING_DELAY
);
130 * Set CPU to use PLL1
132 CPU_AHB_APB0_CFG
= (AXI_DIV_1
<< 0 | AHB_DIV_2
<< 4 | APB0_DIV_1
<< 8 | CPU_CLK_SRC_PLL1
<< 16);
135 * Get chip revision. Clear the register first and see what pops up again or are read only.
136 * Apparently revision A has some bits inverted.
139 CPU_CFG_CHIP_REV
= ((TIMER_CPU_CFG
>>6) & 0b11);
142 * Allwinner user manual says "If the clock source is changed, at most to wait for 8 present running clock cycles"
146 kprintf("Copyright (c)2014, The AROS Development Team. All rights reserved.\n\n");
148 kprintf("Allwinner A10 revision ");
149 switch(CPU_CFG_CHIP_REV
) {
150 case TIMER_CPU_CFG_CHIP_REV_A
:
153 case TIMER_CPU_CFG_CHIP_REV_C1
:
156 case TIMER_CPU_CFG_CHIP_REV_C2
:
159 case TIMER_CPU_CFG_CHIP_REV_B
:
163 kprintf("unknown\n\n");
170 - Move register definitions to header files
172 - Boot0 header that MKSUNXIBOOT creates can be truncated a bit (or can it?)
173 - Boot0 header will contain UARTDEBUG register address (and used GPIO pins same as on other operating systems)
174 - Boot0 header should be modifiable from Aros installer (Poseidon Sunxi class in case the destination is NAND)
176 - Our bootstrap code should enable DRAM and check its size, maximum of 2Gb DRAM available on sun4i
177 - Boot0 header defines also the used DRAM type and its timing values etc. (DDR2 or DDR3), can it be automated to "safe" defaults?
178 - DDR3 memory chips use ODT (on die termination)
179 - Allwinner A10 has two(2) such pins, ODT0 and ODT1
180 - Unsure if PMU (power management unit) is needed before DRAM can be initialized
182 - Bootstrap code should enable NAND (in case the code is started from NAND it might be already setup by the BROM code)
183 - Bootstrap code should enable MMC access (in case the code is started from MMC it might be already setup by the BROM code)
184 - Bootstrap code will need a read only filesystem (FAT or SFS)
186 - Bootstrap code sets up the MMU
187 - Bootstrap code reads Aros module file list and loads and relocates ELF modules to DRAM
188 - Modules are loaded from NAND or MMC, depending on boot priority and presence of MMC card
190 - Bootstrap code jumps to Aros kernel and passes on information
193 uint32_t PLL1_P
, PLL1_N
, PLL1_K
, PLL1_M
, CPU_CLK
;
195 PLL1_M
= ((PLL1_CFG
>> 0) & 0x3) + 1;
196 PLL1_K
= ((PLL1_CFG
>> 4) & 0x3) + 1;
197 PLL1_N
= (PLL1_CFG
>> 8) & 0x1f;
198 PLL1_P
= (PLL1_CFG
>> 16) & 0x3;
199 PLL1_P
= (1<<PLL1_P
);
201 CPU_CLK
= ((24*PLL1_N
*PLL1_K
)/(PLL1_M
*PLL1_P
));
203 kprintf("Bootstrap CPU clock is %uMHz\n", CPU_CLK
);
206 * Bits are identical to PLL1
208 uint32_t PLL5_P
, PLL5_N
, PLL5_K
, PLL5_M
, DRAM_CLK
, PLL5_CLK
;
210 PLL5_M
= ((PLL5_CFG
>> 0) & 0x3) + 1;
211 PLL5_K
= ((PLL5_CFG
>> 4) & 0x3) + 1;
212 PLL5_N
= (PLL5_CFG
>> 8) & 0x1f;
213 PLL5_P
= (PLL5_CFG
>> 16) & 0x3;
214 PLL5_P
= (1<<PLL5_P
);
216 DRAM_CLK
= ((24*PLL5_N
*PLL5_K
)/PLL5_M
);
217 PLL5_CLK
= ((24*PLL5_N
*PLL5_K
)/PLL5_P
);
219 kprintf("Bootstrap DDR3 clock is %uMHz (for others PLL5 clock is %uMHz)\n\n", DRAM_CLK
, PLL5_CLK
);
221 kprintf("PLL5_CFG = %x\n", PLL5_CFG
);
223 struct parameters_ddr3
*parameter_ddr3
= (struct parameters_ddr3
*)platform_ddr3
;
224 kprintf("tAA %uns\n", parameter_ddr3
->tAA
);
225 kprintf("tRCD %uns\n", parameter_ddr3
->tRCD
);
226 kprintf("tRP %uns\n", parameter_ddr3
->tRP
);
227 kprintf("tRC %uns\n", parameter_ddr3
->tRC
);
228 kprintf("tRAS %uns\n", parameter_ddr3
->tRAS
);
229 kprintf("tRFC %uns\n", parameter_ddr3
->tRFC
);
230 kprintf("tREFI %uns\n", parameter_ddr3
->tREFI
);
232 /* DDR3 setup [start] - minus PLL5 clock for SDRAM */
237 setbits(PLL5_CFG
, 0x1<<29);
239 //MBUS_CLK_CFG = 0x82000001;
244 clrbits(AHB_GATE0
, 0x1<<14);
246 setbits(AHB_GATE0
, 0x1<<14);
249 if (CPU_CFG_CHIP_REV
!= 0) {
250 setbits(DRAM_MCR
, 0x1<<12);
252 clrbits(DRAM_MCR
, 0x1<<12);
254 clrbits(DRAM_MCR
, 0x1<<12);
256 setbits(DRAM_MCR
, 0x1<<12);
259 clrsetbits(DRAM_MCR
, 0x3, (0x6<<12) | 0xffc);
264 clrbits(DRAM_CLK_CFG
, 0x1<<15);
267 * DRAM controller needs to be kicked with a magic word
269 DRAM_CSEL
= SUN4I_DRAM_MAGIC1
;
271 setbits(DRAM_CCR
, 0x1<<28);
276 clrsetbits(DRAM_DLLCR0
, 0x1<<30, 0x1<<31);
277 clrsetbits(DRAM_DLLCR1
, 0x1<<30, 0x1<<31);
278 clrsetbits(DRAM_DLLCR2
, 0x1<<30, 0x1<<31);
279 clrsetbits(DRAM_DLLCR3
, 0x1<<30, 0x1<<31);
280 clrsetbits(DRAM_DLLCR4
, 0x1<<30, 0x1<<31);
282 clrbits(DRAM_DLLCR0
, 0x3<<30);
283 clrbits(DRAM_DLLCR1
, 0x3<<30);
284 clrbits(DRAM_DLLCR2
, 0x3<<30);
285 clrbits(DRAM_DLLCR3
, 0x3<<30);
286 clrbits(DRAM_DLLCR4
, 0x3<<30);
288 clrsetbits(DRAM_DLLCR0
, 0x1<<31, 0x1<<30);
289 clrsetbits(DRAM_DLLCR1
, 0x1<<31, 0x1<<30);
290 clrsetbits(DRAM_DLLCR2
, 0x1<<31, 0x1<<30);
291 clrsetbits(DRAM_DLLCR3
, 0x1<<31, 0x1<<30);
292 clrsetbits(DRAM_DLLCR4
, 0x1<<31, 0x1<<30);
296 * Configure DRAM (specific for pcDuino)
301 * Chip density: 2048 mebibits
304 DRAM_DCR
= 0x000030db;
309 setbits(DRAM_CLK_CFG
, 0x1<<15);
312 while (DRAM_CCR
& (0x1 << 31));
315 * Set ODT impedance divide ratio
317 DRAM_ZQCR0
= 0x07b00000;
320 * Set IO configuration register
322 DRAM_IOCR
= 0x00cc0000;
325 * Compute refresh interval
326 * tREFI is 7.8us from Hynix datasheet (normal temperature range, 3.9us for extended range)
327 * -> We need to convert it to our DDR3 clock ticks (nREFI)
329 * -> 7800ns/(DDR3_clk_period) = 7800nS*DDR3_clk
330 * -> DDR3_clk is already known so we use that
331 * -> nREFI = (7800nS*480MHz)/1000 = (7.8uS*480MHz) = 3744 ticks
333 * tRFC is given as clock ticks in Hynix datasheet (nRFC)
334 * nRFC for 2Gb DDR3-1066 is 86 ticks
335 * -> 1/(1066MHz/2) = 0.00187617 or so microseconds (about 1.876ns) for the clock period
336 * -> With 86 ticks this gives tRFC a value of 0.16135 microseconds or so (161.35ns)
337 * -> If we multiply this with the DDR3_clk frequency of the original 533MHz clock we should arrive at the same tick count of 86
338 * -> (161.35ns*533MHz)/1000 = 86
340 * -> Instead we need to calculate the correct tick count for our DDR3 clock (480MHz for now)
341 * -> (161.35ns*480MHz)/1000 = 77 ticks = nRFC
342 * -> Or more simply ((86/533MHz) = (nRFC/480MHz)) or nRFC = ((86/533MHz)*480MHz)
344 * DDR3_numr = Number of posted refreshes 0-8 (0=1) set it to 8 for now
348 uint32_t temp
, DDR3_nREFI
, DDR3_nRFC
, DDR3_nRFPRD
;
350 DDR3_nREFI
= ((parameter_ddr3
->tREFI
*DRAM_CLK
)/1000);
351 DDR3_nRFC
= ((parameter_ddr3
->tRFC
*DRAM_CLK
)/1000);
352 DDR3_nRFPRD
= ((DDR3_nREFI
*(DDR3_numr
+1))-200);
355 kprintf("DRAM_DRR %x\n", temp
);
357 kprintf("DDR3_nREFI %u\n", DDR3_nREFI
);
358 kprintf("DDR3_nRFC %u\n", DDR3_nRFC
);
359 kprintf("DDR3_nRFPRD %u\n", DDR3_nRFPRD
);
361 DRAM_DRR
= ((1<<31) | (DDR3_numr
<<24) | ((DDR3_nRFPRD
)<<8) | (DDR3_nRFC
));
364 kprintf("DRAM_DRR %x\n", temp
);
367 * Computed DRAM_DRR = 0x886f1642, CHECKME:
374 Bootstrap debug output:
376 Copyright (c)2014, The AROS Development Team. All rights reserved.
378 Allwinner A10 revision C2
380 Bootstrap CPU clock is 384MHz
381 Bootstrap DDR3 clock is 600MHz (for others PLL5 clock is 300MHz)
383 PLL5_CFG = 0x91069991
402 DDR3 data training succesful!
406 uint32_t DDR3_nRC
, DDR3_nRAS
, DDR3_nRCD
, DDR3_nRP
;
408 DDR3_nRC
= ((parameter_ddr3
->tRC
*DRAM_CLK
)/1000);
409 DDR3_nRAS
= ((parameter_ddr3
->tRAS
*DRAM_CLK
)/1000);
410 DDR3_nRCD
= ((parameter_ddr3
->tRCD
*DRAM_CLK
)/1000);
411 DDR3_nRP
= ((parameter_ddr3
->tRP
*DRAM_CLK
)/1000);
413 kprintf("DDR3_nRC %u\n", DDR3_nRC
);
414 kprintf("DDR3_nRAS %u\n", DDR3_nRAS
);
415 kprintf("DDR3_nRCD %u\n", DDR3_nRCD
);
416 kprintf("DDR3_nRP %u\n", DDR3_nRP
);
419 * Set timing parameters
421 DRAM_TPR0
= (0x00800092 | (DDR3_nRC
<<25) | (DDR3_nRAS
<<16) | (DDR3_nRCD
<<12) | (DDR3_nRP
<<8));
422 DRAM_TPR1
= 0x00001090;
423 DRAM_TPR2
= 0x0001a0c8;
426 kprintf("DRAM_TPR0 %x\n", temp
);
430 nCL
= ((parameter_ddr3
->tCL
*DRAM_CLK
)/1000);
431 kprintf("DDR3_nCL = %d\n", nCL
);
434 * Controller adds 4 extra cycles(or does it?), adjust the value
436 DRAM_MR
= (((nCL
-4)<<4) | (0x5<<9));
438 DRAM_EMR
= 0x00000004;
439 DRAM_EMR2
= 0x00000000;
440 DRAM_EMR3
= 0x00000000;
443 * Set DQS window mode
445 clrsetbits(DRAM_CCR
, 0x1<<17, 0x1<<14);
447 /* reset external DRAM */
448 setbits(DRAM_CCR
, 0x1<<31);
449 while (DRAM_CCR
& (0x1<<31));
451 clrbits(DRAM_CCR
, 0x1<<28);
454 * Trigger the data training and wait for it to finish
456 setbits(DRAM_CCR
, 0x1<<30);
457 while (DRAM_CCR
& (0x1<<30));
462 if(DRAM_CSR
& (0x1<<20)) {
463 kprintf("DDR3 data training failed!\n");
466 kprintf("DDR3 data training succesful!\n");
470 * Host port access and priority (USB, CPU, GPU etc.)
488 DRAM_HPCR16
= 0x1031;
489 DRAM_HPCR17
= 0x1031;
490 DRAM_HPCR18
= 0x0735;
491 DRAM_HPCR19
= 0x1035;
492 DRAM_HPCR20
= 0x1035;
493 DRAM_HPCR21
= 0x0731;
494 DRAM_HPCR22
= 0x1031;
495 DRAM_HPCR23
= 0x0735;
496 DRAM_HPCR24
= 0x1035;
497 DRAM_HPCR25
= 0x1031;
498 DRAM_HPCR26
= 0x0731;
499 DRAM_HPCR27
= 0x1035;
500 DRAM_HPCR28
= 0x1031;
501 DRAM_HPCR29
= 0x0301;
502 DRAM_HPCR30
= 0x0301;
503 DRAM_HPCR31
= 0x0731;
505 /* DDR3 setup [end] */
509 uint32_t t
, x
, y
, z
, w
;
518 for(i
=0; i
<1024*1024; i
++) {
521 w
= w
^ (w
>> 19) ^ t
^ (t
>> 8);
533 for(i
=0; i
<1024*1024; i
++) {
536 w
= w
^ (w
>> 19) ^ t
^ (t
>> 8);
538 kprintf("%x = %x (should be %x)\n", &a
[i
], a
[i
], w
);
544 * pcDuino uses CARD0 interface in SD card mode (PF io pins) and PH1 as card detect switch input with pull up resistor
545 * For generic bootstrap we will need information stored for used DEBUGUART and SD-card interface
547 PIO_CFG0_REG(PH
) = (PIO_CFG2_REG(PB
) & ~(0b00000000000000000000000001110000)) | 0b00000000000000000000000000000000;
549 BOOL cardinserted
= FALSE
;
551 if(!(PIO_DATA_REG(PH
) & 0b10)) {
552 kprintf("SD card presence detected\n");
557 if((PIO_DATA_REG(PH
) & 0b10) && (cardinserted
)) {
558 kprintf("SD card removed\n");
559 cardinserted
= FALSE
;
562 if(!(PIO_DATA_REG(PH
) & 0b10) && !(cardinserted
)) {
563 kprintf("SD card inserted\n");