Sorted and grouped tests.
[AROS.git] / arch / arm-sun4i / bootstrap / bootstrap.c
blob873b24844aaaa39e4ddbe99331d1abd23bef5b2b
1 /*
2 Copyright © 2014, The AROS Development Team. All rights reserved.
3 $Id$
5 Desc:
6 Lang: english
7 */
9 #include <hardware/sun4i/ccm.h>
10 #include <hardware/sun4i/pio.h>
11 #include <hardware/sun4i/uart.h>
12 #include <hardware/sun4i/dram.h>
13 #include <hardware/sun4i/tmr.h>
15 #include <asm/cpu.h>
16 #include <asm/arm/mmu.h>
17 #include <asm/arm/cp15.h>
19 #include "platform.h"
21 #include <stdio.h>
23 #define clrbits(addr, clear) addr = (addr & ~(clear))
24 #define setbits(addr, set) addr = (addr | (set))
25 #define clrsetbits(addr, clear, set) addr = ((addr & ~(clear)) | (set))
27 #define PLL_SETTLING_DELAY 100000 /* CPU is running on 24MHz clock when this gets used */
30 * FIXME: Use timer0, it's so simple
32 void asmdelay(uint32_t t) {
33 asm volatile ("1: \n" \
34 " subs %0, %1, #1 \n" \
35 " bne 1b ":"=r" (t):"0"(t));
38 void kprintf(const char *format, ...);
/*
 * bootstrapS: assembly entry stub, emitted into .text as a global function symbol.
 * It loads the stack pointer with 0x8000-0x1000 (0x7000) and then branches to
 * bootstrapC; it uses a plain branch, so control does not come back here.
 */
40 void bootstrapS(void);
41 asm(" .text \n"
42 " .globl bootstrapS \n"
43 " .type bootstrapS,%function \n"
44 " \n"
45 "bootstrapS: ldr sp, =(0x8000-0x1000) \n"
46 " b bootstrapC \n"
47 " \n");
49 void __attribute__((noreturn)) bootstrapC(void) {
51 uint32_t CPU_CFG_CHIP_REV;
53 /* Disable MMU, level one data cache and strict alignment fault checking */
54 CP15_C1CR_Clear(C1CRF_C|C1CRF_A|C1CRF_M);
55 /* Enable level one data cache */
56 CP15_C1CR_Set(C1CRF_C);
57 /* Set cp10 and cp11 for Privileged and User mode access */
58 CP15_C1CACR_All(C1CACRV_CPAP(10)|C1CACRV_CPAP(11));
59 /* Enable VFP (and NEON in our case) */
60 fmxr(cr8, fmrx(cr8) | 1 << 30);
63 * PLL1 output for CPU = (24MHz*N*K)/(M*P)
64 * PLL1_M = (0)=1
65 * PLL1_K = (0)=1
66 * PLL1_N = (16)=16
67 * PLL1_P = (0)=1 (1/2/4/8 or the exponent of 2 in other words)
69 PLL1_CFG = 0xa1005000;
72 * PLL5 output for DDR = (24MHz*N*K)/M
73 * PLL5 output for others = (24MHz*N*K)/P
75 * pcDuino has four(4) Hynix DDR3 chips, U2(d0-d7), U3(d8-d15), U10(d16-d23) and U11(d24-d31) in one rank
76 * (pcDuino 408MHz, H5TQ2G83EFR-PBC 250K 4 x (256M x 8), https://www.skhynix.com/inc/pdfDownload.jsp?path=/datasheet/pdf/dram/Computing_DDR3_H5TQ2G4%288%293EFR%28Rev1.1%29.pdf)
78 * PLL5_M = (0)=1
79 * PLL5_K = (0)=1
80 * PLL5_N = (17)=17 (408MHz/24MHz)
81 * PLL5_P = (0)=1 (1/2/4/8 or the exponent of 2 in other words)
83 * PLL5 bypass disabled, PLL5 enabled and DDR clk out disabled
85 * Writes over some unknown bits, bad bad... but seems to work :)
89 //PLL5_CFG = 0x91059191; // 408MHz FIXME: document bits that are set and make it a macro
90 //PLL5_CFG = 0x91058091|(17<<8); // 408MHz DRAM N=17
91 //PLL5_CFG = 0x91058091|(18<<8); // 432MHz DRAM N=18
92 //PLL5_CFG = 0x91058091|(19<<8); // 456MHz DRAM N=19
93 //PLL5_CFG = 0x91048091|(20<<8)|(1<<17); // 480MHz DRAM N=20 P=2
94 //PLL5_CFG = 0x91048091|(24<<8)|(1<<17); // 576MHz DRAM N=24 P=2
95 PLL5_CFG = 0x91048091|(25<<8)|(1<<17); // 600MHz DRAM N=25 P=2
97 * Setup APB1 clock and open the gate for UART0 clock, clear others
99 APB1_CLK_DIV_CFG = (APB1_CLK_SRC_OSC24M << 24 | APB1_FACTOR_N_1 << 16 | APB1_FACTOR_M_1 << 0);
100 APB1_GATE = (0x1<<16);
103 * Setup IO pins for UART0
105 PIO_CFG2_REG(PB) = (PIO_CFG2_REG(PB) & ~(0b01110111000000000000000000000000)) | 0b00100010000000000000000000000000;
108 * Setup UART0 (115200, 8bits)
111 while(UART0_USR & 1);
112 UART0_LCR = (UART0_LCR | (1<<7));
113 UART0_DLL = (13>>0) & 0xff;
114 UART0_DLH = (13>>8) & 0xff;
115 UART0_LCR = (UART0_LCR & ~(1<<7));
116 UART0_HLT = 0;
117 UART0_LCR = 3;
118 UART0_MCR = 0;
119 UART0_IER = 0;
120 UART0_FCR = 6;
123 * PLL clocks need time to lock on (or settle on a frequency), set both PLL clocks and then delay for both of them.
124 * It would be nice if there was a PLL lock bit to monitor.
125 * "Also, once the DLH is set, at least 8 clock cycles of the slowest UART clock should be allowed to pass before transmitting or receiving data."
127 asmdelay(PLL_SETTLING_DELAY);
130 * Set CPU to use PLL1
132 CPU_AHB_APB0_CFG = (AXI_DIV_1 << 0 | AHB_DIV_2 << 4 | APB0_DIV_1 << 8 | CPU_CLK_SRC_PLL1 << 16);
135 * Get chip revision. Clear the register first and see what pops up again or are read only.
136 * Apparently revision A has some bits inverted.
138 TIMER_CPU_CFG = 0;
139 CPU_CFG_CHIP_REV = ((TIMER_CPU_CFG>>6) & 0b11);
142 * Allwinner user manual says "If the clock source is changed, at most to wait for 8 present running clock cycles"
144 asmdelay(1000);
146 kprintf("Copyright (c)2014, The AROS Development Team. All rights reserved.\n\n");
148 kprintf("Allwinner A10 revision ");
149 switch(CPU_CFG_CHIP_REV) {
150 case TIMER_CPU_CFG_CHIP_REV_A:
151 kprintf("A\n\n");
152 break;
153 case TIMER_CPU_CFG_CHIP_REV_C1:
154 kprintf("C1\n\n");
155 break;
156 case TIMER_CPU_CFG_CHIP_REV_C2:
157 kprintf("C2\n\n");
158 break;
159 case TIMER_CPU_CFG_CHIP_REV_B:
160 kprintf("B\n\n");
161 break;
162 default:
163 kprintf("unknown\n\n");
164 break;
168 Plan of attack:
170 - Move register definitions to header files
172 - Boot0 header that MKSUNXIBOOT creates can be truncated a bit (or can it?)
173 - Boot0 header will contain UARTDEBUG register address (and used GPIO pins same as on other operating systems)
174 - Boot0 header should be modifiable from Aros installer (Poseidon Sunxi class in case the destination is NAND)
176 - Our bootstrap code should enable DRAM and check its size, maximum of 2Gb DRAM available on sun4i
177 - Boot0 header defines also the used DRAM type and its timing values etc. (DDR2 or DDR3), can it be automated to "safe" defaults?
178 - DDR3 memory chips use ODT (on die termination)
179 - Allwinner A10 has two(2) such pins, ODT0 and ODT1
180 - Unsure if PMU (power management unit) is needed before DRAM can be initialized
182 - Bootstrap code should enable NAND (in case the code is started from NAND it might be already setup by the BROM code)
183 - Bootstrap code should enable MMC access (in case the code is started from MMC it might be already setup by the BROM code)
184 - Bootstrap code will need a read only filesystem (FAT or SFS)
186 - Bootstrap code sets up the MMU
187 - Bootstrap code reads Aros module file list and loads and relocates ELF modules to DRAM
188 - Modules are loaded from NAND or MMC, depending on boot priority and presence of MMC card
190 - Bootstrap code jumps to Aros kernel and passes on information
193 uint32_t PLL1_P, PLL1_N, PLL1_K, PLL1_M, CPU_CLK;
195 PLL1_M = ((PLL1_CFG >> 0) & 0x3) + 1;
196 PLL1_K = ((PLL1_CFG >> 4) & 0x3) + 1;
197 PLL1_N = (PLL1_CFG >> 8) & 0x1f;
198 PLL1_P = (PLL1_CFG >> 16) & 0x3;
199 PLL1_P = (1<<PLL1_P);
201 CPU_CLK = ((24*PLL1_N*PLL1_K)/(PLL1_M*PLL1_P));
203 kprintf("Bootstrap CPU clock is %uMHz\n", CPU_CLK);
206 * Bits are identical to PLL1
208 uint32_t PLL5_P, PLL5_N, PLL5_K, PLL5_M, DRAM_CLK, PLL5_CLK;
210 PLL5_M = ((PLL5_CFG >> 0) & 0x3) + 1;
211 PLL5_K = ((PLL5_CFG >> 4) & 0x3) + 1;
212 PLL5_N = (PLL5_CFG >> 8) & 0x1f;
213 PLL5_P = (PLL5_CFG >> 16) & 0x3;
214 PLL5_P = (1<<PLL5_P);
216 DRAM_CLK = ((24*PLL5_N*PLL5_K)/PLL5_M);
217 PLL5_CLK = ((24*PLL5_N*PLL5_K)/PLL5_P);
219 kprintf("Bootstrap DDR3 clock is %uMHz (for others PLL5 clock is %uMHz)\n\n", DRAM_CLK, PLL5_CLK);
221 kprintf("PLL5_CFG = %x\n", PLL5_CFG);
223 struct parameters_ddr3 *parameter_ddr3 = (struct parameters_ddr3 *)platform_ddr3;
224 kprintf("tAA %uns\n", parameter_ddr3->tAA);
225 kprintf("tRCD %uns\n", parameter_ddr3->tRCD);
226 kprintf("tRP %uns\n", parameter_ddr3->tRP);
227 kprintf("tRC %uns\n", parameter_ddr3->tRC);
228 kprintf("tRAS %uns\n", parameter_ddr3->tRAS);
229 kprintf("tRFC %uns\n", parameter_ddr3->tRFC);
230 kprintf("tREFI %uns\n", parameter_ddr3->tREFI);
232 /* DDR3 setup [start] - minus PLL5 clock for SDRAM */
235 * Enable DDR_CLK_OUT
237 setbits(PLL5_CFG, 0x1<<29);
239 //MBUS_CLK_CFG = 0x82000001;
242 * Open DRAM gate
244 clrbits(AHB_GATE0, 0x1<<14);
245 asmdelay(0x1000);
246 setbits(AHB_GATE0, 0x1<<14);
247 asmdelay(0x1000);
249 if (CPU_CFG_CHIP_REV != 0) {
250 setbits(DRAM_MCR, 0x1<<12);
251 asmdelay(0x100);
252 clrbits(DRAM_MCR, 0x1<<12);
253 } else {
254 clrbits(DRAM_MCR, 0x1<<12);
255 asmdelay(0x100);
256 setbits(DRAM_MCR, 0x1<<12);
259 clrsetbits(DRAM_MCR, 0x3, (0x6<<12) | 0xffc);
262 * DRAM clock off
264 clrbits(DRAM_CLK_CFG, 0x1<<15);
267 * DRAM controller needs to be kicked with a magic word
269 DRAM_CSEL = SUN4I_DRAM_MAGIC1;
271 setbits(DRAM_CCR, 0x1<<28);
274 * Enable DLL's
276 clrsetbits(DRAM_DLLCR0, 0x1<<30, 0x1<<31);
277 clrsetbits(DRAM_DLLCR1, 0x1<<30, 0x1<<31);
278 clrsetbits(DRAM_DLLCR2, 0x1<<30, 0x1<<31);
279 clrsetbits(DRAM_DLLCR3, 0x1<<30, 0x1<<31);
280 clrsetbits(DRAM_DLLCR4, 0x1<<30, 0x1<<31);
281 asmdelay(0x100);
282 clrbits(DRAM_DLLCR0, 0x3<<30);
283 clrbits(DRAM_DLLCR1, 0x3<<30);
284 clrbits(DRAM_DLLCR2, 0x3<<30);
285 clrbits(DRAM_DLLCR3, 0x3<<30);
286 clrbits(DRAM_DLLCR4, 0x3<<30);
287 asmdelay(0x1000);
288 clrsetbits(DRAM_DLLCR0, 0x1<<31, 0x1<<30);
289 clrsetbits(DRAM_DLLCR1, 0x1<<31, 0x1<<30);
290 clrsetbits(DRAM_DLLCR2, 0x1<<31, 0x1<<30);
291 clrsetbits(DRAM_DLLCR3, 0x1<<31, 0x1<<30);
292 clrsetbits(DRAM_DLLCR4, 0x1<<31, 0x1<<30);
293 asmdelay(0x1000);
296 * Configure DRAM (specific for pcDuino)
298 * Type: DDR3
299 * Bus data width: 32
300 * Chip data width: 8
301 * Chip density: 2048 mebibits
302 * Ranks: 1
304 DRAM_DCR = 0x000030db;
307 * DRAM clock on
309 setbits(DRAM_CLK_CFG, 0x1<<15);
310 asmdelay(0x10);
312 while (DRAM_CCR & (0x1 << 31));
315 * Set ODT impedance divide ratio
317 DRAM_ZQCR0 = 0x07b00000;
320 * Set IO configuration register
322 DRAM_IOCR = 0x00cc0000;
325 * Compute refresh interval
326 * tREFI is 7.8us from Hynix datasheet (normal temperature range, 3.9us for extended range)
327 * -> We need to convert it to our DDR3 clock ticks (nREFI)
328 * -> 7.8us = 7800ns
329 * -> 7800ns/(DDR3_clk_period) = 7800nS*DDR3_clk
330 * -> DDR3_clk is already known so we use that
331 * -> nREFI = (7800nS*480MHz)/1000 = (7.8uS*480MHz) = 3744 ticks
333 * tRFC is given as clock ticks in Hynix datasheet (nRFC)
334 * nRFC for 2Gb DDR3-1066 is 86 ticks
335 * -> 1/(1066MHz/2) = 1/533MHz = 1.87617ns or so for the clock period
336 * -> With 86 ticks this gives tRFC a value of 161.35ns or so
337 * -> If we multiply this with the DDR3_clk frequency of the original 533MHz clock we should arrive at the same tick count of 86
338 * -> (161.35ns*533MHz)/1000 = 86
340 * -> Instead we need to calculate the correct tick count for our DDR3 clock (480MHz for now)
341 * -> (161.35ns*480MHz)/1000 = 77 ticks = nRFC
342 * -> Or more simply ((86/533MHz) = (nRFC/480MHz)) or nRFC = ((86/533MHz)*480MHz)
344 * DDR3_numr = Number of posted refreshes 0-8 (0=1), set to 1 for now
346 #define DDR3_numr 1
348 uint32_t temp, DDR3_nREFI, DDR3_nRFC, DDR3_nRFPRD;
350 DDR3_nREFI = ((parameter_ddr3->tREFI*DRAM_CLK)/1000);
351 DDR3_nRFC = ((parameter_ddr3->tRFC*DRAM_CLK)/1000);
352 DDR3_nRFPRD = ((DDR3_nREFI*(DDR3_numr+1))-200);
354 temp = DRAM_DRR;
355 kprintf("DRAM_DRR %x\n", temp);
357 kprintf("DDR3_nREFI %u\n", DDR3_nREFI);
358 kprintf("DDR3_nRFC %u\n", DDR3_nRFC);
359 kprintf("DDR3_nRFPRD %u\n", DDR3_nRFPRD);
361 DRAM_DRR = ((1<<31) | (DDR3_numr<<24) | ((DDR3_nRFPRD)<<8) | (DDR3_nRFC));
363 temp = DRAM_DRR;
364 kprintf("DRAM_DRR %x\n", temp);
367 * Computed DRAM_DRR = 0x886f1642, CHECKME:
374 Bootstrap debug output:
376 Copyright (c)2014, The AROS Development Team. All rights reserved.
378 Allwinner A10 revision C2
380 Bootstrap CPU clock is 384MHz
381 Bootstrap DDR3 clock is 600MHz (for others PLL5 clock is 300MHz)
383 PLL5_CFG = 0x91069991
384 tAA 15ns
385 tRCD 15ns
386 tRP 15ns
387 tRC 52ns
388 tRAS 40ns
389 tRFC 161ns
390 tREFI 7800ns
391 DRAM_DRR 0x086c9883
392 DDR3_nREFI 4680
393 DDR3_nRFC 96
394 DDR3_nRFPRD 9160
395 DRAM_DRR 0x8123c860
396 DDR3_nRC 31
397 DDR3_nRAS 24
398 DDR3_nRCD 9
399 DDR3_nRP 9
400 DRAM_TPR0 0x3e989992
401 DDR3_nCL = 9
402 DDR3 data training succesful!
406 uint32_t DDR3_nRC, DDR3_nRAS, DDR3_nRCD, DDR3_nRP;
408 DDR3_nRC = ((parameter_ddr3->tRC*DRAM_CLK)/1000);
409 DDR3_nRAS = ((parameter_ddr3->tRAS*DRAM_CLK)/1000);
410 DDR3_nRCD = ((parameter_ddr3->tRCD*DRAM_CLK)/1000);
411 DDR3_nRP = ((parameter_ddr3->tRP*DRAM_CLK)/1000);
413 kprintf("DDR3_nRC %u\n", DDR3_nRC);
414 kprintf("DDR3_nRAS %u\n", DDR3_nRAS);
415 kprintf("DDR3_nRCD %u\n", DDR3_nRCD);
416 kprintf("DDR3_nRP %u\n", DDR3_nRP);
419 * Set timing parameters
421 DRAM_TPR0 = (0x00800092 | (DDR3_nRC<<25) | (DDR3_nRAS<<16) | (DDR3_nRCD<<12) | (DDR3_nRP<<8));
422 DRAM_TPR1 = 0x00001090;
423 DRAM_TPR2 = 0x0001a0c8;
425 temp = DRAM_TPR0;
426 kprintf("DRAM_TPR0 %x\n", temp);
428 uint32_t nCL;
430 nCL = ((parameter_ddr3->tCL*DRAM_CLK)/1000);
431 kprintf("DDR3_nCL = %d\n", nCL);
434 * Controller adds 4 extra cycles(or does it?), adjust the value
436 DRAM_MR = (((nCL-4)<<4) | (0x5<<9));
438 DRAM_EMR = 0x00000004;
439 DRAM_EMR2 = 0x00000000;
440 DRAM_EMR3 = 0x00000000;
443 * Set DQS window mode
445 clrsetbits(DRAM_CCR, 0x1<<17, 0x1<<14);
447 /* reset external DRAM */
448 setbits(DRAM_CCR, 0x1<<31);
449 while (DRAM_CCR & (0x1<<31));
451 clrbits(DRAM_CCR, 0x1<<28);
454 * Trigger the data training and wait for it to finish
456 setbits(DRAM_CCR, 0x1<<30);
457 while (DRAM_CCR & (0x1<<30));
460 * Check the result
462 if(DRAM_CSR & (0x1<<20)) {
463 kprintf("DDR3 data training failed!\n");
464 bootstrapS();
465 } else {
466 kprintf("DDR3 data training succesful!\n");
470 * Host port access and priority (USB, CPU, GPU etc.)
472 DRAM_HPCR0 = 0x0301;
473 DRAM_HPCR1 = 0x0301;
474 DRAM_HPCR2 = 0x0301;
475 DRAM_HPCR3 = 0x0301;
476 DRAM_HPCR4 = 0x0301;
477 DRAM_HPCR5 = 0x0301;
478 DRAM_HPCR6 = 0;
479 DRAM_HPCR7 = 0;
480 DRAM_HPCR8 = 0;
481 DRAM_HPCR9 = 0;
482 DRAM_HPCR10 = 0;
483 DRAM_HPCR11 = 0;
484 DRAM_HPCR12 = 0;
485 DRAM_HPCR13 = 0;
486 DRAM_HPCR14 = 0;
487 DRAM_HPCR15 = 0;
488 DRAM_HPCR16 = 0x1031;
489 DRAM_HPCR17 = 0x1031;
490 DRAM_HPCR18 = 0x0735;
491 DRAM_HPCR19 = 0x1035;
492 DRAM_HPCR20 = 0x1035;
493 DRAM_HPCR21 = 0x0731;
494 DRAM_HPCR22 = 0x1031;
495 DRAM_HPCR23 = 0x0735;
496 DRAM_HPCR24 = 0x1035;
497 DRAM_HPCR25 = 0x1031;
498 DRAM_HPCR26 = 0x0731;
499 DRAM_HPCR27 = 0x1035;
500 DRAM_HPCR28 = 0x1031;
501 DRAM_HPCR29 = 0x0301;
502 DRAM_HPCR30 = 0x0301;
503 DRAM_HPCR31 = 0x0731;
505 /* DDR3 setup [end] */
507 uint32_t *a, b, i;
509 uint32_t t, x, y, z, w;
511 x = "AaAa";
512 y = "RrRr";
513 z = "OoOo";
514 w = "SsSs";
516 a = 0x40000000;
518 for(i=0; i<1024*1024; i++) {
519 t = x ^ (x << 11);
520 x = y; y = z; z = w;
521 w = w ^ (w >> 19) ^ t ^ (t >> 8);
522 a[i] = w;
526 x = "AaAa";
527 y = "RrRr";
528 z = "OoOo";
529 w = "SsSs";
531 a = 0x40000000;
533 for(i=0; i<1024*1024; i++) {
534 t = x ^ (x << 11);
535 x = y; y = z; z = w;
536 w = w ^ (w >> 19) ^ t ^ (t >> 8);
537 if(w!=a[i]) {
538 kprintf("%x = %x (should be %x)\n", &a[i], a[i], w);
539 while(1);
544 * pcDuino uses CARD0 interface in SD card mode (PF io pins) and PH1 as card detect switch input with pull up resistor
545 * For generic bootstrap we will need information stored for used DEBUGUART and SD-card interface
547 PIO_CFG0_REG(PH) = (PIO_CFG2_REG(PB) & ~(0b00000000000000000000000001110000)) | 0b00000000000000000000000000000000;
549 BOOL cardinserted = FALSE;
551 if(!(PIO_DATA_REG(PH) & 0b10)) {
552 kprintf("SD card presence detected\n");
553 cardinserted = TRUE;
556 while(1){
557 if((PIO_DATA_REG(PH) & 0b10) && (cardinserted)) {
558 kprintf("SD card removed\n");
559 cardinserted = FALSE;
562 if(!(PIO_DATA_REG(PH) & 0b10) && !(cardinserted)) {
563 kprintf("SD card inserted\n");
564 cardinserted = TRUE;