From 01f752bd18b165ed02866782041abf025465686c Mon Sep 17 00:00:00 2001 From: schulz Date: Tue, 21 Apr 2015 18:49:01 +0000 Subject: [PATCH] Wake up additional ARM cores of raspberry pi 2. This was a necessary step since after bootup the cores were busy looping and polling registers, thus making raspi2 slower than the old raspi models. Now rpi2 shows its real power ;) git-svn-id: https://svn.aros.org/svn/aros/trunk/AROS@50440 fb15a70f-31f2-0310-bbcc-cdcc74a49acc --- arch/arm-native/kernel/kernel_cpu.c | 78 +++++++++++++++++++++++++++++++-- arch/arm-native/kernel/kernel_startup.c | 3 ++ 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/arch/arm-native/kernel/kernel_cpu.c b/arch/arm-native/kernel/kernel_cpu.c index 10fa74f589..dd121f45e0 100644 --- a/arch/arm-native/kernel/kernel_cpu.c +++ b/arch/arm-native/kernel/kernel_cpu.c @@ -10,6 +10,8 @@ #include +#include + #include "etask.h" #include "kernel_intern.h" @@ -25,12 +27,41 @@ extern struct Task *sysIdleTask; uint32_t __arm_affinitymask __attribute__((section(".data"))) = 1; +asm( +" .globl mpcore_trampoline \n" +" .type mpcore_trampoline,%function \n" +"mpcore_trampoline: \n" +" ldr r3, mpcore_pde \n" +" mcr p15, 0, r3, c2, c0, 0 \n" +" mov r3, #0 \n" +" mcr p15, 0, r3, c2, c0, 2 \n" +" mov r3, #1 \n" +" mcr p15, 0, r3, c3, c0, 0 \n" +" mrc p15, 0, r4, c1, c0, 0 \n" +" mov r3, #0 \n" +" mcr p15, 0, r3, c7, c10, 4 \n" +" orr r4, r4, #0x800000 \n" +" orr r4, r4, #1 \n" +" mcr p15, 0, r4, c1, c0, 0 \n" +" mcr p15, 0, r3, c7, c5, 4 \n" +" ldr sp, mpcore_data \n" +" ldr pc, mpcore_code \n" + +"mpcore_pde: .word 0 \n" +"mpcore_code: .word 0 \n" +"mpcore_data: .word 0 \n" +"mpcore_end: " +); + +extern mpcore_trampoline(); +extern uint32_t mpcore_end; +extern uint32_t mpcore_pde; + void cpu_Register() { uint32_t tmp; asm volatile ("mrc p15, 0, %0, c1, c0, 0" : "=r"(tmp)); - tmp &= ~1; /* Disable MMU */ tmp |= (1 << 2) | (1 << 12) | (1 << 11); /* I and D caches, branch prediction */ tmp = (tmp & ~2) | (1 << 22); 
/* Unaligned access enable */ asm volatile ("mcr p15, 0, %0, c1, c0, 0" : : "r"(tmp)); @@ -41,7 +72,9 @@ void cpu_Register() __arm_affinitymask |= (1 << (tmp & 0x3)); - asm volatile("wfi"); + bug("[KRN] Core %d up and waiting for interrupts\n", tmp & 0x3); + + for (;;) asm volatile("wfi"); } void cpu_Delay(int usecs) @@ -50,6 +83,10 @@ void cpu_Delay(int usecs) for (delay = 0; delay < usecs; delay++) asm volatile ("mov r0, r0\n"); } +void arm_flush_cache(uint32_t addr, uint32_t length); + +uint32_t tmp_stacks_smp[4*1024]; + void cpu_Probe(struct ARM_Implementation *krnARMImpl) { uint32_t tmp; @@ -59,12 +96,47 @@ void cpu_Probe(struct ARM_Implementation *krnARMImpl) { krnARMImpl->ARMI_Family = 7; + if (krnARMImpl->ARMI_Delay) + { // Read the Multiprocessor Affinity Register (MPIDR) asm volatile ("mrc p15, 0, %0, c0, c0, 5" : "=r" (tmp)); if (tmp & (2 << 30)) { - //Multicore system + void *trampoline_src = mpcore_trampoline; + void *trampoline_dst = (void *)0x2000; + uint32_t trampoline_length = (uintptr_t)&mpcore_end - (uintptr_t)mpcore_trampoline; + uint32_t trampoline_data_offset = (uintptr_t)&mpcore_pde - (uintptr_t)mpcore_trampoline; + + bug("[KRN] Multicore system\n"); + + bug("[KRN] Copy SMP trampoline from %p to %p (%d bytes)\n", trampoline_src, trampoline_dst, trampoline_length); + bcopy(trampoline_src, trampoline_dst, trampoline_length); + + bug("[KRN] Patching data for trampoline at offset %d\n", trampoline_data_offset); + asm volatile ("mrc p15, 0, %0, c2, c0, 0":"=r"(tmp)); + ((uint32_t *)(trampoline_dst + trampoline_data_offset))[0] = tmp; // pde + ((uint32_t *)(trampoline_dst + trampoline_data_offset))[1] = (uint32_t)cpu_Register; + + bug("[KRN] Waking up cores\n"); + + ((uint32_t *)(trampoline_dst + trampoline_data_offset))[2] = &tmp_stacks_smp[4*1024-16]; + arm_flush_cache((uint32_t)trampoline_dst, 512); + *((uint32_t *)(0x4000008c + 0x10)) = trampoline_dst; + cpu_Delay(10000000); + + ((uint32_t *)(trampoline_dst + trampoline_data_offset))[2] = 
&tmp_stacks_smp[3*1024-16]; + arm_flush_cache((uint32_t)trampoline_dst, 512); + *((uint32_t *)(0x4000008c + 0x20)) = trampoline_dst; + + cpu_Delay(10000000); + + ((uint32_t *)(trampoline_dst + trampoline_data_offset))[2] = &tmp_stacks_smp[2*1024-16]; + arm_flush_cache((uint32_t)trampoline_dst, 512); + *((uint32_t *)(0x4000008c + 0x30)) = trampoline_dst; + cpu_Delay(10000000); + + } } } else diff --git a/arch/arm-native/kernel/kernel_startup.c b/arch/arm-native/kernel/kernel_startup.c index bc342ffc56..46a1c49443 100644 --- a/arch/arm-native/kernel/kernel_startup.c +++ b/arch/arm-native/kernel/kernel_startup.c @@ -103,6 +103,7 @@ static void __attribute__((used)) __clear_bss(struct TagItem *msg) } uint32_t __arm_periiobase __attribute__((section(".data"))) = 0; +extern uint32_t __arm_affinitymask; void __attribute__((used)) kernel_cstart(struct TagItem *msg) { @@ -170,6 +171,7 @@ void __attribute__((used)) kernel_cstart(struct TagItem *msg) D(bug("[KRN] Entered kernel_cstart @ 0x%p, BootMsg @ %p\n", kernel_cstart, BootMsg)); + cpu_Probe(&__arm_arosintern); D( if (__arm_arosintern.ARMI_PutChar) { @@ -183,6 +185,7 @@ void __attribute__((used)) kernel_cstart(struct TagItem *msg) __arm_arosintern.ARMI_LED_Toggle(ARM_LED_POWER, ARM_LED_OFF); D(bug("[KRN] Platform initialised\n")); + D(bug("[KRN] Affinity mask %08x\n", __arm_affinitymask)); if (__arm_arosintern.ARMI_Delay) __arm_arosintern.ARMI_Delay(1500); -- 2.11.4.GIT