Basic concepts
Load address: the physical start address at which the kernel image is placed when it is decompressed into physical memory.
Link address: the virtual start address given to the kernel when it is compiled and linked.
A previous article of ours explained that the load address can either be chosen automatically or be fixed; it is usually the start of physical memory plus an offset. In other words, the mapping between the link address and the load address is decided at build time.
Another of our articles introduced kimage_voffset: the actual runtime offset between the kernel's virtual addresses and its physical addresses, regardless of whether KASLR has been applied.
KASLR (kernel address space layout randomization) lets the load address fixed at compile/link time be displaced by a random offset when the image is actually placed in physical memory, and then maps this new load address to a new virtual address.
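As a rough illustration of how these three quantities relate, here is a standalone C sketch; the addresses are made-up example values, not kernel code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Example values only; real addresses depend on the platform and the bootloader. */
	uint64_t link_vaddr    = 0xffff800010080000ULL;	/* compile-time link address of _text */
	uint64_t load_paddr    = 0x0000000080080000ULL;	/* physical address the image was loaded at */
	uint64_t kaslr_offset  = 0x0000000001a00000ULL;	/* random displacement picked by KASLR */

	/* With KASLR the image ends up mapped at link address + offset. */
	uint64_t runtime_vaddr = link_vaddr + kaslr_offset;

	/* kimage_voffset is the real runtime VA->PA offset of the kernel image. */
	uint64_t kimage_voffset = runtime_vaddr - load_paddr;

	printf("runtime VA of _text: 0x%llx\n", (unsigned long long)runtime_vaddr);
	printf("kimage_voffset     : 0x%llx\n", (unsigned long long)kimage_voffset);
	printf("back to PA of _text: 0x%llx\n",
	       (unsigned long long)(runtime_vaddr - kimage_voffset));
	return 0;
}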
Flow
stext is the entry point at the very start of the image.
#define KERNEL_START _text
#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET)
ENTRY(stext)
	bl	preserve_boot_args
	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
	adrp	x23, __PHYS_OFFSET
	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
	bl	set_cpu_boot_mode_flag
	bl	__create_page_tables
	/*
	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
	 * details.
	 * On return, the CPU will be ready for the MMU to be turned on and
	 * the TCR will have been set.
	 */
	bl	__cpu_setup			// initialise processor
	b	__primary_switch
ENDPROC(stext)
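The adrp/and pair that sets x23 amounts to taking __PHYS_OFFSET modulo the image alignment. A rough C rendering follows; the 2 MiB value for MIN_KIMG_ALIGN is an assumption, and the page-granularity of adrp is ignored here:

#include <stdint.h>

#define MIN_KIMG_ALIGN	(2 * 1024 * 1024)	/* assumed: SZ_2M on arm64 */

/*
 *   adrp	x23, __PHYS_OFFSET
 *   and	x23, x23, MIN_KIMG_ALIGN - 1
 *
 * keeps only the bits of the load address below the image alignment.
 * For an aligned load this is 0 ("KASLR offset, defaults to 0"); any
 * sub-alignment displacement already applied to the image shows up here
 * and is carried forward in x23.
 */
static uint64_t initial_kaslr_offset(uint64_t phys_offset /* __PHYS_OFFSET */)
{
	return phys_offset & (MIN_KIMG_ALIGN - 1);
}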
__create_page_tables
Since it is not yet known whether KASLR will be applied, __create_page_tables first creates the mapping between the load address and the link address.
__create_page_tables:
	mov	x28, lr

	/*
	 * Invalidate the init page tables to avoid potential dirty cache lines
	 * being evicted. Other page tables are allocated in rodata as part of
	 * the kernel image, and thus are clean to the PoC per the boot
	 * protocol.
	 */
	adrp	x0, init_pg_dir
	adrp	x1, init_pg_end
	sub	x1, x1, x0
	bl	__inval_dcache_area

	/*
	 * Clear the init page tables.
	 */
	adrp	x0, init_pg_dir
	adrp	x1, init_pg_end
	sub	x1, x1, x0
1:	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	stp	xzr, xzr, [x0], #16
	subs	x1, x1, #64
	b.ne	1b

	mov	x7, SWAPPER_MM_MMUFLAGS

	/*
	 * Create the identity mapping.
	 */
	adrp	x0, idmap_pg_dir
	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)

#ifdef CONFIG_ARM64_VA_BITS_52
	mrs_s	x6, SYS_ID_AA64MMFR2_EL1
	and	x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
	mov	x5, #52
	cbnz	x6, 1f
#endif
	mov	x5, #VA_BITS_MIN
1:
	adr_l	x6, vabits_actual
	str	x5, [x6]
	dmb	sy
	dc	ivac, x6			// Invalidate potentially stale cache line

	/*
	 * VA_BITS may be too small to allow for an ID mapping to be created
	 * that covers system RAM if that is located sufficiently high in the
	 * physical address space. So for the ID map, use an extended virtual
	 * range in that case, and configure an additional translation level
	 * if needed.
	 *
	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
	 * this number conveniently equals the number of leading zeroes in
	 * the physical address of __idmap_text_end.
	 */
	adrp	x5, __idmap_text_end
	clz	x5, x5
	cmp	x5, TCR_T0SZ(VA_BITS_MIN)	// default T0SZ small enough?
	b.ge	1f				// .. then skip VA range extension

	adr_l	x6, idmap_t0sz
	str	x5, [x6]
	dmb	sy
	dc	ivac, x6			// Invalidate potentially stale cache line

#if (VA_BITS < 48)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))

	/*
	 * If VA_BITS < 48, we have to configure an additional table level.
	 * First, we have to verify our assumption that the current value of
	 * VA_BITS was chosen such that all translation levels are fully
	 * utilised, and that lowering T0SZ will always result in an additional
	 * translation level to be configured.
	 */
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif

	mov	x4, EXTRA_PTRS
	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
	/*
	 * If VA_BITS == 48, we don't have to configure an additional
	 * translation level, but the top-level table has more entries.
	 */
	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
	str_l	x4, idmap_ptrs_per_pgd, x5
#endif
1:
	ldr_l	x4, idmap_ptrs_per_pgd
	mov	x5, x3				// __pa(__idmap_text_start)
	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)

	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14

	/*
	 * Map the kernel image (starting with PHYS_OFFSET).
	 */
	adrp	x0, init_pg_dir
	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
	add	x5, x5, x23			// add KASLR displacement
	mov	x4, PTRS_PER_PGD
	adrp	x6, _end			// runtime __pa(_end)
	adrp	x3, _text			// runtime __pa(_text)
	sub	x6, x6, x3			// _end - _text
	add	x6, x6, x5			// runtime __va(_end)

	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14

	/*
	 * Since the page tables have been populated with non-cacheable
	 * accesses (MMU disabled), invalidate those tables again to
	 * remove any speculatively loaded cache lines.
	 */
	dmb	sy

	adrp	x0, idmap_pg_dir
	adrp	x1, idmap_pg_end
	sub	x1, x1, x0
	bl	__inval_dcache_area

	adrp	x0, init_pg_dir
	adrp	x1, init_pg_end
	sub	x1, x1, x0
	bl	__inval_dcache_area

	ret	x28
ENDPROC(__create_page_tables)
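The "Map the kernel image" arithmetic above boils down to the range computation sketched below in C; the struct and function names are made up for illustration only:

#include <stdint.h>

struct kimage_range {
	uint64_t va_start;	/* runtime __va(_text) */
	uint64_t va_end;	/* runtime __va(_end)  */
	uint64_t pa_start;	/* runtime __pa(_text) */
};

static struct kimage_range kernel_image_range(uint64_t kimage_vaddr_text,	/* KIMAGE_VADDR + TEXT_OFFSET */
					      uint64_t kaslr_offset,		/* x23 */
					      uint64_t pa_text,			/* adrp x3, _text */
					      uint64_t pa_end)			/* adrp x6, _end  */
{
	struct kimage_range r;

	r.va_start = kimage_vaddr_text + kaslr_offset;	/* add x5, x5, x23            */
	r.pa_start = pa_text;
	r.va_end   = r.va_start + (pa_end - pa_text);	/* sub x6,x6,x3; add x6,x6,x5 */
	return r;
}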
__primary_switch
1. If CONFIG_RANDOMIZE_BASE (KASLR) is enabled, the first pass through __primary_switched calls kaslr_early_init to work out the KASLR offset and store it in register x23.
2. __create_page_tables is then run again to remap the kernel image, i.e. to assign new virtual addresses to the physical image.
3. The newly assigned addresses no longer match the compile-time link addresses, so the kernel image must be relocated: symbol addresses are fixed up so that symbol references in the kernel code resolve correctly and the kernel keeps executing properly.
4. __primary_switched is executed once more to update kimage_voffset (a C-like sketch of this two-pass flow follows the assembly listing below).
__primary_switch:
#ifdef CONFIG_RANDOMIZE_BASE
	mov	x19, x0				// preserve new SCTLR_EL1 value
	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
#endif

	adrp	x1, init_pg_dir
	bl	__enable_mmu
#ifdef CONFIG_RELOCATABLE
#ifdef CONFIG_RELR
	mov	x24, #0				// no RELR displacement yet
#endif
	bl	__relocate_kernel
#ifdef CONFIG_RANDOMIZE_BASE
	ldr	x8, =__primary_switched
	adrp	x0, __PHYS_OFFSET
	blr	x8

	/*
	 * If we return here, we have a KASLR displacement in x23 which we need
	 * to take into account by discarding the current kernel mapping and
	 * creating a new one.
	 */
	pre_disable_mmu_workaround
	msr	sctlr_el1, x20			// disable the MMU
	isb
	bl	__create_page_tables		// recreate kernel mapping

	tlbi	vmalle1				// Remove any stale TLB entries
	dsb	nsh
	isb

	msr	sctlr_el1, x19			// re-enable the MMU
	isb
	ic	iallu				// flush instructions fetched
	dsb	nsh				// via old mapping
	isb

	bl	__relocate_kernel
#endif
#endif
	ldr	x8, =__primary_switched
	adrp	x0, __PHYS_OFFSET
	br	x8
ENDPROC(__primary_switch)
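Putting the CONFIG_RANDOMIZE_BASE path into a C-like sketch (pseudocode only; every function below is just a stand-in for the corresponding assembly routine above, not a real kernel API):

/* Stubs standing in for the assembly routines in the listing above. */
static void enable_mmu(void);				/* __enable_mmu with init_pg_dir */
static void disable_mmu(void);
static void relocate_kernel(void);			/* __relocate_kernel             */
static void create_page_tables(void);			/* __create_page_tables          */
static unsigned long primary_switched_first_pass(void);	/* returns KASLR offset (x23)  */
static void primary_switched_second_pass(void);		/* ends in start_kernel()      */

static void primary_switch(void)
{
	enable_mmu();
	relocate_kernel();			/* fix up relocations against the link address */

	/* First pass of __primary_switched: runs kaslr_early_init() and only
	 * returns here when a non-zero KASLR offset was chosen. */
	if (primary_switched_first_pass()) {
		disable_mmu();
		create_page_tables();		/* remap the image at link address + offset */
		enable_mmu();
		relocate_kernel();		/* relocate again against the new addresses */
	}

	primary_switched_second_pass();		/* does not return: b start_kernel */
}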
__primary_switched
Once the steps above have run, _text has changed, so kimage_vaddr (_text - TEXT_OFFSET) changes with it; kimage_voffset then holds the real offset used by the virtual-to-physical conversion helpers, and the boot can enter the C part of the kernel via start_kernel.
__primary_switched:
	adrp	x4, init_thread_union
	add	sp, x4, #THREAD_SIZE
	adr_l	x5, init_task
	msr	sp_el0, x5			// Save thread_info

	adr_l	x8, vectors			// load VBAR_EL1 with virtual
	msr	vbar_el1, x8			// vector table address
	isb

	stp	xzr, x30, [sp, #-16]!
	mov	x29, sp

#ifdef CONFIG_SHADOW_CALL_STACK
	adr_l	x18, init_shadow_call_stack	// Set shadow call stack
#endif

	str_l	x21, __fdt_pointer, x5		// Save FDT pointer

	ldr_l	x4, kimage_vaddr		// Save the offset between
	sub	x4, x4, x0			// the kernel virtual and
	str_l	x4, kimage_voffset, x5		// physical mappings

	// Clear BSS
	adr_l	x0, __bss_start
	mov	x1, xzr
	adr_l	x2, __bss_stop
	sub	x2, x2, x0
	bl	__pi_memset
	dsb	ishst				// Make zero page visible to PTW

#ifdef CONFIG_KASAN
	bl	kasan_early_init
#endif
#ifdef CONFIG_RANDOMIZE_BASE
	tst	x23, ~(MIN_KIMG_ALIGN - 1)	// already running randomized?
	b.ne	0f
	mov	x0, x21				// pass FDT address in x0
	bl	kaslr_early_init		// parse FDT for KASLR options
	cbz	x0, 0f				// KASLR disabled? just proceed
	orr	x23, x23, x0			// record KASLR offset
	ldp	x29, x30, [sp], #16		// we must enable KASLR, return
	ret					// to __primary_switch()
0:
#endif
	add	sp, sp, #16
	mov	x29, #0
	mov	x30, #0
	b	start_kernel
ENDPROC(__primary_switched)

/*
 * end early head section, begin head code that is also used for
 * hotplug and needs to have the same protections as the text region
 */
	.section ".idmap.text","awx"

ENTRY(kimage_vaddr)
	.quad		_text - TEXT_OFFSET
EXPORT_SYMBOL(kimage_vaddr)
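The three "Save the offset" instructions above, and the way the stored value is consumed later, roughly correspond to the C sketch below; it is simplified, and the real conversion helpers live in arch/arm64/include/asm/memory.h:

#include <stdint.h>

static uint64_t kimage_voffset;

/* ldr_l x4, kimage_vaddr ; sub x4, x4, x0 ; str_l x4, kimage_voffset, x5 */
static void record_kimage_voffset(uint64_t kimage_vaddr,
				  uint64_t phys_offset /* x0 = __PHYS_OFFSET */)
{
	kimage_voffset = kimage_vaddr - phys_offset;
}

/* Kernel-image VA -> PA is then a plain subtraction (cf. the kernel's __pa_symbol()). */
static uint64_t kimg_to_phys(uint64_t va)
{
	return va - kimage_voffset;
}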