MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.S

   1 #
   2 # ConvertAsm.py: Automatically generated from CopyMem.asm
   3 #
   4 #------------------------------------------------------------------------------
   5 #
   6 # Copyright (c) 2006, Intel Corporation
   7 # All rights reserved. This program and the accompanying materials
   8 # are licensed and made available under the terms and conditions of the BSD License
   9 # which accompanies this distribution.  The full text of the license may be found at
  10 # http://opensource.org/licenses/bsd-license.php
  11 #
  12 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
  13 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
  14 #
  15 # Module Name:
  16 #
  17 #   CopyMem.S
  18 #
  19 # Abstract:
  20 #
  21 #   CopyMem function
  22 #
  23 # Notes:
  24 #
  25 #------------------------------------------------------------------------------
  26
  27 #------------------------------------------------------------------------------
  28 #  VOID *
  29 #  EFIAPI
  30 #  InternalMemCopyMem (
  31 #    IN VOID   *Destination,
  32 #    IN VOID   *Source,
  33 #    IN UINTN  Count
  34 #    )
  35 #------------------------------------------------------------------------------\r
  36 .intel_syntax noprefix\r
  37 .globl ASM_PFX(InternalMemCopyMem)\r
  38 ASM_PFX(InternalMemCopyMem):\r
  39     push    rsi\r
  40     push    rdi\r
  41     mov     rsi, rdx                    # rsi <- Source\r
  42     mov     rdi, rcx                    # rdi <- Destination\r
  43     lea     r9, [rsi + r8 - 1]          # r9 <- Last byte of Source\r
  44     cmp     rsi, rdi\r
  45     mov     rax, rdi                    # rax <- Destination as return value\r
  46     jae     L0                          # Copy forward if Source > Destination\r
  47     cmp     r9, rdi                     # Overlapped?\r
  48     jae     @CopyBackward               # Copy backward if overlapped\r
  49 L0:\r
  50     xor     rcx, rcx\r
  51     sub     rcx, rdi                    # rcx <- -rdi\r
  52     and     rcx, 15                     # rcx + rsi should be 16 bytes aligned\r
  53     jz      L1                          # skip if rcx == 0\r
  54     cmp     rcx, r8\r
  55     cmova   rcx, r8\r
  56     sub     r8, rcx\r
  57     rep     movsb\r
  58 L1:\r
  59     mov     rcx, r8\r
  60     and     r8, 15\r
  61     shr     rcx, 4                      # rcx <- # of DQwords to copy\r
  62     jz      L_CopyBytes\r
  63     movdqa  [rsp + 0x18], xmm0           # save xmm0 on stack\r
  64 L2:\r
  65     movdqu  xmm0, [rsi]                 # rsi may not be 16-byte aligned\r
  66     movntdq [rdi], xmm0                 # rdi should be 16-byte aligned\r
  67     add     rsi, 16\r
  68     add     rdi, 16\r
  69     loop    L2\r
  70     mfence\r
  71     movdqa  xmm0, [rsp + 0x18]           # restore xmm0\r
  72     jmp     L_CopyBytes                  # copy remaining bytes\r
  73 L_CopyBackward:\r
  74     mov     rsi, r9                     # rsi <- Last byte of Source\r
  75     lea     rdi, [rdi + r8 - 1]         # rdi <- Last byte of Destination\r
  76     std\r
  77 L_CopyBytes:\r
  78     mov     rcx, r8\r
  79     rep     movsb\r
  80     cld\r
  81     pop     rdi\r
  82     pop     rsi\r
  83     ret\r