dyngen.h

   1 /*
   2  * dyngen helpers
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  */
  20
  21 int __op_param1, __op_param2, __op_param3;
  22 #if defined(__sparc__) || defined(__arm__)
  23   void __op_gen_label1(){}
  24   void __op_gen_label2(){}
  25   void __op_gen_label3(){}
  26 #else
  27   int __op_gen_label1, __op_gen_label2, __op_gen_label3;
  28 #endif
  29 int __op_jmp0, __op_jmp1, __op_jmp2, __op_jmp3;
  30
  31 #if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
  32 static inline void flush_icache_range(unsigned long start, unsigned long stop)
  33 {
  34 }
  35 #elif defined(__ia64__)
  36 static inline void flush_icache_range(unsigned long start, unsigned long stop)
  37 {
  38     while (start < stop) {
  39         asm volatile ("fc %0" :: "r"(start));
  40         start += 32;
  41     }
  42     asm volatile (";;sync.i;;srlz.i;;");
  43 }
  44 #elif defined(__powerpc__)
  45
  46 #define MIN_CACHE_LINE_SIZE 8 /* conservative value */
  47
  48 static void inline flush_icache_range(unsigned long start, unsigned long stop)
  49 {
  50     unsigned long p;
  51
  52     start &= ~(MIN_CACHE_LINE_SIZE - 1);
  53     stop = (stop + MIN_CACHE_LINE_SIZE - 1) & ~(MIN_CACHE_LINE_SIZE - 1);
  54
  55     for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
  56         asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
  57     }
  58     asm volatile ("sync" : : : "memory");
  59     for (p = start; p < stop; p += MIN_CACHE_LINE_SIZE) {
  60         asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
  61     }
  62     asm volatile ("sync" : : : "memory");
  63     asm volatile ("isync" : : : "memory");
  64 }
  65 #elif defined(__alpha__)
  66 static inline void flush_icache_range(unsigned long start, unsigned long stop)
  67 {
  68     asm ("imb");
  69 }
  70 #elif defined(__sparc__)
  71 static void inline flush_icache_range(unsigned long start, unsigned long stop)
  72 {
  73         unsigned long p;
  74
  75         p = start & ~(8UL - 1UL);
  76         stop = (stop + (8UL - 1UL)) & ~(8UL - 1UL);
  77
  78         for (; p < stop; p += 8)
  79                 __asm__ __volatile__("flush\t%0" : : "r" (p));
  80 }
  81 #elif defined(__arm__)
  82 static inline void flush_icache_range(unsigned long start, unsigned long stop)
  83 {
  84     register unsigned long _beg __asm ("a1") = start;
  85     register unsigned long _end __asm ("a2") = stop;
  86     register unsigned long _flg __asm ("a3") = 0;
  87     __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
  88 }
  89 #elif defined(__mc68000)
  90
  91 # include <asm/cachectl.h>
  92 static inline void flush_icache_range(unsigned long start, unsigned long stop)
  93 {
  94     cacheflush(start,FLUSH_SCOPE_LINE,FLUSH_CACHE_BOTH,stop-start+16);
  95 }
  96 #elif defined(__mips__)
  97
  98 #include <sys/cachectl.h>
  99 static inline void flush_icache_range(unsigned long start, unsigned long stop)
 100 {
 101     _flush_cache ((void *)start, stop - start, BCACHE);
 102 }
 103 #else
 104 #error unsupported CPU
 105 #endif
 106
 107 #ifdef __alpha__
 108
/*
 * Global register variable: "gp" is bound to register $29.
 * NOTE(review): presumably the Alpha global-pointer register -- confirm
 * against the ABI and against the code that reads gp.
 */
register int gp asm("$29");
 110
 111 static inline void immediate_ldah(void *p, int val) {
 112     uint32_t *dest = p;
 113     long high = ((val >> 16) + ((val >> 15) & 1)) & 0xffff;
 114
 115     *dest &= ~0xffff;
 116     *dest |= high;
 117     *dest |= 31 << 16;
 118 }
 119 static inline void immediate_lda(void *dest, int val) {
 120     *(uint16_t *) dest = val;
 121 }
 122 void fix_bsr(void *p, int offset) {
 123     uint32_t *dest = p;
 124     *dest &= ~((1 << 21) - 1);
 125     *dest |= (offset >> 2) & ((1 << 21) - 1);
 126 }
 127
 128 #endif /* __alpha__ */
 129
 130 #ifdef __arm__
 131
 132 #define ARM_LDR_TABLE_SIZE 1024
 133
 134 typedef struct LDREntry {
 135     uint8_t *ptr;
 136     uint32_t *data_ptr;
 137     unsigned type:2;
 138 } LDREntry;
 139
 140 static LDREntry arm_ldr_table[1024];
 141 static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE];
 142
 143 extern char exec_loop;
 144
 145 static inline void arm_reloc_pc24(uint32_t *ptr, uint32_t insn, int val)
 146 {
 147     *ptr = (insn & ~0xffffff) | ((insn + ((val - (int)ptr) >> 2)) & 0xffffff);
 148 }
 149
 150 static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr,
 151                               LDREntry *ldr_start, LDREntry *ldr_end,
 152                               uint32_t *data_start, uint32_t *data_end,
 153                               int gen_jmp)
 154 {
 155     LDREntry *le;
 156     uint32_t *ptr;
 157     int offset, data_size, target;
 158     uint8_t *data_ptr;
 159     uint32_t insn;
 160     uint32_t mask;
 161
 162     data_size = (data_end - data_start) << 2;
 163
 164     if (gen_jmp) {
 165         /* generate branch to skip the data */
 166         if (data_size == 0)
 167             return gen_code_ptr;
 168         target = (long)gen_code_ptr + data_size + 4;
 169         arm_reloc_pc24((uint32_t *)gen_code_ptr, 0xeafffffe, target);
 170         gen_code_ptr += 4;
 171     }
 172
 173     /* copy the data */
 174     data_ptr = gen_code_ptr;
 175     memcpy(gen_code_ptr, data_start, data_size);
 176     gen_code_ptr += data_size;
 177
 178     /* patch the ldr to point to the data */
 179     for(le = ldr_start; le < ldr_end; le++) {
 180         ptr = (uint32_t *)le->ptr;
 181         offset = ((unsigned long)(le->data_ptr) - (unsigned long)data_start) +
 182             (unsigned long)data_ptr -
 183             (unsigned long)ptr - 8;
 184         if (offset < 0) {
 185             fprintf(stderr, "Negative constant pool offset\n");
 186             abort();
 187         }
 188         switch (le->type) {
 189           case 0: /* ldr */
 190             mask = ~0x00800fff;
 191             if (offset >= 4096) {
 192                 fprintf(stderr, "Bad ldr offset\n");
 193                 abort();
 194             }
 195             break;
 196           case 1: /* ldc */
 197             mask = ~0x008000ff;
 198             if (offset >= 1024 ) {
 199                 fprintf(stderr, "Bad ldc offset\n");
 200                 abort();
 201             }
 202             break;
 203           case 2: /* add */
 204             mask = ~0xfff;
 205             if (offset >= 1024 ) {
 206                 fprintf(stderr, "Bad add offset\n");
 207                 abort();
 208             }
 209             break;
 210           default:
 211             fprintf(stderr, "Bad pc relative fixup\n");
 212             abort();
 213           }
 214         insn = *ptr & mask;
 215         switch (le->type) {
 216           case 0: /* ldr */
 217             insn |= offset | 0x00800000;
 218             break;
 219           case 1: /* ldc */
 220             insn |= (offset >> 2) | 0x00800000;
 221             break;
 222           case 2: /* add */
 223             insn |= (offset >> 2) | 0xf00;
 224             break;
 225           }
 226         *ptr = insn;
 227     }
 228     return gen_code_ptr;
 229 }
 230
 231 #endif /* __arm__ */
 232
 233 #ifdef __ia64
 234
 235 /* Patch instruction with "val" where "mask" has 1 bits. */
 236 static inline void ia64_patch (uint64_t insn_addr, uint64_t mask, uint64_t val)
 237 {
 238     uint64_t m0, m1, v0, v1, b0, b1, *b = (uint64_t *) (insn_addr & -16);
 239 #   define insn_mask ((1UL << 41) - 1)
 240     unsigned long shift;
 241
 242     b0 = b[0]; b1 = b[1];
 243     shift = 5 + 41 * (insn_addr % 16); /* 5 template, 3 x 41-bit insns */
 244     if (shift >= 64) {
 245         m1 = mask << (shift - 64);
 246         v1 = val << (shift - 64);
 247     } else {
 248         m0 = mask << shift; m1 = mask >> (64 - shift);
 249         v0 = val  << shift; v1 = val >> (64 - shift);
 250         b[0] = (b0 & ~m0) | (v0 & m0);
 251     }
 252     b[1] = (b1 & ~m1) | (v1 & m1);
 253 }
 254
 255 static inline void ia64_patch_imm60 (uint64_t insn_addr, uint64_t val)
 256 {
 257         ia64_patch(insn_addr,
 258                    0x011ffffe000UL,
 259                    (  ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
 260                     | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
 261         ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
 262 }
 263
 264 static inline void ia64_imm64 (void *insn, uint64_t val)
 265 {
 266     /* Ignore the slot number of the relocation; GCC and Intel
 267        toolchains differed for some time on whether IMM64 relocs are
 268        against slot 1 (Intel) or slot 2 (GCC).  */
 269     uint64_t insn_addr = (uint64_t) insn & ~3UL;
 270
 271     ia64_patch(insn_addr + 2,
 272                0x01fffefe000UL,
 273                (  ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
 274                 | ((val & 0x0000000000200000UL) <<  0) /* bit 21 -> 21 */
 275                 | ((val & 0x00000000001f0000UL) <<  6) /* bit 16 -> 22 */
 276                 | ((val & 0x000000000000ff80UL) << 20) /* bit  7 -> 27 */
 277                 | ((val & 0x000000000000007fUL) << 13) /* bit  0 -> 13 */)
 278             );
 279     ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22);
 280 }
 281
 282 static inline void ia64_imm60b (void *insn, uint64_t val)
 283 {
 284     /* Ignore the slot number of the relocation; GCC and Intel
 285        toolchains differed for some time on whether IMM64 relocs are
 286        against slot 1 (Intel) or slot 2 (GCC).  */
 287     uint64_t insn_addr = (uint64_t) insn & ~3UL;
 288
 289     if (val + ((uint64_t) 1 << 59) >= (1UL << 60))
 290         fprintf(stderr, "%s: value %ld out of IMM60 range\n",
 291                 __FUNCTION__, (int64_t) val);
 292     ia64_patch_imm60(insn_addr + 2, val);
 293 }
 294
 295 static inline void ia64_imm22 (void *insn, uint64_t val)
 296 {
 297     if (val + (1 << 21) >= (1 << 22))
 298         fprintf(stderr, "%s: value %li out of IMM22 range\n",
 299                 __FUNCTION__, (int64_t)val);
 300     ia64_patch((uint64_t) insn, 0x01fffcfe000UL,
 301                (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
 302                 | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
 303                 | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
 304                 | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
 305 }
 306
 307 /* Like ia64_imm22(), but also clear bits 20-21.  For addl, this has
 308    the effect of turning "addl rX=imm22,rY" into "addl
 309    rX=imm22,r0".  */
 310 static inline void ia64_imm22_r0 (void *insn, uint64_t val)
 311 {
 312     if (val + (1 << 21) >= (1 << 22))
 313         fprintf(stderr, "%s: value %li out of IMM22 range\n",
 314                 __FUNCTION__, (int64_t)val);
 315     ia64_patch((uint64_t) insn, 0x01fffcfe000UL | (0x3UL << 20),
 316                (  ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
 317                 | ((val & 0x1f0000UL) <<  6) /* bit 16 -> 22 */
 318                 | ((val & 0x00ff80UL) << 20) /* bit  7 -> 27 */
 319                 | ((val & 0x00007fUL) << 13) /* bit  0 -> 13 */));
 320 }
 321
 322 static inline void ia64_imm21b (void *insn, uint64_t val)
 323 {
 324     if (val + (1 << 20) >= (1 << 21))
 325         fprintf(stderr, "%s: value %li out of IMM21b range\n",
 326                 __FUNCTION__, (int64_t)val);
 327     ia64_patch((uint64_t) insn, 0x11ffffe000UL,
 328                (  ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
 329                 | ((val & 0x0fffffUL) << 13) /* bit  0 -> 13 */));
 330 }
 331
 332 static inline void ia64_nop_b (void *insn)
 333 {
 334     ia64_patch((uint64_t) insn, (1UL << 41) - 1, 2UL << 37);
 335 }
 336
 337 static inline void ia64_ldxmov(void *insn, uint64_t val)
 338 {
 339     if (val + (1 << 21) < (1 << 22))
 340         ia64_patch((uint64_t) insn, 0x1fff80fe000UL, 8UL << 37);
 341 }
 342
 343 static inline int ia64_patch_ltoff(void *insn, uint64_t val,
 344                                    int relaxable)
 345 {
 346     if (relaxable && (val + (1 << 21) < (1 << 22))) {
 347         ia64_imm22_r0(insn, val);
 348         return 0;
 349     }
 350     return 1;
 351 }
 352
/*
 * Node of a singly linked list of instructions that still need patching.
 * New nodes are pushed at the head by the IA64_PLT and IA64_LTOFF macros
 * and the lists are walked by ia64_apply_fixes().
 */
struct ia64_fixup {
    struct ia64_fixup *next;    /* next pending fixup, NULL at the end */
    void *addr;                 /* address that needs to be patched */
    long value;                 /* PLT index (IA64_PLT) or LTOFF value (IA64_LTOFF) */
};
 358
 359 #define IA64_PLT(insn, plt_index)                       \
 360 do {                                                    \
 361     struct ia64_fixup *fixup = alloca(sizeof(*fixup));  \
 362     fixup->next = plt_fixes;                            \
 363     plt_fixes = fixup;                                  \
 364     fixup->addr = (insn);                               \
 365     fixup->value = (plt_index);                         \
 366     plt_offset[(plt_index)] = 1;                        \
 367 } while (0)
 368
 369 #define IA64_LTOFF(insn, val, relaxable)                        \
 370 do {                                                            \
 371     if (ia64_patch_ltoff(insn, val, relaxable)) {               \
 372         struct ia64_fixup *fixup = alloca(sizeof(*fixup));      \
 373         fixup->next = ltoff_fixes;                              \
 374         ltoff_fixes = fixup;                                    \
 375         fixup->addr = (insn);                                   \
 376         fixup->value = (val);                                   \
 377     }                                                           \
 378 } while (0)
 379
 380 static inline void ia64_apply_fixes (uint8_t **gen_code_pp,
 381                                      struct ia64_fixup *ltoff_fixes,
 382                                      uint64_t gp,
 383                                      struct ia64_fixup *plt_fixes,
 384                                      int num_plts,
 385                                      unsigned long *plt_target,
 386                                      unsigned int *plt_offset)
 387 {
 388     static const uint8_t plt_bundle[] = {
 389         0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; movl r1=GP */
 390         0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x60,
 391
 392         0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* nop 0; brl IP */
 393         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0
 394     };
 395     uint8_t *gen_code_ptr = *gen_code_pp, *plt_start, *got_start, *vp;
 396     struct ia64_fixup *fixup;
 397     unsigned int offset = 0;
 398     struct fdesc {
 399         long ip;
 400         long gp;
 401     } *fdesc;
 402     int i;
 403
 404     if (plt_fixes) {
 405         plt_start = gen_code_ptr;
 406
 407         for (i = 0; i < num_plts; ++i) {
 408             if (plt_offset[i]) {
 409                 plt_offset[i] = offset;
 410                 offset += sizeof(plt_bundle);
 411
 412                 fdesc = (struct fdesc *) plt_target[i];
 413                 memcpy(gen_code_ptr, plt_bundle, sizeof(plt_bundle));
 414                 ia64_imm64 (gen_code_ptr + 0x02, fdesc->gp);
 415                 ia64_imm60b(gen_code_ptr + 0x12,
 416                             (fdesc->ip - (long) (gen_code_ptr + 0x10)) >> 4);
 417                 gen_code_ptr += sizeof(plt_bundle);
 418             }
 419         }
 420
 421         for (fixup = plt_fixes; fixup; fixup = fixup->next)
 422             ia64_imm21b(fixup->addr,
 423                         ((long) plt_start + plt_offset[fixup->value]
 424                          - ((long) fixup->addr & ~0xf)) >> 4);
 425     }
 426
 427     got_start = gen_code_ptr;
 428
 429     /* First, create the GOT: */
 430     for (fixup = ltoff_fixes; fixup; fixup = fixup->next) {
 431         /* first check if we already have this value in the GOT: */
 432         for (vp = got_start; vp < gen_code_ptr; ++vp)
 433             if (*(uint64_t *) vp == fixup->value)
 434                 break;
 435         if (vp == gen_code_ptr) {
 436             /* Nope, we need to put the value in the GOT: */
 437             *(uint64_t *) vp = fixup->value;
 438             gen_code_ptr += 8;
 439         }
 440         ia64_imm22(fixup->addr, (long) vp - gp);
 441     }
 442     /* Keep code ptr aligned. */
 443     if ((long) gen_code_ptr & 15)
 444         gen_code_ptr += 8;
 445     *gen_code_pp = gen_code_ptr;
 446 }
 447
 448 #endif