gold/merge.h

   1 // merge.h -- handle section merging for gold  -*- C++ -*-
   2
   3 // Copyright (C) 2006-2024 Free Software Foundation, Inc.
   4 // Written by Ian Lance Taylor <iant@google.com>.
   5
   6 // This file is part of gold.
   7
   8 // This program is free software; you can redistribute it and/or modify
   9 // it under the terms of the GNU General Public License as published by
  10 // the Free Software Foundation; either version 3 of the License, or
  11 // (at your option) any later version.
  12
  13 // This program is distributed in the hope that it will be useful,
  14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 // GNU General Public License for more details.
  17
  18 // You should have received a copy of the GNU General Public License
  19 // along with this program; if not, write to the Free Software
  20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  21 // MA 02110-1301, USA.
  22
  23 #ifndef GOLD_MERGE_H
  24 #define GOLD_MERGE_H
  25
  26 #include <climits>
  27 #include <map>
  28 #include <vector>
  29
  30 #include "stringpool.h"
  31 #include "output.h"
  32
  33 namespace gold
  34 {
  35
  36 // For each object with merge sections, we store an Object_merge_map.
  37 // This is used to map locations in input sections to a merged output
  38 // section.  The output section itself is not recorded here--it can be
  39 // found in the output_sections_ field of the Object.
  40
  41 class Object_merge_map
  42 {
  43  public:
  44   Object_merge_map()
  45     : section_merge_maps_()
  46   { }
  47
  48   ~Object_merge_map();
  49
  50   // Add a mapping for MERGE_MAP, for the bytes from OFFSET to OFFSET
  51   // + LENGTH in the input section SHNDX to OUTPUT_OFFSET in the
  52   // output section.  An OUTPUT_OFFSET of -1 means that the bytes are
  53   // discarded.  OUTPUT_OFFSET is relative to the start of the merged
  54   // data in the output section.
  55   void
  56   add_mapping(const Output_section_data*, unsigned int shndx,
  57               section_offset_type offset, section_size_type length,
  58               section_offset_type output_offset);
  59
  60   // Get the output offset for an input address.  MERGE_MAP is the map
  61   // we are looking for, or NULL if we don't care.  The input address
  62   // is at offset OFFSET in section SHNDX.  This sets *OUTPUT_OFFSET
  63   // to the offset in the output section; this will be -1 if the bytes
  64   // are not being copied to the output.  This returns true if the
  65   // mapping is known, false otherwise.  *OUTPUT_OFFSET is relative to
  66   // the start of the merged data in the output section.
  67   bool
  68   get_output_offset(unsigned int shndx,
  69                     section_offset_type offset,
  70                     section_offset_type* output_offset);
  71
  72   const Output_section_data*
  73   find_merge_section(unsigned int shndx) const;
  74
  75   // Initialize an mapping from input offsets to output addresses for
  76   // section SHNDX.  STARTING_ADDRESS is the output address of the
  77   // merged section.
  78   template<int size>
  79   void
  80   initialize_input_to_output_map(
  81       unsigned int shndx,
  82       typename elfcpp::Elf_types<size>::Elf_Addr starting_address,
  83       Unordered_map<section_offset_type,
  84                     typename elfcpp::Elf_types<size>::Elf_Addr>*);
  85
  86   // Map input section offsets to a length and an output section
  87   // offset.  An output section offset of -1 means that this part of
  88   // the input section is being discarded.
  89   struct Input_merge_entry
  90   {
  91     // The offset in the input section.
  92     section_offset_type input_offset;
  93     // The length.
  94     section_size_type length;
  95     // The offset in the output section.
  96     section_offset_type output_offset;
  97   };
  98
  99   // A list of entries for a particular input section.
 100   struct Input_merge_map
 101   {
 102     void add_mapping(section_offset_type input_offset, section_size_type length,
 103                      section_offset_type output_offset);
 104
 105     typedef std::vector<Input_merge_entry> Entries;
 106
 107     // We store these with the Relobj, and we look them up by input
 108     // section.  It is possible to have two different merge maps
 109     // associated with a single output section.  For example, this
 110     // happens routinely with .rodata, when merged string constants
 111     // and merged fixed size constants are both put into .rodata.  The
 112     // output offset that we store is not the offset from the start of
 113     // the output section; it is the offset from the start of the
 114     // merged data in the output section.  That means that the caller
 115     // is going to add the offset of the merged data within the output
 116     // section, which means that the caller needs to know which set of
 117     // merged data it found the entry in.  So it's not enough to find
 118     // this data based on the input section and the output section; we
 119     // also have to find it based on a set of merged data in the
 120     // output section.  In order to verify that we are looking at the
 121     // right data, we store a pointer to the Merge_map here, and we
 122     // pass in a pointer when looking at the data.  If we are asked to
 123     // look up information for a different Merge_map, we report that
 124     // we don't have it, rather than trying a lookup and returning an
 125     // answer which will receive the wrong offset.
 126     const Output_section_data* output_data;
 127     // The list of mappings.
 128     Entries entries;
 129     // Whether the ENTRIES field is sorted by input_offset.
 130     bool sorted;
 131
 132     Input_merge_map()
 133       : output_data(NULL), entries(), sorted(true)
 134     { }
 135   };
 136
 137   // Get or make the Input_merge_map to use for the section SHNDX
 138   // with MERGE_MAP.
 139   Input_merge_map*
 140   get_or_make_input_merge_map(const Output_section_data* merge_map,
 141                               unsigned int shndx);
 142
 143   private:
 144   // A less-than comparison routine for Input_merge_entry.
 145   struct Input_merge_compare
 146   {
 147     bool
 148     operator()(const Input_merge_entry& i1, const Input_merge_entry& i2) const
 149     { return i1.input_offset < i2.input_offset; }
 150   };
 151
 152   // Map input section indices to merge maps.
 153   typedef std::vector<std::pair<unsigned int, Input_merge_map*> >
 154       Section_merge_maps;
 155
 156   // Return a pointer to the Input_merge_map to use for the input
 157   // section SHNDX, or NULL.
 158   const Input_merge_map*
 159   get_input_merge_map(unsigned int shndx) const;
 160
 161   Input_merge_map *
 162   get_input_merge_map(unsigned int shndx) {
 163     return const_cast<Input_merge_map *>(static_cast<const Object_merge_map *>(
 164                                              this)->get_input_merge_map(shndx));
 165   }
 166
 167   Section_merge_maps section_merge_maps_;
 168 };
 169
 170 // A general class for SHF_MERGE data, to hold functions shared by
 171 // fixed-size constant data and string data.
 172
 173 class Output_merge_base : public Output_section_data
 174 {
 175  public:
 176   Output_merge_base(uint64_t entsize, uint64_t addralign)
 177     : Output_section_data(addralign), entsize_(entsize),
 178       keeps_input_sections_(false), first_relobj_(NULL), first_shndx_(-1),
 179       input_sections_()
 180   { }
 181
 182   // Return the entry size.
 183   uint64_t
 184   entsize() const
 185   { return this->entsize_; }
 186
 187   // Whether this is a merge string section.  This is only true of
 188   // Output_merge_string.
 189   bool
 190   is_string()
 191   { return this->do_is_string(); }
 192
 193   // Whether this keeps input sections.
 194   bool
 195   keeps_input_sections() const
 196   { return this->keeps_input_sections_; }
 197
 198   // Set the keeps-input-sections flag.  This is virtual so that sub-classes
 199   // can perform additional checks.
 200   void
 201   set_keeps_input_sections()
 202   { this->do_set_keeps_input_sections(); }
 203
 204   // Return the object of the first merged input section.  This used
 205   // for script processing.  This is NULL if merge section is empty.
 206   Relobj*
 207   first_relobj() const
 208   { return this->first_relobj_; }
 209
 210   // Return the section index of the first merged input section.  This
 211   // is used for script processing.  This is valid only if merge section
 212   // is not valid.
 213   unsigned int
 214   first_shndx() const
 215   {
 216     gold_assert(this->first_relobj_ != NULL);
 217     return this->first_shndx_;
 218   }
 219
 220   // Set of merged input sections.
 221   typedef Unordered_set<Section_id, Section_id_hash> Input_sections;
 222
 223   // Beginning of merged input sections.
 224   Input_sections::const_iterator
 225   input_sections_begin() const
 226   {
 227     gold_assert(this->keeps_input_sections_);
 228     return this->input_sections_.begin();
 229   }
 230
 231   // Beginning of merged input sections.
 232   Input_sections::const_iterator
 233   input_sections_end() const
 234   {
 235     gold_assert(this->keeps_input_sections_);
 236     return this->input_sections_.end();
 237   }
 238
 239  protected:
 240   // Return the output offset for an input offset.
 241   bool
 242   do_output_offset(const Relobj* object, unsigned int shndx,
 243                    section_offset_type offset,
 244                    section_offset_type* poutput) const;
 245
 246   // This may be overridden by the child class.
 247   virtual bool
 248   do_is_string()
 249   { return false; }
 250
 251   // This may be overridden by the child class.
 252   virtual void
 253   do_set_keeps_input_sections()
 254   { this->keeps_input_sections_ = true; }
 255
 256   // Record the merged input section for script processing.
 257   void
 258   record_input_section(Relobj* relobj, unsigned int shndx);
 259
 260  private:
 261   // The entry size.  For fixed-size constants, this is the size of
 262   // the constants.  For strings, this is the size of a character.
 263   uint64_t entsize_;
 264   // Whether we keep input sections.
 265   bool keeps_input_sections_;
 266   // Object of the first merged input section.  We use this for script
 267   // processing.
 268   Relobj* first_relobj_;
 269   // Section index of the first merged input section.
 270   unsigned int first_shndx_;
 271   // Input sections.  We only keep them is keeps_input_sections_ is true.
 272   Input_sections input_sections_;
 273 };
 274
 275 // Handle SHF_MERGE sections with fixed-size constant data.
 276
 277 class Output_merge_data : public Output_merge_base
 278 {
 279  public:
 280   Output_merge_data(uint64_t entsize, uint64_t addralign)
 281     : Output_merge_base(entsize, addralign), p_(NULL), len_(0), alc_(0),
 282       input_count_(0),
 283       hashtable_(128, Merge_data_hash(this), Merge_data_eq(this))
 284   { }
 285
 286  protected:
 287   // Add an input section.
 288   bool
 289   do_add_input_section(Relobj* object, unsigned int shndx);
 290
 291   // Set the final data size.
 292   void
 293   set_final_data_size();
 294
 295   // Write the data to the file.
 296   void
 297   do_write(Output_file*);
 298
 299   // Write the data to a buffer.
 300   void
 301   do_write_to_buffer(unsigned char*);
 302
 303   // Write to a map file.
 304   void
 305   do_print_to_mapfile(Mapfile* mapfile) const
 306   { mapfile->print_output_data(this, _("** merge constants")); }
 307
 308   // Print merge stats to stderr.
 309   void
 310   do_print_merge_stats(const char* section_name);
 311
 312   // Set keeps-input-sections flag.
 313   void
 314   do_set_keeps_input_sections()
 315   {
 316     gold_assert(this->input_count_ == 0);
 317     Output_merge_base::do_set_keeps_input_sections();
 318   }
 319
 320  private:
 321   // We build a hash table of the fixed-size constants.  Each constant
 322   // is stored as a pointer into the section data we are accumulating.
 323
 324   // A key in the hash table.  This is an offset in the section
 325   // contents we are building.
 326   typedef section_offset_type Merge_data_key;
 327
 328   // Compute the hash code.  To do this we need a pointer back to the
 329   // object holding the data.
 330   class Merge_data_hash
 331   {
 332    public:
 333     Merge_data_hash(const Output_merge_data* pomd)
 334       : pomd_(pomd)
 335     { }
 336
 337     size_t
 338     operator()(Merge_data_key) const;
 339
 340    private:
 341     const Output_merge_data* pomd_;
 342   };
 343
 344   friend class Merge_data_hash;
 345
 346   // Compare two entries in the hash table for equality.  To do this
 347   // we need a pointer back to the object holding the data.  Note that
 348   // we now have a pointer to the object stored in two places in the
 349   // hash table.  Fixing this would require specializing the hash
 350   // table, which would be hard to do portably.
 351   class Merge_data_eq
 352   {
 353    public:
 354     Merge_data_eq(const Output_merge_data* pomd)
 355       : pomd_(pomd)
 356     { }
 357
 358     bool
 359     operator()(Merge_data_key k1, Merge_data_key k2) const;
 360
 361    private:
 362     const Output_merge_data* pomd_;
 363   };
 364
 365   friend class Merge_data_eq;
 366
 367   // The type of the hash table.
 368   typedef Unordered_set<Merge_data_key, Merge_data_hash, Merge_data_eq>
 369     Merge_data_hashtable;
 370
 371   // Given a hash table key, which is just an offset into the section
 372   // data, return a pointer to the corresponding constant.
 373   const unsigned char*
 374   constant(Merge_data_key k) const
 375   {
 376     gold_assert(k >= 0 && k < static_cast<section_offset_type>(this->len_));
 377     return this->p_ + k;
 378   }
 379
 380   // Add a constant to the output.
 381   void
 382   add_constant(const unsigned char*);
 383
 384   // The accumulated data.
 385   unsigned char* p_;
 386   // The length of the accumulated data.
 387   section_size_type len_;
 388   // The size of the allocated buffer.
 389   section_size_type alc_;
 390   // The number of entries seen in input files.
 391   size_t input_count_;
 392   // The hash table.
 393   Merge_data_hashtable hashtable_;
 394 };
 395
 396 // Handle SHF_MERGE sections with string data.  This is a template
 397 // based on the type of the characters in the string.
 398
 399 template<typename Char_type>
 400 class Output_merge_string : public Output_merge_base
 401 {
 402  public:
 403   Output_merge_string(uint64_t addralign)
 404     : Output_merge_base(sizeof(Char_type), addralign), stringpool_(addralign),
 405       merged_strings_lists_(), input_count_(0), input_size_(0)
 406   {
 407     this->stringpool_.set_no_zero_null();
 408   }
 409
 410  protected:
 411   // Add an input section.
 412   bool
 413   do_add_input_section(Relobj* object, unsigned int shndx);
 414
 415   // Do all the final processing after the input sections are read in.
 416   // Returns the final data size.
 417   section_size_type
 418   finalize_merged_data();
 419
 420   // Set the final data size.
 421   void
 422   set_final_data_size();
 423
 424   // Write the data to the file.
 425   void
 426   do_write(Output_file*);
 427
 428   // Write the data to a buffer.
 429   void
 430   do_write_to_buffer(unsigned char*);
 431
 432   // Write to a map file.
 433   void
 434   do_print_to_mapfile(Mapfile* mapfile) const
 435   { mapfile->print_output_data(this, _("** merge strings")); }
 436
 437   // Print merge stats to stderr.
 438   void
 439   do_print_merge_stats(const char* section_name);
 440
 441   // Writes the stringpool to a buffer.
 442   void
 443   stringpool_to_buffer(unsigned char* buffer, section_size_type buffer_size)
 444   { this->stringpool_.write_to_buffer(buffer, buffer_size); }
 445
 446   // Clears all the data in the stringpool, to save on memory.
 447   void
 448   clear_stringpool()
 449   { this->stringpool_.clear(); }
 450
 451   // Whether this is a merge string section.
 452   virtual bool
 453   do_is_string()
 454   { return true; }
 455
 456   // Set keeps-input-sections flag.
 457   void
 458   do_set_keeps_input_sections()
 459   {
 460     gold_assert(this->input_count_ == 0);
 461     Output_merge_base::do_set_keeps_input_sections();
 462   }
 463
 464  private:
 465   // The name of the string type, for stats.
 466   const char*
 467   string_name();
 468
 469   // As we see input sections, we build a mapping from object, section
 470   // index and offset to strings.
 471   struct Merged_string
 472   {
 473     // The offset in the input section.
 474     section_offset_type offset;
 475     // The key in the Stringpool.
 476     Stringpool::Key stringpool_key;
 477
 478     Merged_string(section_offset_type offseta, Stringpool::Key stringpool_keya)
 479       : offset(offseta), stringpool_key(stringpool_keya)
 480     { }
 481   };
 482
 483   typedef std::vector<Merged_string> Merged_strings;
 484
 485   struct Merged_strings_list
 486   {
 487     // The input object where the strings were found.
 488     Relobj* object;
 489     // The input section in the input object.
 490     unsigned int shndx;
 491     // The list of merged strings.
 492     Merged_strings merged_strings;
 493
 494     Merged_strings_list(Relobj* objecta, unsigned int shndxa)
 495       : object(objecta), shndx(shndxa), merged_strings()
 496     { }
 497   };
 498
 499   typedef std::vector<Merged_strings_list*> Merged_strings_lists;
 500
 501   // As we see the strings, we add them to a Stringpool.
 502   Stringpool_template<Char_type> stringpool_;
 503   // Map from a location in an input object to an entry in the
 504   // Stringpool.
 505   Merged_strings_lists merged_strings_lists_;
 506   // The number of entries seen in input files.
 507   size_t input_count_;
 508   // The total size of input sections.
 509   size_t input_size_;
 510 };
 511
 512 } // End namespace gold.
 513
 514 #endif // !defined(GOLD_MERGE_H)