pack.rb

   1 # for pack.c
   2
   3 class Array
   4   #  call-seq:
   5   #     arr.pack( aTemplateString ) -> aBinaryString
   6   #     arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
   7   #
   8   #  Packs the contents of <i>arr</i> into a binary sequence according to
   9   #  the directives in <i>aTemplateString</i> (see the table below).
  10   #  Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
  11   #  which gives the width of the resulting field. The remaining
  12   #  directives also may take a count, indicating the number of array
  13   #  elements to convert. If the count is an asterisk
  14   #  (``<code>*</code>''), all remaining array elements will be
  15   #  converted. Any of the directives ``<code>sSiIlL</code>'' may be
  16   #  followed by an underscore (``<code>_</code>'') or
  17   #  exclamation mark (``<code>!</code>'') to use the underlying
  18   #  platform's native size for the specified type; otherwise, they use a
  19   #  platform-independent size. Spaces are ignored in the template
  20   #  string. See also String#unpack.
  21   #
  22   #     a = [ "a", "b", "c" ]
  23   #     n = [ 65, 66, 67 ]
  24   #     a.pack("A3A3A3")   #=> "a  b  c  "
  25   #     a.pack("a3a3a3")   #=> "a\000\000b\000\000c\000\000"
  26   #     n.pack("ccc")      #=> "ABC"
  27   #
  28   #  If <i>aBufferString</i> is specified and its capacity is enough,
  29   #  +pack+ uses it as the buffer and returns it.
  30   #  When an offset is specified at the beginning of <i>aTemplateString</i>,
  31   #  the result is filled after the offset.
  32   #  If the original contents of <i>aBufferString</i> exist and are longer than
  33   #  the offset, the contents after the offset are overwritten by the result.
  34   #  If they are shorter, the gap is filled with ``<code>\0</code>''.
  35   #
  36   #     # packed data is appended by default
  37   #     [255].pack("C", buffer:"foo".b) #=> "foo\xFF"
  38   #
  39   #     # "@0" (offset 0) specifies that packed data is filled from beginning.
  40   #     # Also, original data after packed data is removed. ("oo" is removed.)
  41   #     [255].pack("@0C", buffer:"foo".b) #=> "\xFF"
  42   #
  43   #     # If the offset is bigger than the original length, \x00 is filled.
  44   #     [255].pack("@5C", buffer:"foo".b) #=> "foo\x00\x00\xFF"
  45   #
  46   #  Note that the ``buffer:'' option does not guarantee not to allocate memory
  47   #  in +pack+.  If the capacity of <i>aBufferString</i> is not enough,
  48   #  +pack+ allocates memory.
  49   #
  50   #  Directives for +pack+.
  51   #
  52   #   Integer       | Array   |
  53   #   Directive     | Element | Meaning
  54   #   ----------------------------------------------------------------------------
  55   #   C             | Integer | 8-bit unsigned (unsigned char)
  56   #   S             | Integer | 16-bit unsigned, native endian (uint16_t)
  57   #   L             | Integer | 32-bit unsigned, native endian (uint32_t)
  58   #   Q             | Integer | 64-bit unsigned, native endian (uint64_t)
  59   #   J             | Integer | pointer width unsigned, native endian (uintptr_t)
  60   #                 |         | (J is available since Ruby 2.3.)
  61   #                 |         |
  62   #   c             | Integer | 8-bit signed (signed char)
  63   #   s             | Integer | 16-bit signed, native endian (int16_t)
  64   #   l             | Integer | 32-bit signed, native endian (int32_t)
  65   #   q             | Integer | 64-bit signed, native endian (int64_t)
  66   #   j             | Integer | pointer width signed, native endian (intptr_t)
  67   #                 |         | (j is available since Ruby 2.3.)
  68   #                 |         |
  69   #   S_ S!         | Integer | unsigned short, native endian
  70   #   I I_ I!       | Integer | unsigned int, native endian
  71   #   L_ L!         | Integer | unsigned long, native endian
  72   #   Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
  73   #                 |         | if the platform has no long long type.)
  74   #                 |         | (Q_ and Q! are available since Ruby 2.1.)
  75   #   J!            | Integer | uintptr_t, native endian (same as J)
  76   #                 |         | (J! is available since Ruby 2.3.)
  77   #                 |         |
  78   #   s_ s!         | Integer | signed short, native endian
  79   #   i i_ i!       | Integer | signed int, native endian
  80   #   l_ l!         | Integer | signed long, native endian
  81   #   q_ q!         | Integer | signed long long, native endian (ArgumentError
  82   #                 |         | if the platform has no long long type.)
  83   #                 |         | (q_ and q! are available since Ruby 2.1.)
  84   #   j!            | Integer | intptr_t, native endian (same as j)
  85   #                 |         | (j! is available since Ruby 2.3.)
  86   #                 |         |
  87   #   S> s> S!> s!> | Integer | same as the directives without ">" except
  88   #   L> l> L!> l!> |         | big endian
  89   #   I!> i!>       |         | (available since Ruby 1.9.3)
  90   #   Q> q> Q!> q!> |         | "S>" is the same as "n"
  91   #   J> j> J!> j!> |         | "L>" is the same as "N"
  92   #                 |         |
  93   #   S< s< S!< s!< | Integer | same as the directives without "<" except
  94   #   L< l< L!< l!< |         | little endian
  95   #   I!< i!<       |         | (available since Ruby 1.9.3)
  96   #   Q< q< Q!< q!< |         | "S<" is the same as "v"
  97   #   J< j< J!< j!< |         | "L<" is the same as "V"
  98   #                 |         |
  99   #   n             | Integer | 16-bit unsigned, network (big-endian) byte order
 100   #   N             | Integer | 32-bit unsigned, network (big-endian) byte order
 101   #   v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
 102   #   V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
 103   #                 |         |
 104   #   U             | Integer | UTF-8 character
 105   #   w             | Integer | BER-compressed integer
 106   #
 107   #   Float        | Array   |
 108   #   Directive    | Element | Meaning
 109   #   ---------------------------------------------------------------------------
 110   #   D d          | Float   | double-precision, native format
 111   #   F f          | Float   | single-precision, native format
 112   #   E            | Float   | double-precision, little-endian byte order
 113   #   e            | Float   | single-precision, little-endian byte order
 114   #   G            | Float   | double-precision, network (big-endian) byte order
 115   #   g            | Float   | single-precision, network (big-endian) byte order
 116   #
 117   #   String       | Array   |
 118   #   Directive    | Element | Meaning
 119   #   ---------------------------------------------------------------------------
 120   #   A            | String  | arbitrary binary string (space padded, count is width)
 121   #   a            | String  | arbitrary binary string (null padded, count is width)
 122   #   Z            | String  | same as ``a'', except that null is added with *
 123   #   B            | String  | bit string (MSB first)
 124   #   b            | String  | bit string (LSB first)
 125   #   H            | String  | hex string (high nibble first)
 126   #   h            | String  | hex string (low nibble first)
 127   #   u            | String  | UU-encoded string
 128   #   M            | String  | quoted-printable, MIME encoding (see also RFC2045)
 129   #                |         | (text mode but input must use LF and output LF)
 130   #   m            | String  | base64 encoded string (see RFC 2045)
 131   #                |         | (if count is 0, no line feeds are added, see RFC 4648)
 132   #                |         | (count specifies input bytes between each LF,
 133   #                |         | rounded down to nearest multiple of 3)
 134   #   P            | String  | pointer to a structure (fixed-length string)
 135   #   p            | String  | pointer to a null-terminated string
 136   #
 137   #   Misc.        | Array   |
 138   #   Directive    | Element | Meaning
 139   #   ---------------------------------------------------------------------------
 140   #   @            | ---     | moves to absolute position
 141   #   X            | ---     | back up a byte
 142   #   x            | ---     | null byte
 143   def pack(fmt, buffer: nil)
 144     Primitive.pack_pack(fmt, buffer)
 145   end
 146 end
 147
 148 class String
 149   # call-seq:
 150   #    str.unpack(format)    ->  anArray
 151   #    str.unpack(format, offset: anInteger)    ->  anArray
 152   #
 153   # Decodes <i>str</i> (which may contain binary data) according to the
 154   # format string, returning an array of each value extracted.
 155   # The format string consists of a sequence of single-character directives,
 156   # summarized in the table at the end of this entry.
 157   # Each directive may be followed
 158   # by a number, indicating the number of times to repeat this
 159   # directive. An asterisk (``<code>*</code>'') will use up all
 160   # remaining elements. The directives <code>sSiIlL</code> may each be
 161   # followed by an underscore (``<code>_</code>'') or
 162   # exclamation mark (``<code>!</code>'') to use the underlying
 163   # platform's native size for the specified type; otherwise, it uses a
 164   # platform-independent consistent size. Spaces are ignored in the
 165   # format string.
 166   #
 167   # See also String#unpack1, Array#pack.
 168   #
 169   #    "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
 170   #    "abc \0\0".unpack('a3a3')           #=> ["abc", " \000\000"]
 171   #    "abc \0abc \0".unpack('Z*Z*')       #=> ["abc ", "abc "]
 172   #    "aa".unpack('b8B8')                 #=> ["10000110", "01100001"]
 173   #    "aaa".unpack('h2H2c')               #=> ["16", "61", 97]
 174   #    "\xfe\xff\xfe\xff".unpack('sS')     #=> [-2, 65534]
 175   #    "now=20is".unpack('M*')             #=> ["now is"]
 176   #    "whole".unpack('xax2aX2aX1aX2a')    #=> ["h", "e", "l", "l", "o"]
 177   #
 178   # This table summarizes the various formats and the Ruby classes
 179   # returned by each.
 180   #
 181   #  Integer       |         |
 182   #  Directive     | Returns | Meaning
 183   #  ------------------------------------------------------------------
 184   #  C             | Integer | 8-bit unsigned (unsigned char)
 185   #  S             | Integer | 16-bit unsigned, native endian (uint16_t)
 186   #  L             | Integer | 32-bit unsigned, native endian (uint32_t)
 187   #  Q             | Integer | 64-bit unsigned, native endian (uint64_t)
 188   #  J             | Integer | pointer width unsigned, native endian (uintptr_t)
 189   #                |         |
 190   #  c             | Integer | 8-bit signed (signed char)
 191   #  s             | Integer | 16-bit signed, native endian (int16_t)
 192   #  l             | Integer | 32-bit signed, native endian (int32_t)
 193   #  q             | Integer | 64-bit signed, native endian (int64_t)
 194   #  j             | Integer | pointer width signed, native endian (intptr_t)
 195   #                |         |
 196   #  S_ S!         | Integer | unsigned short, native endian
 197   #  I I_ I!       | Integer | unsigned int, native endian
 198   #  L_ L!         | Integer | unsigned long, native endian
 199   #  Q_ Q!         | Integer | unsigned long long, native endian (ArgumentError
 200   #                |         | if the platform has no long long type.)
 201   #  J!            | Integer | uintptr_t, native endian (same as J)
 202   #                |         |
 203   #  s_ s!         | Integer | signed short, native endian
 204   #  i i_ i!       | Integer | signed int, native endian
 205   #  l_ l!         | Integer | signed long, native endian
 206   #  q_ q!         | Integer | signed long long, native endian (ArgumentError
 207   #                |         | if the platform has no long long type.)
 208   #  j!            | Integer | intptr_t, native endian (same as j)
 209   #                |         |
 210   #  S> s> S!> s!> | Integer | same as the directives without ">" except
 211   #  L> l> L!> l!> |         | big endian
 212   #  I!> i!>       |         |
 213   #  Q> q> Q!> q!> |         | "S>" is the same as "n"
 214   #  J> j> J!> j!> |         | "L>" is the same as "N"
 215   #                |         |
 216   #  S< s< S!< s!< | Integer | same as the directives without "<" except
 217   #  L< l< L!< l!< |         | little endian
 218   #  I!< i!<       |         |
 219   #  Q< q< Q!< q!< |         | "S<" is the same as "v"
 220   #  J< j< J!< j!< |         | "L<" is the same as "V"
 221   #                |         |
 222   #  n             | Integer | 16-bit unsigned, network (big-endian) byte order
 223   #  N             | Integer | 32-bit unsigned, network (big-endian) byte order
 224   #  v             | Integer | 16-bit unsigned, VAX (little-endian) byte order
 225   #  V             | Integer | 32-bit unsigned, VAX (little-endian) byte order
 226   #                |         |
 227   #  U             | Integer | UTF-8 character
 228   #  w             | Integer | BER-compressed integer (see Array#pack)
 229   #
 230   #  Float        |         |
 231   #  Directive    | Returns | Meaning
 232   #  -----------------------------------------------------------------
 233   #  D d          | Float   | double-precision, native format
 234   #  F f          | Float   | single-precision, native format
 235   #  E            | Float   | double-precision, little-endian byte order
 236   #  e            | Float   | single-precision, little-endian byte order
 237   #  G            | Float   | double-precision, network (big-endian) byte order
 238   #  g            | Float   | single-precision, network (big-endian) byte order
 239   #
 240   #  String       |         |
 241   #  Directive    | Returns | Meaning
 242   #  -----------------------------------------------------------------
 243   #  A            | String  | arbitrary binary string (remove trailing nulls and ASCII spaces)
 244   #  a            | String  | arbitrary binary string
 245   #  Z            | String  | null-terminated string
 246   #  B            | String  | bit string (MSB first)
 247   #  b            | String  | bit string (LSB first)
 248   #  H            | String  | hex string (high nibble first)
 249   #  h            | String  | hex string (low nibble first)
 250   #  u            | String  | UU-encoded string
 251   #  M            | String  | quoted-printable, MIME encoding (see RFC2045)
 252   #  m            | String  | base64 encoded string (RFC 2045) (default)
 253   #               |         | base64 encoded string (RFC 4648) if followed by 0
 254   #  P            | String  | pointer to a structure (fixed-length string)
 255   #  p            | String  | pointer to a null-terminated string
 256   #
 257   #  Misc.        |         |
 258   #  Directive    | Returns | Meaning
 259   #  -----------------------------------------------------------------
 260   #  @            | ---     | skip to the offset given by the length argument
 261   #  X            | ---     | skip backward one byte
 262   #  x            | ---     | skip forward one byte
 263   #
 264   # The keyword <i>offset</i> can be given to start the decoding after skipping
 265   # the specified number of bytes:
 266   #   "abc".unpack("C*") # => [97, 98, 99]
 267   #   "abc".unpack("C*", offset: 2) # => [99]
 268   #   "abc".unpack("C*", offset: 4) # => offset outside of string (ArgumentError)
 269   #
 270   # HISTORY
 271   #
 272   # * J, J!, j, and j! are available since Ruby 2.3.
 273   # * Q_, Q!, q_, and q! are available since Ruby 2.1.
 274   # * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
 275   def unpack(fmt, offset: 0)
 276     Primitive.pack_unpack(fmt, offset)
 277   end
 278
 279   # call-seq:
 280   #    str.unpack1(format)    ->  obj
 281   #    str.unpack1(format, offset: anInteger)    ->  obj
 282   #
 283   # Decodes <i>str</i> (which may contain binary data) according to the
 284   # format string, returning the first value extracted.
 285   #
 286   # See also String#unpack, Array#pack.
 287   #
 288   # Contrast with String#unpack:
 289   #
 290   #    "abc \0\0abc \0\0".unpack('A6Z6')   #=> ["abc", "abc "]
 291   #    "abc \0\0abc \0\0".unpack1('A6Z6')  #=> "abc"
 292   #
 293   # In that case data would be lost, but often the array holds
 294   # only one value, especially when unpacking binary data. For instance:
 295   #
 296   #    "\xff\x00\x00\x00".unpack("l")         #=>  [255]
 297   #    "\xff\x00\x00\x00".unpack1("l")        #=>  255
 298   #
 299   # Thus unpack1 is convenient, makes the intention clear, and signals
 300   # the expected return value to those reading the code.
 301   #
 302   # The keyword <i>offset</i> can be given to start the decoding after skipping
 303   # the specified number of bytes:
 304   #   "abc".unpack1("C*") # => 97
 305   #   "abc".unpack1("C*", offset: 2) # => 99
 306   #   "abc".unpack1("C*", offset: 4) # => offset outside of string (ArgumentError)
 307   #
 308   def unpack1(fmt, offset: 0)
 309     Primitive.pack_unpack1(fmt, offset)
 310   end
 311 end