Optimize cstr_reset() to only reset string to empty, not call free() and later malloc()
authorKirill Smelkov <kirr@navytux.spb.ru>
Fri, 21 Dec 2012 08:21:59 +0000 (12:21 +0400)
committerKirill Smelkov <kirr@navytux.spb.ru>
Fri, 21 Dec 2012 16:46:26 +0000 (20:46 +0400)
commit8eb92e605200b1fe8d570ad309e28245c3f1af0a
tree06e451e132f7f8b6d6c307deb3d745afa1580426
parente79281f58ec302e8cc9dfc7d00e06f426fcd2acd
Optimize cstr_reset() to only reset string to empty, not call free() and later malloc()

A CString could be reset to empty just setting its .size to 0.

If memory was already allocated, that would be remembered in
.data_allocated and .size_allocated and on consequent string
manipulations that memory will be used without immediate need to call
malloc().

For

    $ ./tcc -B. -bench -DONE_SOURCE -DCONFIG_MULTIARCHDIR=\"i386-linux-gnu\" -c tcc.c

after the patch malloc/free are called less often:

(tcc is run in loop; perf record -a sleep 10 && perf report)
before:

 # Overhead      Command       Shared Object                                      Symbol
 # ........  ...........  ..................  ..........................................
 #
     13.89%          tcc  tcc                 [.] next_nomacro1
      4.73%          tcc  libc-2.13.so        [.] _int_malloc
      4.39%          tcc  tcc                 [.] next
      2.94%          tcc  tcc                 [.] tok_str_add2
      2.78%          tcc  tcc                 [.] macro_subst_tok
      2.75%          tcc  libc-2.13.so        [.] free
      2.74%          tcc  tcc                 [.] macro_subst
      2.63%          tcc  libc-2.13.so        [.] _int_free
      2.28%          tcc  tcc                 [.] vswap
      2.24%          tcc  tcc                 [.] next_nomacro_spc
      2.06%          tcc  libc-2.13.so        [.] realloc
      2.00%          tcc  libc-2.13.so        [.] malloc
      1.99%          tcc  tcc                 [.] unary
      1.85%          tcc  libc-2.13.so        [.] __i686.get_pc_thunk.bx
      1.76%  kworker/0:1  [kernel.kallsyms]   [k] delay_tsc
      1.70%          tcc  tcc                 [.] next_nomacro
      1.62%          tcc  tcc                 [.] preprocess
      1.41%          tcc  libc-2.13.so        [.] __memcmp_ssse3
      1.38%          tcc  [kernel.kallsyms]   [k] memset
      1.10%          tcc  tcc                 [.] g
      1.06%          tcc  tcc                 [.] parse_btype
      1.05%          tcc  tcc                 [.] sym_push2
      1.04%          tcc  libc-2.13.so        [.] _int_realloc
      1.00%          tcc  libc-2.13.so        [.] malloc_consolidate

after:

 # Overhead      Command       Shared Object                                          Symbol
 # ........  ...........  ..................  ..............................................
 #
     15.26%          tcc  tcc                 [.] next_nomacro1
      5.07%          tcc  libc-2.13.so        [.] _int_malloc
      4.62%          tcc  tcc                 [.] next
      3.22%          tcc  tcc                 [.] tok_str_add2
      3.03%          tcc  tcc                 [.] macro_subst_tok
      3.02%          tcc  tcc                 [.] macro_subst
      2.59%          tcc  tcc                 [.] next_nomacro_spc
      2.44%          tcc  tcc                 [.] vswap
      2.39%          tcc  libc-2.13.so        [.] _int_free
      2.28%          tcc  libc-2.13.so        [.] free
      2.22%          tcc  tcc                 [.] unary
      2.07%          tcc  libc-2.13.so        [.] realloc
      1.97%          tcc  libc-2.13.so        [.] malloc
      1.70%          tcc  tcc                 [.] preprocess
      1.69%          tcc  libc-2.13.so        [.] __i686.get_pc_thunk.bx
      1.68%          tcc  tcc                 [.] next_nomacro
      1.59%          tcc  [kernel.kallsyms]   [k] memset
      1.55%          tcc  libc-2.13.so        [.] __memcmp_ssse3
      1.22%          tcc  tcc                 [.] parse_comment
      1.11%          tcc  tcc                 [.] g
      1.11%          tcc  tcc                 [.] sym_push2
      1.10%          tcc  tcc                 [.] parse_btype
      1.10%          tcc  libc-2.13.so        [.] _int_realloc
      1.06%          tcc  tcc                 [.] vsetc
      0.98%          tcc  libc-2.13.so        [.] malloc_consolidate

and this gains small speedup for tcc:

    # best of 5 runs
    before: 8268 idents, 47191 lines, 1526670 bytes, 0.153 s, 307997 lines/s, 10.0 MB/s
    after:  8268 idents, 47203 lines, 1526763 bytes, 0.148 s, 319217 lines/s, 10.3 MB/s
tcc.h
tccpp.c