GLIBC 2.23 malloc Memory Allocation Internals

  Those who know me know I like to get my hands dirty in a debugger, so for malloc we'll do the same and watch its memory-management logic unfold step by step. First we write a simple hello-world style program, then analyze it in gdb with the glibc 2.23 source alongside as a reference:

#include <stdio.h>
#include <stdlib.h>

int main()
{
    malloc(0x100);
    malloc(0x20);

    return 0;
}

  First, at around line 5221 of malloc.c you can see the alias definitions:

strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc)

  So the body of the malloc function is as follows:

void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

  arena_get (ar_ptr, bytes);

  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  if (!victim && ar_ptr != NULL)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL)
    (void) mutex_unlock (&ar_ptr->mutex);

  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}

  First a function pointer hook is loaded from __malloc_hook with an atomic read. If hook != NULL, that hook is called instead of the normal path (this is exactly where the classic __malloc_hook hijacking idea comes from; a small demo follows). Otherwise execution falls through to arena_get, whose definition we'll look at right after the sketch:
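  To see the hook behaviour in isolation, here is a hedged little demo (glibc 2.23; __malloc_hook is a real but deprecated symbol, so expect a deprecation warning). It only illustrates what happens once a function pointer sits in __malloc_hook; getting the pointer there is the exploitation part:

#include <stdio.h>
#include <malloc.h>

static void *
my_hook (size_t bytes, const void *caller)
{
  __malloc_hook = NULL;          /* same trick malloc_hook_ini uses to avoid recursion */
  printf ("malloc(%#zx) hijacked, return address %p\n", bytes, caller);
  return malloc (bytes);         /* hand the request back to the real allocator */
}

int
main (void)
{
  __malloc_hook = my_hook;       /* what an attacker's arbitrary write would achieve */
  void *p = malloc (0x100);      /* diverted into my_hook instead of _int_malloc */
  printf ("malloc returned %p\n", p);
  return 0;
}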

/* arena_get() acquires an arena and locks the corresponding mutex.
   First, try the one last locked successfully by this thread.  (This
   is the common case and handled with a macro for speed.)  Then, loop
   once over the circularly linked list of arenas.  If no arena is
   readily available, create a new one.  In this latter case, `size'
   is just a hint as to how much memory will be required immediately
   in the new arena. */
#define arena_get(ptr, size) do { \
      ptr = thread_arena;                                                     \
      arena_lock (ptr, size);                                                 \
  } while (0)

#define arena_lock(ptr, size) do {                                            \
      if (ptr && !arena_is_corrupt (ptr))                                     \
        (void) mutex_lock (&ptr->mutex);                                      \
      else                                                                    \
        ptr = arena_get2 ((size), NULL);                                      \
  } while (0)

  thread_arena is defined as follows:

//include/malloc.h
typedef struct malloc_state *mstate;
//**************************************

//malloc/malloc.c
struct malloc_state
{
  /* Serialize access.  */
  mutex_t mutex;

  /* Flags (formerly in max_fast).  */
  int flags;

  /* Fastbins */
  mfastbinptr fastbinsY[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2 - 2];

  /* Bitmap of bins */
  unsigned int binmap[BINMAPSIZE];

  /* Linked list */
  struct malloc_state *next;

  /* Linked list for free arenas.  Access to this field is serialized
     by free_list_lock in arena.c.  */
  struct malloc_state *next_free;

  /* Number of threads attached to this arena.  0 if the arena is on
     the free list.  Access to this field is serialized by
     free_list_lock in arena.c.  */
  INTERNAL_SIZE_T attached_threads;

  /* Memory allocated from the system in this arena.  */
  INTERNAL_SIZE_T system_mem;
  INTERNAL_SIZE_T max_system_mem;
};
//**************************************

//malloc/arena.c
static __thread mstate thread_arena attribute_tls_model_ie;

  So let me first talk about this arena. Going by the comment, every malloc fetches the current thread's arena, i.e. thread_arena, and its type is struct malloc_state, which in turn is made up of a mutex plus a series of arrays and linked lists. mchunkptr is defined as follows:

typedef struct malloc_chunk* mchunkptr;
typedef struct malloc_chunk *mfastbinptr;

struct malloc_chunk {

  INTERNAL_SIZE_T      prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      size;       /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};

  Besides the mutex and flags at the front, the malloc_state that describes a thread's heap holds the fastbinsY array which, as the name says, stores the fastbins, followed by the top chunk, the normal bins and a binmap. As long as we don't dive into the deeper system details, fastbins, top and bins are the only parts we need to care about when analyzing malloc. Going by the malloc_chunk definition and its comments, each chunk looks like this in memory:

    An allocated chunk looks like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if allocated            | |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk, in bytes                       |M|P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             User data starts here...                          .
            .                                                               .
            .             (malloc_usable_size() bytes)                      .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk                                     |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

    Free chunks are stored in circular doubly-linked lists, and look like this:

    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk                            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `head:' |             Size of chunk, in bytes                         |P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Forward pointer to next chunk in list             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Back pointer to previous chunk in list            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Unused space (may be 0 bytes long)                .
            .                                                               .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `foot:' |             Size of chunk, in bytes                           |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

  To make this easier to follow, here is a memory snapshot first:

pwndbg> x/20xg 0x602000
0x602000:   0x0000000000000000  0x0000000000000111
0x602010:   0x0000000000000000  0x0000000000000000
0x602020:   0x0000000000000000  0x0000000000000000
0x602030:   0x0000000000000000  0x0000000000000000
0x602040:   0x0000000000000000  0x0000000000000000
0x602050:   0x0000000000000000  0x0000000000000000
0x602060:   0x0000000000000000  0x0000000000000000
0x602070:   0x0000000000000000  0x0000000000000000
0x602080:   0x0000000000000000  0x0000000000000000
0x602090:   0x0000000000000000  0x0000000000000000
pwndbg>
0x6020a0:   0x0000000000000000  0x0000000000000000
0x6020b0:   0x0000000000000000  0x0000000000000000
0x6020c0:   0x0000000000000000  0x0000000000000000
0x6020d0:   0x0000000000000000  0x0000000000000000
0x6020e0:   0x0000000000000000  0x0000000000000000
0x6020f0:   0x0000000000000000  0x0000000000000000
0x602100:   0x0000000000000000  0x0000000000000000
0x602110:   0x0000000000000000  0x0000000000000031
0x602120:   0x0000000000000000  0x0000000000000000
0x602130:   0x0000000000000000  0x0000000000000000
pwndbg>

  This is the state after executing malloc(0x100); malloc(0x20);. On a 64-bit (little-endian) system the first 8 bytes are the "size of previous chunk" field, but since there is no previous chunk here (and in fact prev_size only carries a size when the previous chunk is freed) it is 0. The next 8 bytes are 0x111, which works out to the requested 0x100 plus alignment/header overhead plus a flag bit; where the extra size and the flag come from is covered later. After freeing this 0x100 chunk the memory looks like this:

pwndbg> x/20xg 0x602000
0x602000:   0x0000000000000000  0x0000000000000111
0x602010:   0x00007ffff7dd1b78  0x00007ffff7dd1b78
0x602020:   0x0000000000000000  0x0000000000000000
0x602030:   0x0000000000000000  0x0000000000000000
0x602040:   0x0000000000000000  0x0000000000000000
0x602050:   0x0000000000000000  0x0000000000000000
0x602060:   0x0000000000000000  0x0000000000000000
0x602070:   0x0000000000000000  0x0000000000000000
0x602080:   0x0000000000000000  0x0000000000000000
0x602090:   0x0000000000000000  0x0000000000000000
pwndbg>
0x6020a0:   0x0000000000000000  0x0000000000000000
0x6020b0:   0x0000000000000000  0x0000000000000000
0x6020c0:   0x0000000000000000  0x0000000000000000
0x6020d0:   0x0000000000000000  0x0000000000000000
0x6020e0:   0x0000000000000000  0x0000000000000000
0x6020f0:   0x0000000000000000  0x0000000000000000
0x602100:   0x0000000000000000  0x0000000000000000
0x602110:   0x0000000000000110  0x0000000000000030
0x602120:   0x0000000000000000  0x0000000000000000
0x602130:   0x0000000000000000  0x0000000000000000
pwndbg>

  The 0x100 chunk's prev_size is still 0, for the reason above, while two odd-looking values have shown up in its fd and bk pointers; fd_nextsize and bk_nextsize stay untouched (see the comment in the malloc_chunk definition). We'll get to the why later. What really stands out is that the prev_size of the second, 0x20 chunk has been filled in, and its own size went from 0x31 to 0x30!

  We said earlier that prev_size stores the previous chunk's size, so it being set here is easy to understand. As for the P flag inside size, it stands for prev_inuse; let's read the source comment first:

    The P (PREV_INUSE) bit, stored in the unused low-order bit of the
    chunk size (which is always a multiple of two words), is an in-use
    bit for the previous chunk. If that bit is clear, then the
    word before the current chunk size contains the previous chunk
    size, and can be used to find the front of the previous chunk.
    The very first chunk allocated always has this bit set,
    preventing access to non-existent (or non-owned) memory. If
    prev_inuse is set for any given chunk, then you CANNOT determine
    the size of the previous chunk, and might even get a memory
    addressing fault when trying to do so.

  In other words the P bit is not part of the chunk's size but a flag: it tells you whether the previous chunk is in use. If the previous chunk has been freed, P is 0; if it is still allocated, P is 1. The 0x20 chunk having its P bit cleared therefore means the chunk before it is freed, which is exactly our situation.
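  To decode those size fields by hand, here is a tiny standalone helper (my own code, not glibc's, but using the same three low bits glibc 2.23 defines: P = PREV_INUSE, M = IS_MMAPPED, A = NON_MAIN_ARENA):

#include <stdio.h>

#define PREV_INUSE      0x1
#define IS_MMAPPED      0x2
#define NON_MAIN_ARENA  0x4
#define SIZE_BITS       (PREV_INUSE | IS_MMAPPED | NON_MAIN_ARENA)

static void
decode (unsigned long size_field)
{
  printf ("raw %#lx -> chunk size %#lx, P=%lu M=%lu A=%lu\n",
          size_field,
          size_field & ~SIZE_BITS,
          size_field & PREV_INUSE,
          (size_field & IS_MMAPPED) >> 1,
          (size_field & NON_MAIN_ARENA) >> 2);
}

int
main (void)
{
  decode (0x111);   /* the 0x100 request: real size 0x110, previous chunk in use */
  decode (0x31);    /* the 0x20 request while its neighbour is still allocated   */
  decode (0x30);    /* the same chunk after the neighbour was freed: P cleared   */
  return 0;
}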


Hands-on tracing

  Now that we roughly know what malloc's chunks look like in memory, let's analyze the allocation logic from the source:

void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

  What I didn't mention in the previous section is that the very first time malloc is called, __malloc_hook actually does hold a function, malloc_hook_ini, defined as follows:

static void *
malloc_hook_ini (size_t sz, const void *caller)
{
  __malloc_hook = NULL;
  ptmalloc_init ();
  return __libc_malloc (sz);
}

  This function first zeroes __malloc_hook so it won't be entered again (it returns back into __libc_malloc), and then calls ptmalloc_init(), the function that does the real initialization work:

static void
ptmalloc_init (void)
{
  if (__malloc_initialized >= 0)
    return;

  __malloc_initialized = 0;

#ifdef SHARED
  /* In case this libc copy is in a non-default namespace, never use brk.
     Likewise if dlopened from statically linked program.  */
  Dl_info di;
  struct link_map *l;

  if (_dl_open_hook != NULL
      || (_dl_addr (ptmalloc_init, &di, &l, NULL) != 0
          && l->l_ns != LM_ID_BASE))
    __morecore = __failing_morecore;
#endif

  There's no need to dig deep into _dl_addr; the comment is enough: it checks whether the main program is dynamically or statically linked and uses that to decide whether the brk allocator can be used. Moving on:

  thread_arena = &main_arena;
  thread_atfork (ptmalloc_lock_all, ptmalloc_unlock_all, ptmalloc_unlock_all2);
  const char *s = NULL;
  if (__glibc_likely (_environ != NULL))
    {
      char **runp = _environ;
      char *envline;

      while (__builtin_expect ((envline = next_env_entry (&runp)) != NULL,
                               0))
        {
          size_t len = strcspn (envline, "=");

          if (envline[len] != '=')
            /* This is a "MALLOC_" variable at the end of the string
               without a '=' character.  Ignore it since otherwise we
               will access invalid memory below.  */
            continue;

          switch (len)
            {
              //....
            }
      }
      if (s && s[0])
      {
        //...
      }
      void (*hook) (void) = atomic_forced_read (__malloc_initialize_hook);
      if (hook != NULL)
         (*hook)();
      __malloc_initialized = 1;
}

  After that, thread_arena is set to the address of main_arena (note the arena itself is still empty at this point, there's nothing in it!), and then the environment variables are walked looking for "MALLOC_" entries. When I traced this there were none, so none of the code inside the while loop runs and I've omitted it, heh; the if after it isn't taken either. Then comes a check on __malloc_initialize_hook, much like __malloc_hook at the beginning: if something is there it gets called (pwners, new ideas?). Finally __malloc_initialized is set to 1 and we return, and the flow goes back to __libc_malloc:

  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

  arena_get (ar_ptr, bytes);

  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */

  arena_get is defined as follows:

#define arena_get(ptr, size) do { \
      ptr = thread_arena;                                                     \
      arena_lock (ptr, size);                                                 \
  } while (0)

  ar_ptr is set to thread_arena, which as we saw was already assigned the address of main_arena, and then _int_malloc is called; this is where the actual chunk-allocation logic lives:

static void *
_int_malloc (mstate av, size_t bytes)
{
  INTERNAL_SIZE_T nb;               /* normalized request size */
  unsigned int idx;                 /* associated bin index */
  mbinptr bin;                      /* associated bin */

  mchunkptr victim;                 /* inspected/selected chunk */
  INTERNAL_SIZE_T size;             /* its size */
  int victim_index;                 /* its bin index */

  mchunkptr remainder;              /* remainder from a split */
  unsigned long remainder_size;     /* its size */

  unsigned int block;               /* bit map traverser */
  unsigned int bit;                 /* bit map traverser */
  unsigned int map;                 /* current word of binmap */

  mchunkptr fwd;                    /* misc temp for linking */
  mchunkptr bck;                    /* misc temp for linking */

  const char *errstr = NULL;

  /*
     Convert request size to internal form by adding SIZE_SZ bytes
     overhead plus possibly more to obtain necessary alignment and/or
     to obtain a size of at least MINSIZE, the smallest allocatable
     size. Also, checked_request2size traps (returning 0) request sizes
     that are so large that they wrap around zero when padded and
     aligned.
   */

  checked_request2size (bytes, nb);

  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
     mmap.  */
  if (__glibc_unlikely (av == NULL))
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
        alloc_perturb (p, bytes);
      return p;
    }

  The function is huge, so let's take it slowly. It receives two arguments: the size passed to malloc and the thread_arena we obtained earlier. checked_request2size is defined as follows:

#define checked_request2size(req, sz)                             \
  if (REQUEST_OUT_OF_RANGE (req)) {                                           \
      __set_errno (ENOMEM);                                                   \
      return 0;                                                               \
    }                                                                         \
  (sz) = request2size (req);

#define REQUEST_OUT_OF_RANGE(req)                                 \
  ((unsigned long) (req) >=                                                   \
   (unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))

#define request2size(req)                                         \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
   MINSIZE :                                                      \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

#define MINSIZE  \
  (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))
#define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
#define MALLOC_ALIGN_MASK      (MALLOC_ALIGNMENT - 1)
#define SIZE_SZ                (sizeof(INTERNAL_SIZE_T))

#define offsetof(type,ident) ((size_t)&(((type*)0)->ident))

  Quite a few definitions. req is the size we asked for and sz is the computed, i.e. aligned, size that actually gets allocated. INTERNAL_SIZE_T is really just size_t, SIZE_SZ is 8 on 64-bit, MALLOC_ALIGNMENT is 0x10, and so MALLOC_ALIGN_MASK is 0xF. See this source comment:

/*
  MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks.  It
  must be a power of two at least 2 * SIZE_SZ, even on machines for
  which smaller alignments would suffice. It may be defined as larger
  than this though. Note however that code and data structures are
  optimized for the case of 8-byte alignment.
*/

  offsetof is a very handy macro that yields the offset of any member inside a struct, so MIN_CHUNK_SIZE is the offset of fd_nextsize within malloc_chunk, i.e. 0x20, and MINSIZE on 64-bit is 0x20 as well. With that, request2size is easy to read: by this formula a request of 0x29 to 0x38 bytes ends up as a 0x40 chunk, 0x39 to 0x48 as a 0x50 chunk, and so on. Because of the & ~MALLOC_ALIGN_MASK step the low 4 bits can never be set: every chunk size is a multiple of 0x10. Once nb holds the real chunk size we keep reading _int_malloc, right after a quick check of this rounding:
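  A user-level re-computation of the rounding rule (assuming the 64-bit values above: SIZE_SZ = 8, MALLOC_ALIGN_MASK = 0xF, MINSIZE = 0x20); this mirrors the macro rather than calling into glibc:

#include <stdio.h>

#define SIZE_SZ            8UL
#define MALLOC_ALIGN_MASK  0xFUL
#define MINSIZE            0x20UL

static unsigned long
request2size (unsigned long req)
{
  return (req + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)
         ? MINSIZE
         : (req + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK;
}

int
main (void)
{
  unsigned long reqs[] = { 0x20, 0x28, 0x29, 0x38, 0x39, 0x100 };
  for (unsigned i = 0; i < sizeof reqs / sizeof reqs[0]; i++)
    printf ("malloc(%#lx) -> chunk of %#lx\n", reqs[i], request2size (reqs[i]));
  return 0;
}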

  checked_request2size (bytes, nb);

  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
     mmap.  */
  if (__glibc_unlikely (av == NULL))
    {
      //...
    }

  /*
     If the size qualifies as a fastbin, first check corresponding bin.
     This code is safe to execute even if av is not yet initialized, so we
     can try it without checking, which saves some time on this fast path.
   */

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
  {
        //...
  }

  In fact, because this is our very first allocation, get_max_fast() returns 0, so no matter what size we ask for the first time we can never enter this if. Next, take a look at these definitions:

#define in_smallbin_range(sz)  \
  ((unsigned long) (sz) < (unsigned long) MIN_LARGE_SIZE)

#define NBINS             128
#define NSMALLBINS         64
#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
#define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)

  We already know MALLOC_ALIGNMENT is 0x10, so MIN_LARGE_SIZE easily works out to 0x400. Our request became 0x110 after rounding, so it qualifies as a smallbin. Here is the gdb trace:

 R15  0x0
 RBP  0x110
 RSP  0x7fffffffdb10 —▸ 0x7ffff7a92810 (ptmalloc_init) ◂— mov    eax, dword ptr [rip + 0x33e92e]
 RIP  0x7ffff7a8ec40 (_int_malloc+192) ◂— cmp    rbp, 0x3ff
────────────────────────────────────────[ DISASM ]─────────────────────────────────────────
   0x7ffff7a8ebb9 <_int_malloc+57>     test   rdi, rdi
   0x7ffff7a8ebbc <_int_malloc+60>     mov    qword ptr [rsp + 8], rsi
   0x7ffff7a8ebc1 <_int_malloc+65>     je     _int_malloc+2213 <0x7ffff7a8f425>

   0x7ffff7a8ebc7 <_int_malloc+71>     cmp    rbp, qword ptr [rip + 0x344c2a] <0x7ffff7dd37f8>
   0x7ffff7a8ebce <_int_malloc+78>     ja     _int_malloc+192 <0x7ffff7a8ec40>
    ↓
 ► 0x7ffff7a8ec40 <_int_malloc+192>    cmp    rbp, 0x3ff
   0x7ffff7a8ec47 <_int_malloc+199>    ja     _int_malloc+294 <0x7ffff7a8eca6>

   0x7ffff7a8ec49 <_int_malloc+201>    mov    eax, ebp
   0x7ffff7a8ec4b <_int_malloc+203>    shr    eax, 4

  The cmp rbp, 0x3ff is that smallbin comparison (after compiler optimization). Back to the source:

  if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
        {

  Inside this if branch there are a few more macro operations; their definitions first:

#define smallbin_index(sz) \
  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
   + SMALLBIN_CORRECTION)
#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)

  On a 64-bit system SMALLBIN_WIDTH is 16, so smallbin_index returns 0x110 >> 4 = 0x11. bin_at is defined as follows:

#define bin_at(m, i) \
  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))                           \
             - offsetof (struct malloc_chunk, fd))

  m is the av argument, i.e. main_arena of type malloc_state, and bins holds the normal chunks. In short, this computes where a chunk of our size should sit in the bins array and then subtracts an offset, presumably so the pointer points at the head of a chunk rather than at its fd field (the pointers malloc returns to the program all point at fd, since user data is written starting at fd rather than at the header, otherwise writes would clobber prev_size and size). mbinptr and last are defined like this:

typedef struct malloc_chunk *mbinptr;

#define last(b)      ((b)->bk)
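  To make those numbers concrete, here is a standalone sketch (my own mirror of the 64-bit chunk layout, not glibc itself) of the smallbin index for our 0x110 chunk and the offset bin_at subtracts:

#include <stdio.h>
#include <stddef.h>

struct malloc_chunk
{
  size_t prev_size, size;
  struct malloc_chunk *fd, *bk;
  struct malloc_chunk *fd_nextsize, *bk_nextsize;
};

int
main (void)
{
  unsigned long nb = 0x110;
  unsigned idx     = (unsigned) nb >> 4;                   /* smallbin_index -> 0x11 */
  size_t fd_offset = offsetof (struct malloc_chunk, fd);   /* 0x10 */

  printf ("smallbin_index(0x110) = %#x\n", idx);
  printf ("offsetof(fd)          = %#zx\n", fd_offset);
  /* bin_at(av, idx) = (char *) &av->bins[(idx - 1) * 2] - offsetof(fd):
     the result is a "fake chunk" whose fd and bk fields overlap
     bins[(idx - 1) * 2] and bins[(idx - 1) * 2 + 1] inside main_arena.  */
  printf ("fd/bk overlap bins[%u] and bins[%u]\n",
          (idx - 1) * 2, (idx - 1) * 2 + 1);
  return 0;
}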

  So, having located the bin for this index, the code checks whether that bin's bk pointer points back at the bin itself. Because main_arena is still completely empty here, bk is 0, which is not the bin, so the if is entered; and victim, holding the bk content, is also 0, so malloc_consolidate gets called:

          if (victim == 0) /* initialization check */
            malloc_consolidate (av);
          else
            {
              //...
            }
        }
    }

  malloc_consolidate:

static void malloc_consolidate(mstate av)
{
  mfastbinptr*    fb;                 /* current fastbin being consolidated */
  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
  mchunkptr       p;                  /* current chunk being consolidated */
  mchunkptr       nextp;              /* next chunk to consolidate */
  mchunkptr       unsorted_bin;       /* bin header */
  mchunkptr       first_unsorted;     /* chunk to link to */

  /* These have same use as in free() */
  mchunkptr       nextchunk;
  INTERNAL_SIZE_T size;
  INTERNAL_SIZE_T nextsize;
  INTERNAL_SIZE_T prevsize;
  int             nextinuse;
  mchunkptr       bck;
  mchunkptr       fwd;

  /*
    If max_fast is 0, we know that av hasn't
    yet been initialized, in which case do so below
  */

  if (get_max_fast () != 0) {
    //...
  }
  else {
    malloc_init_state(av);
    check_malloc_state(av);
  }
}

  As mentioned earlier, get_max_fast() is 0 at this point, so we go into the else branch. First, malloc_init_state:

static void
malloc_init_state (mstate av)
{
  int i;
  mbinptr bin;

  /* Establish circular links for normal bins */
  for (i = 1; i < NBINS; ++i)
    {
      bin = bin_at (av, i);
      bin->fd = bin->bk = bin;
    }

#if MORECORE_CONTIGUOUS
  if (av != &main_arena)
#endif
  set_noncontiguous (av);
  if (av == &main_arena)
    set_max_fast (DEFAULT_MXFAST);
  av->flags |= FASTCHUNKS_BIT;

  av->top = initial_top (av);
}

  The related definitions:

typedef struct malloc_chunk *mbinptr;
#define NBINS             128
#define set_max_fast(s) \
  global_max_fast = (((s) == 0)                                               \
                     ? SMALLBIN_WIDTH : ((s + SIZE_SZ) & ~MALLOC_ALIGN_MASK))//0x80
#ifndef DEFAULT_MXFAST
#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)               //128
#endif

#define initial_top(M)              (unsorted_chunks (M))
#define unsorted_chunks(M)          (bin_at (M, 1))
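  A quick check of what those macros boil down to on 64-bit (the numbers are recomputed here as a standalone sketch, not read out of a live glibc):

#include <stdio.h>

#define SIZE_SZ            8UL
#define MALLOC_ALIGN_MASK  0xFUL
#define SMALLBIN_WIDTH     0x10UL
#define DEFAULT_MXFAST     (64 * SIZE_SZ / 4)              /* 128 == 0x80 */

int
main (void)
{
  unsigned long s = DEFAULT_MXFAST;
  unsigned long global_max_fast =
      (s == 0) ? SMALLBIN_WIDTH : (s + SIZE_SZ) & ~MALLOC_ALIGN_MASK;

  printf ("DEFAULT_MXFAST  = %#lx\n", s);                  /* 0x80 */
  printf ("global_max_fast = %#lx\n", global_max_fast);    /* 0x80: fastbins cover chunks up to 0x80 */
  /* initial_top(av) = unsorted_chunks(av) = bin_at(av, 1): av->top starts out
     pointing at the unsorted-bin "fake chunk" inside main_arena, whose size
     field therefore reads as 0.  */
  return 0;
}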

What you see is what you get

  malloc_init_state does exactly what the definitions say: it links every normal bin back to itself, enables fastbins by setting global_max_fast (0x80 on 64-bit), and points av->top at the unsorted-bin header. With initialization done, malloc_consolidate returns and we are back in _int_malloc, where the binmap scan comes next; the relevant definitions and code:

#define unsorted_chunks(M)          (bin_at (M, 1))
#define bin_at(m, i) \
  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2]))                           \
             - offsetof (struct malloc_chunk, fd))
#define BINMAPSHIFT      5
#define BITSPERMAP       (1U << BINMAPSHIFT)          //0x20
#define BINMAPSIZE       (NBINS / BITSPERMAP)           //4
#define NBINS             128

#define idx2block(i)     ((i) >> BINMAPSHIFT)
#define idx2bit(i)       ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))

for (;; )
    {
      int iters = 0;
      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
        {
          //...
        }
      if (!in_smallbin_range (nb))
        {
          //..
        }
      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);

  Since this is our very first allocation, the while condition can't hit, and the !in_smallbin_range branch after it isn't taken either (we asked for a smallbin size), so what's left is the binmap handling. Each binmap word covers 32 bin indices (1 << BINMAPSHIFT = 0x20), and with NBINS = 128 that gives BINMAPSIZE = 128 / 32 = 4 words, matching what the program computes. Our binmap is all zeroes, so bit > map is guaranteed (note that bit here is really a one-bit mask derived from the index, not 0). In the do/while that follows, again because the binmap is empty, we loop four times and finally goto use_top:
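  Here is a small standalone re-computation of that binmap walk (idx = 0x12 comes from smallbin_index(0x110) = 0x11 plus the ++idx; the all-zero binmap is the assumption matching this first allocation):

#include <stdio.h>

#define BINMAPSHIFT  5
#define BINMAPSIZE   4
#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i)   (1U << ((i) & ((1U << BINMAPSHIFT) - 1)))

int
main (void)
{
  unsigned idx = 0x11 + 1;                   /* the ++idx in _int_malloc */
  unsigned binmap[BINMAPSIZE] = { 0 };       /* nothing has been binned yet */
  unsigned block = idx2block (idx);          /* 0 */
  unsigned map   = binmap[block];            /* 0 */
  unsigned bit   = idx2bit (idx);            /* 1 << 0x12 = 0x40000 */

  printf ("block=%u map=%#x bit=%#x\n", block, map, bit);
  if (bit > map || bit == 0)
    {
      do
        {
          if (++block >= BINMAPSIZE)         /* the check runs four times... */
            {
              puts ("out of bins -> goto use_top");
              return 0;
            }
        }
      while ((map = binmap[block]) == 0);
    }
  return 0;
}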

#define chunksize(p)         ((p)->size & ~(SIZE_BITS))

    use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */

      victim = av->top;
      size = chunksize (victim);

      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          //...
        }
      else if (have_fastchunks (av))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }

      /*
         Otherwise, relay to handle system-dependent cases
       */
      else
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}

  Obviously the top chunk hasn't actually been allocated yet; it was only initialized to point back at its own slot in main_arena, so its size reads as 0 and the if isn't satisfied, and of course there are no fastchunks either, so we finally land in sysmalloc to set up the heap:

/*
   sysmalloc handles malloc cases requiring more memory from the system.
   On entry, it is assumed that av->top does not have enough
   space to service request for nb bytes, thus requiring that av->top
   be extended or replaced.
 */

static void *
sysmalloc (INTERNAL_SIZE_T nb, mstate av)
{
  mchunkptr old_top;              /* incoming value of av->top */
  INTERNAL_SIZE_T old_size;       /* its size */
  char *old_end;                  /* its end address */

  long size;                      /* arg to first MORECORE or mmap call */
  char *brk;                      /* return value from MORECORE */

  long correction;                /* arg to 2nd MORECORE call */
  char *snd_brk;                  /* 2nd return val */

  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
  char *aligned_brk;              /* aligned offset into brk */

  mchunkptr p;                    /* the allocated/returned chunk */
  mchunkptr remainder;            /* remainder from allocation */
  unsigned long remainder_size;   /* its size */

  size_t pagesize = GLRO (dl_pagesize);
  bool tried_mmap = false;

  /*
     If have mmap, and the request size meets the mmap threshold, and
     the system supports mmap, and there are few enough currently
     allocated mmapped regions, try to directly map this request
     rather than expanding top.
   */

  if (av == NULL
      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
          && (mp_.n_mmaps < mp_.n_mmaps_max)))
    {
      //...
    }
  if (av == NULL)
    return 0;

  /* Record incoming configuration of top */

  old_top = av->top;
  old_size = chunksize (old_top);
  old_end = (char *) (chunk_at_offset (old_top, old_size));

  brk = snd_brk = (char *) (MORECORE_FAILURE);

  /*
     If not the first time through, we require old_size to be
     at least MINSIZE and to have prev_inuse set.
   */

  assert ((old_top == initial_top (av) && old_size == 0) ||
          ((unsigned long) (old_size) >= MINSIZE &&
           prev_inuse (old_top) &&
           ((unsigned long) old_end & (pagesize - 1)) == 0));

  /* Precondition: not enough current space to satisfy nb request */
  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));

  if (av != &main_arena)
    {
      //...
    }

#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
#define chunksize(p)         ((p)->size & ~(SIZE_BITS))

  Because the top chunk's size is 0 (not set up yet), old_top is top's address inside main_arena, old_size is 0, and old_end, after the computation, is still just top's address. av is main_arena, so the if isn't taken and we drop into the else:

#define NONCONTIGUOUS_BIT     (2U)
#define contiguous(M)          (((M)->flags & NONCONTIGUOUS_BIT) == 0)
//include/libc-internal.h
#define ALIGN_UP(base, size)    ALIGN_DOWN ((base) + (size) - 1, (size))
#define ALIGN_DOWN(base, size)  ((base) & -((__typeof__ (base)) (size)))

else     /* av == main_arena */
  {
      /* Request enough space for nb + pad + overhead */
      size = nb + mp_.top_pad + MINSIZE;

      /*
         If contiguous, we can subtract out existing space that we hope to
         combine with new space. We add it back later only if
         we don't actually get contiguous space.
       */
      if (contiguous (av))
        size -= old_size;

      /*
         Round to a multiple of page size.
         If MORECORE is not contiguous, this ensures that we only call it
         with whole-page arguments.  And if MORECORE is contiguous and
         this is not first time through, this preserves page-alignment of
         previous calls. Otherwise, we correct to page-align below.
       */
      size = ALIGN_UP (size, pagesize);

      if (size > 0)
      {
        brk = (char *) (MORECORE (size));
        LIBC_PROBE (memory_sbrk_more, 2, brk, size);
      }

  We asked for 0x100, so nb is 0x110. size is nb + mp_.top_pad + MINSIZE; mp_.top_pad defaults to DEFAULT_TOP_PAD, which is 0x20000, so size comes out to 0x20130, and since old_size is 0 the subtraction for the contiguous case changes nothing. pagesize is 0x1000 here, taken from size_t pagesize = GLRO (dl_pagesize); at the top of sysmalloc. __typeof__ deserves a word: it's a compiler extension rather like Python's type(), except that in GCC you can write int a; __typeof__(a) b;, which is the same as int a; int b;. ALIGN_UP therefore rounds 0x20130 up to 0x21000. Then the __morecore function is called, which is really __default_morecore (quoted right after the sketch below):
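  A minimal sketch of that size arithmetic, under the assumptions just stated (nb = 0x110, mp_.top_pad = DEFAULT_TOP_PAD = 0x20000, old_size = 0, a 0x1000 page); only the computation, not the glibc code path:

#include <stdio.h>

#define ALIGN_DOWN(base, size)  ((base) & -((__typeof__ (base)) (size)))
#define ALIGN_UP(base, size)    ALIGN_DOWN ((base) + (size) - 1, (size))

int
main (void)
{
  unsigned long nb       = 0x110;
  unsigned long top_pad  = 0x20000;   /* DEFAULT_TOP_PAD */
  unsigned long minsize  = 0x20;      /* MINSIZE on 64-bit */
  unsigned long old_size = 0;         /* the top chunk does not exist yet */
  unsigned long pagesize = 0x1000;

  unsigned long size = nb + top_pad + minsize;   /* 0x20130 */
  size -= old_size;                              /* contiguous(av) holds for main_arena */
  size  = ALIGN_UP (size, pagesize);

  printf ("sbrk request = %#lx\n", size);        /* 0x21000 */
  return 0;
}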

# define __sbrk  sbrk

void *
__default_morecore (ptrdiff_t increment)
{
  void *result = (void *) __sbrk (increment);
  if (result == (void *) -1)
    return NULL;

  return result;
}

  So it ends up calling sbrk, a very familiar face, Linux's ultimate memory allocator, heh. Its argument is the 0x21000 we just computed. This is already very low level, one more step down and you'd be staring at a SYSCALL, so I won't dig further; all we need to know is that its return value is the start address of the heap region we just obtained (this is the moment the heap comes into existence). Control then returns to sysmalloc; reading on:

#define MORECORE_FAILURE 0

      if (brk != (char *) (MORECORE_FAILURE))
        {
          /* Call the `morecore' hook if necessary.  */
          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
          if (__builtin_expect (hook != NULL, 0))
            (*hook)();
        }
      else
        {
          //...
        }

  brk is the start address of the heap that came back, which in the debugger should be 0x602000, and MORECORE_FAILURE is 0. So the if is really just checking whether the sbrk allocation succeeded (on failure it returns 0, which lands us in the else). Here sbrk succeeded, so we enter the if and check whether __after_morecore_hook holds a function; if so it is called through the hook (new ideas?). I have to complain that this function is really long and tiring, mainly because the compiler optimizes it so hard that the control flow is all over the place; watch carefully when tracing in gdb. Only two big ifs left, keep going:

      if (brk != (char *) (MORECORE_FAILURE))
        {
          if (mp_.sbrk_base == 0)
            mp_.sbrk_base = brk;
          av->system_mem += size;

          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
            //...

          else if (contiguous (av) && old_size && brk < old_end)
            {
              //...
            }

          /*
             Otherwise, make adjustments:

           * If the first time through or noncontiguous, we need to call sbrk
              just to find out where the end of memory lies.

           * We need to ensure that all returned chunks from malloc will meet
              MALLOC_ALIGNMENT

           * If there was an intervening foreign sbrk, we need to adjust sbrk
              request size to account for fact that we will not be able to
              combine new space with existing space in old_top.

           * Almost all systems internally allocate whole pages at a time, in
              which case we might as well use the whole last page of request.
              So we allocate enough more memory to hit a page boundary now,
              which in turn causes future contiguous calls to page-align.
           */

          else
            {
              front_misalign = 0;
              end_misalign = 0;
              correction = 0;
              aligned_brk = brk;

              /* handle contiguous cases */
              if (contiguous (av))
                {
                  //...
                }
              else
              {
                if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
                    /* MORECORE/mmap must correctly align */
                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
                else
                  {
                    front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
                    if (front_misalign > 0)
                      {
                        /*
                           Skip over some bytes to arrive at an aligned position.
                           We don't need to specially mark these wasted front bytes.
                           They will never be accessed anyway because
                           prev_inuse of av->top (and any chunk created from its start)
                           is always true after initialization.
                         */

                        aligned_brk += MALLOC_ALIGNMENT - front_misalign;
                      }
                  }

                  /* Find out current end of memory */
                  if (snd_brk == (char *) (MORECORE_FAILURE))
                  {
                    snd_brk = (char *) (MORECORE (0));
                  }
              }

  Control clearly enters this if. mp_.sbrk_base is now set to the heap start returned by sbrk, size is the 0x21000 from before, and av->system_mem, which was 0, becomes 0x21000. Afterwards MORECORE is called once more, and MORECORE is really __morecore, which ends up calling __sbrk again, this time returning the end address of the heap. So snd_brk is the heap's end address, typically 0x623000 in the debugger. The last big if:

#define set_head(p, s)       ((p)->size = (s))

                if (snd_brk != (char *) (MORECORE_FAILURE))
                {
                  av->top = (mchunkptr) aligned_brk;
                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
                  av->system_mem += correction;

                  /*
                     If not the first time through, we either have a
                     gap due to foreign sbrk or a non-contiguous region.  Insert a
                     double fencepost at old_top to prevent consolidation with space
                     we don't own. These fenceposts are artificial chunks that are
                     marked as inuse and are in any case too small to use.  We need
                     two to make sizes and alignments work out.
                   */

                  if (old_size != 0)
                    {
                      //...
                    }
                 }
              }
         }
     }

  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
    av->max_system_mem = av->system_mem;
  check_malloc_state (av);

  /* finally, do the allocation */
  p = av->top;
  size = chunksize (p);

  /* check that one of the above allocation paths succeeded */
  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
    {
      remainder_size = size - nb;
      remainder = chunk_at_offset (p, nb);
      av->top = remainder;
      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
      set_head (remainder, remainder_size | PREV_INUSE);
      check_malloced_chunk (av, p, nb);
      return chunk2mem (p);
    }

  /* catch all failure paths */
  __set_errno (ENOMEM);
  return 0;
}

  correction is actually 0 here, so this sets av->top to the heap's start address and sets top's prev_inuse bit, giving it a size field of 0x21001. But we're not done: the heap is obviously larger than what we asked for, so the requested chunk of 0x110 still has to be carved off, which means re-setting both the requested chunk's head (0x110 | PREV_INUSE = 0x111) and the new top's head (0x21000 - 0x110 = 0x20ef0, giving 0x20ef1). At this point the heap memory snapshot already looks like what we saw in the first section! And _int_malloc is nearly at its end too:

      else
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}

  p is the chunk returned from sysmalloc, the very chunk we just watched being carved out, and finally alloc_perturb is called on it:

static void
alloc_perturb (char *p, size_t n)
{
  if (__glibc_unlikely (perturb_byte))
    memset (p, perturb_byte ^ 0xff, n);
}

  But perturb_byte isn't set, so the function returns right away. This is probably also why chunks returned by malloc can still hold stale data and why use-after-free works so nicely, heh. Control then flows back to __libc_malloc:

  victim = _int_malloc (ar_ptr, bytes);
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  if (!victim && ar_ptr != NULL)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }

  if (ar_ptr != NULL)
    (void) mutex_unlock (&ar_ptr->mutex);

  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim;
}

  Then, after a few final checks, the chunk is handed over to the user. That completes the analysis of the first malloc(0x100); call.
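  One aside before moving on: because alloc_perturb is a no-op by default, whatever free() leaves behind in a chunk stays readable, which is exactly the use-after-free leak pattern mentioned above. A hedged standalone demo for glibc 2.23 (no tcache); reading freed memory is of course undefined behaviour and is shown purely for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  char *p = malloc (0x100);
  void *guard = malloc (0x20);     /* keeps p away from top, like the 0x20 chunk in the dump */
  (void) guard;

  strcpy (p + 0x10, "still here after free");
  free (p);                        /* the chunk goes into the unsorted bin */

  printf ("fd = %p\n", *(void **) p);          /* a pointer into main_arena */
  printf ("bk = %p\n", *(void **) (p + 8));
  printf ("past fd/bk: %s\n", p + 0x10);       /* user data survives the free */
  return 0;
}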


Allocating a fastbin

  In the previous sections we went through everything the program does for the first smallbin request. So, following our hello-world program, what happens when we next allocate a 0x20 fastbin? Once again we start from __libc_malloc:

void *
__libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

  arena_get (ar_ptr, bytes);

  victim = _int_malloc (ar_ptr, bytes);

  This time there is no function left in __malloc_hook to call, it's 0. As before, arena_get puts main_arena's address into ar_ptr, and then _int_malloc, the main allocation routine, is called:

static void *
_int_malloc (mstate av, size_t bytes)
{
  INTERNAL_SIZE_T nb;               /* normalized request size */
  unsigned int idx;                 /* associated bin index */
  mbinptr bin;                      /* associated bin */

  mchunkptr victim;                 /* inspected/selected chunk */
  INTERNAL_SIZE_T size;             /* its size */
  int victim_index;                 /* its bin index */

  mchunkptr remainder;              /* remainder from a split */
  unsigned long remainder_size;     /* its size */

  unsigned int block;               /* bit map traverser */
  unsigned int bit;                 /* bit map traverser */
  unsigned int map;                 /* current word of binmap */

  mchunkptr fwd;                    /* misc temp for linking */
  mchunkptr bck;                    /* misc temp for linking */

  const char *errstr = NULL;

  /*
     Convert request size to internal form by adding SIZE_SZ bytes
     overhead plus possibly more to obtain necessary alignment and/or
     to obtain a size of at least MINSIZE, the smallest allocatable
     size. Also, checked_request2size traps (returning 0) request sizes
     that are so large that they wrap around zero when padded and
     aligned.
   */

  checked_request2size (bytes, nb);

  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
     mmap.  */
  if (__glibc_unlikely (av == NULL))
    {
      //...
    }

  First the requested size is again converted into the real size by the checked_request2size macro. Next comes the fastbin lookup:

  /*
     If the size qualifies as a fastbin, first check corresponding bin.
     This code is safe to execute even if av is not yet initialized, so we
     can try it without checking, which saves some time on this fast path.
   */

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp = *fb;
      do
        {
          victim = pp;
          if (victim == NULL)
            break;
        }
      while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim))
             != victim);
      if (victim != 0)
        {
          //...
        }
    }

  Thanks to the first chunk's allocation, the location get_max_fast() reads from now holds a value: 0x80. We requested 0x20, so nb is 0x30. Here are the macros involved:

#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])

/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

  First the index is derived from the size: since the smallest chunk is 0x20, (0x20 >> 4) - 2 is 0, which maps the smallest chunk size to slot 0 of the fastbinsY array (for our nb of 0x30 the index is 1). Then comes a do/while loop: if fastbinsY already held chunks, victim would end up as the chunk at the head of that fastbin (the most recently freed one), with the bin head advanced to victim->fd; but our fastbinsY array is still uninitialized, so we hit the break on the very first pass, the if below isn't satisfied either, and execution continues:

#define smallbin_index(sz) \
  ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4) : (((unsigned) (sz)) >> 3))\
   + SMALLBIN_CORRECTION)
#define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)     //0

    if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);
      bin = bin_at (av, idx);

      if ((victim = last (bin)) != bin)
        {
          //...
        }

    }
  else
    {
      //...
    }

  nb (0x30) is within smallbin range, but because the bins lists were initialized during the first allocation, last(bin) != bin no longer holds, so the if is skipped and we keep going:

#define unsorted_chunks(M)          (bin_at (M, 1))

  for (;; )
    {
      int iters = 0;
      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
        {
          //...
        }
      if (!in_smallbin_range (nb))
        {
          //...
        }
      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);

  Just like the first allocation, the binmap is all zeroes, so bit > map is bound to hold, and after four rounds of the do/while we jump into use_top:

   use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */

      victim = av->top;
      size = chunksize (victim);

      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);

          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }

  top was set up earlier and is a very large value, while our nb + MINSIZE is only 0x50, so we definitely get into the if. What happens next is simple: top's size is adjusted and the chunk is split off the front, ready to use; set_head(victim, 0x30 | PREV_INUSE) is exactly the 0x31 we saw in the very first snapshot. So we get our fastbin-sized chunk almost immediately, and I'd guess largebin allocation works along the same lines, so that wraps up the malloc part, and with it this series :D
