malloc Source Code Analysis

  • glibc 2.35

Internal structures

heap

/* A heap is a single contiguous memory region holding (coalesceable)
malloc_chunks. It is allocated with mmap() and always starts at an
address aligned to HEAP_MAX_SIZE. */

typedef struct _heap_info
{
  mstate ar_ptr;            /* Arena for this heap. */
  struct _heap_info *prev;  /* Previous heap. */
  size_t size;              /* Current size in bytes. */
  size_t mprotect_size;     /* Size in bytes that has been mprotected
                               PROT_READ|PROT_WRITE. */
  size_t pagesize;          /* Page size used when allocating the arena. */
  /* Make sure the following data is properly aligned, particularly
     that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
     MALLOC_ALIGNMENT. */
  char pad[-3 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
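Because each non-main heap is mmap()ed at an address aligned to its maximum size, the allocator can recover the owning heap_info (and, through ar_ptr, the arena) from any chunk address just by masking off the low address bits. Below is a minimal sketch of that idea, assuming the usual 64-bit default of HEAP_MAX_SIZE = 64 MB; the real glibc 2.35 helper, heap_for_ptr, goes through heap_max_size() so that hugepage-backed heaps are handled too.

#include <stdint.h>

#define HEAP_MAX_SIZE_SKETCH (64UL * 1024 * 1024)  /* assumed 64-bit default */

/* Mask a pointer that lies inside a non-main heap down to the heap_info
   header at the start of that heap; heap->ar_ptr then gives the arena. */
static heap_info *
heap_for_ptr_sketch (void *ptr)
{
  return (heap_info *) ((uintptr_t) ptr & ~(HEAP_MAX_SIZE_SKETCH - 1));
}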

chunk

struct malloc_chunk {

  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */

  struct malloc_chunk* fd;                /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  struct malloc_chunk* fd_nextsize;       /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};
/* The chunk header is two SIZE_SZ elements, but this is used widely, so
we define it here for clarity later. */
#define CHUNK_HDR_SZ (2 * SIZE_SZ)

/* Convert a chunk address to a user mem pointer without correcting
the tag. */
#define chunk2mem(p) ((void*)((char*)(p) + CHUNK_HDR_SZ))

/* Convert a chunk address to a user mem pointer and extract the right tag. */
#define chunk2mem_tag(p) ((void*)tag_at ((char*)(p) + CHUNK_HDR_SZ))

/* Convert a user mem pointer to a chunk address and extract the right tag. */
#define mem2chunk(mem) ((mchunkptr)tag_at (((char*)(mem) - CHUNK_HDR_SZ)))
  • allocated chunk
    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if unallocated (P clear)  |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of chunk, in bytes                     |A|M|P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             User data starts here...                          .
            .                                                               .
            .             (malloc_usable_size() bytes)                      .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             (size of chunk, but used for application data)    |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of next chunk, in bytes                |A|0|1|
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  • free chunk
    chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of previous chunk, if unallocated (P clear)  |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `head:' |             Size of chunk, in bytes                     |A|0|P|
      mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Forward pointer to next chunk in list             |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Back pointer to previous chunk in list            |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Unused space (may be 0 bytes long)                .
            .                                                               .
            .                                                               |
nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    `foot:' |             Size of chunk, in bytes                           |
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
            |             Size of next chunk, in bytes                |A|0|0|
            +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

arena

static struct malloc_state main_arena =
{
  .mutex = _LIBC_LOCK_INITIALIZER,
  .next = &main_arena,
  .attached_threads = 1
};

struct malloc_state
{
  /* Serialize access.  */
  __libc_lock_define (, mutex);

  /* Flags (formerly in max_fast).  */
  int flags;

  /* Set if the fastbin chunks contain recently inserted free blocks.  */
  /* Note this is a bool but not all targets support atomics on booleans.  */
  int have_fastchunks;

  /* Fastbins */
  mfastbinptr fastbinsY[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  mchunkptr bins[NBINS * 2 - 2];

  /* Bitmap of bins */
  unsigned int binmap[BINMAPSIZE];

  /* Linked list */
  struct malloc_state *next;

  /* Linked list for free arenas.  Access to this field is serialized
     by free_list_lock in arena.c.  */
  struct malloc_state *next_free;

  /* Number of threads attached to this arena.  0 if the arena is on
     the free list.  Access to this field is serialized by
     free_list_lock in arena.c.  */
  INTERNAL_SIZE_T attached_threads;

  /* Memory allocated from the system in this arena.  */
  INTERNAL_SIZE_T system_mem;
  INTERNAL_SIZE_T max_system_mem;
};
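The bins array holds only the fd/bk pairs of the doubly linked bin heads, packed so that bin_at() can treat each pair as if it were a full malloc_chunk. A rough sketch of the indexing helpers, paraphrased from malloc.c (constants assume the usual layout):

typedef struct malloc_chunk *mbinptr;

#define NBINS      128
#define NSMALLBINS  64

/* Bin i occupies two consecutive pointers (fd, bk) in av->bins;
   subtracting offsetof(fd) makes the pair look like a chunk header. */
#define bin_at(m, i) \
  (mbinptr) (((char *) &((m)->bins[((i) - 1) * 2])) \
             - offsetof (struct malloc_chunk, fd))

/* Fast bins are a separate array indexed by size class. */
#define fastbin(ar_ptr, idx) ((ar_ptr)->fastbinsY[idx])
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)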

System memory operations in malloc

sysmalloc_mmap

static void *
sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
{
long int size;

// Round the request up to a page-size multiple
if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
size = ALIGN_UP (nb + SIZE_SZ, pagesize);
else
size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);

char *mm = (char *) MMAP (0, size,
mtag_mmap_flags | PROT_READ | PROT_WRITE,
extra_flags);
if (mm == MAP_FAILED)
return mm;


// ...
mchunkptr p; /* the allocated/returned chunk */
// If alignment forces us to skip some bytes at the front of the mapping,
// record the skipped amount in the chunk's prev_size field
if (front_misalign > 0)
{
ptrdiff_t correction = MALLOC_ALIGNMENT - front_misalign;
p = (mchunkptr) (mm + correction);
set_prev_size (p, correction);
set_head (p, (size - correction) | IS_MMAPPED);
}
else
{
p = (mchunkptr) mm;
set_prev_size (p, 0);
set_head (p, size | IS_MMAPPED);
}

/* update statistics */
int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
atomic_max (&mp_.max_n_mmaps, new);

unsigned long sum;
sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
atomic_max (&mp_.max_mmapped_mem, sum);

check_chunk (av, p);

return chunk2mem (p);
}
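A quick way to observe this path from user code: a request at or above the default mmap threshold (128 KiB) is served by sysmalloc_mmap, so the size field just below the returned pointer has the IS_MMAPPED bit (0x2) set. The demo below is illustrative only; it peeks at glibc's internal chunk layout and is not portable.

#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  void *p = malloc (256 * 1024);           /* >= mp_.mmap_threshold */
  size_t sz = ((size_t *) p)[-1];          /* mchunk_size of the chunk */
  printf ("IS_MMAPPED: %zu\n", sz & 0x2);  /* prints 2 on glibc */
  free (p);
  return 0;
}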

sysmalloc

  • First try sysmalloc_mmap: if there is no usable arena, or the request is at least mp_.mmap_threshold and the number of live mmap regions is still below mp_.n_mmaps_max, the request is mapped directly with mmap.
  • Otherwise extend the arena. For a non-main arena, try to grow the current heap (grow_heap); if that is not enough, allocate a brand-new heap (new_heap). For the main arena, first extend the break with MORECORE/brk; if that fails, fall back to mmap.
  • Merge the memory obtained from sbrk with the previously owned space wherever possible.
    • Handling two discontiguous sbrk extensions: if this is not the first heap extension, two "double fenceposts" are inserted at old_top. These are artificial chunk headers that prevent consolidation with address space we do not own; they are marked as in use but are too small to ever be handed out, and two of them are needed so that sizes and alignment work out. The leftover part of old_top is then released with _int_free if it is large enough.
  • Finally, carve the requested chunk out of the (possibly new) top chunk.
static void *
sysmalloc (INTERNAL_SIZE_T nb, mstate av)
{
mchunkptr old_top; /* incoming value of av->top */
INTERNAL_SIZE_T old_size; /* its size */
char *old_end; /* its end address */

long size; /* arg to first MORECORE or mmap call */
char *brk; /* return value from MORECORE */

long correction; /* arg to 2nd MORECORE call */
char *snd_brk; /* 2nd return val */

INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
INTERNAL_SIZE_T end_misalign; /* partial page left at end of new space */
char *aligned_brk; /* aligned offset into brk */

mchunkptr p; /* the allocated/returned chunk */
mchunkptr remainder; /* remainder from allocation */
unsigned long remainder_size; /* its size */


size_t pagesize = GLRO (dl_pagesize);
bool tried_mmap = false;


/*
If have mmap, and the request size meets the mmap threshold, and
the system supports mmap, and there are few enough currently
allocated mmapped regions, try to directly map this request
rather than expanding top.
*/
// use mmap

if (av == NULL
|| ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
&& (mp_.n_mmaps < mp_.n_mmaps_max)))
{
char *mm;
#if HAVE_TUNABLES
if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
{
/* There is no need to issue the THP madvise call if Huge Pages are
used directly. */
mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
if (mm != MAP_FAILED)
return mm;
}
#endif
mm = sysmalloc_mmap (nb, pagesize, 0, av);
if (mm != MAP_FAILED)
return mm;
tried_mmap = true;
}

/* There are no usable arenas and mmap also failed. */
if (av == NULL)
return 0;

/* Record incoming configuration of top */

old_top = av->top;
old_size = chunksize (old_top);
old_end = (char *) (chunk_at_offset (old_top, old_size));

brk = snd_brk = (char *) (MORECORE_FAILURE); // -1

// check...

if (av != &main_arena)
{
heap_info *old_heap, *heap;
size_t old_heap_size;

/* First try to extend the current heap. */
/* Find the heap_info this chunk belongs to from the chunk pointer */
old_heap = heap_for_ptr (old_top);
old_heap_size = old_heap->size;
if ((long) (MINSIZE + nb - old_size) > 0
&& grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
{
av->system_mem += old_heap->size - old_heap_size;
set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
| PREV_INUSE);
}
else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
{
/* Use a newly allocated heap. */
heap->ar_ptr = av;
heap->prev = old_heap;
av->system_mem += heap->size;
/* Set up the new top. */
top (av) = chunk_at_offset (heap, sizeof (*heap));
set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);

/* Setup fencepost and free the old top chunk with a multiple of
MALLOC_ALIGNMENT in size. */
/* The fencepost takes at least MINSIZE bytes, because it might
become the top chunk again later. Note that a footer is set
up, too, although the chunk is marked in use. */
old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
set_head (chunk_at_offset (old_top, old_size + CHUNK_HDR_SZ),
0 | PREV_INUSE);
if (old_size >= MINSIZE)
{
set_head (chunk_at_offset (old_top, old_size),
CHUNK_HDR_SZ | PREV_INUSE);
set_foot (chunk_at_offset (old_top, old_size), CHUNK_HDR_SZ);
set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
_int_free (av, old_top, 1);
}
else
{
set_head (old_top, (old_size + CHUNK_HDR_SZ) | PREV_INUSE);
set_foot (old_top, (old_size + CHUNK_HDR_SZ));
}
}
else if (!tried_mmap)
{
/* We can at least try to use to mmap memory. If new_heap fails
it is unlikely that trying to allocate huge pages will
succeed. */
char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
if (mm != MAP_FAILED)
return mm;
}
}
else /* av == main_arena */


{ /* Request enough space for nb + pad + overhead */
size = nb + mp_.top_pad + MINSIZE;

/*
If contiguous, we can subtract out existing space that we hope to
combine with new space. We add it back later only if
we don't actually get contiguous space.
*/

if (contiguous (av))
size -= old_size;

/*
Round to a multiple of page size or huge page size.
If MORECORE is not contiguous, this ensures that we only call it
with whole-page arguments. And if MORECORE is contiguous and
this is not first time through, this preserves page-alignment of
previous calls. Otherwise, we correct to page-align below.
*/

#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
/* Defined in brk.c. */
extern void *__curbrk;
if (__glibc_unlikely (mp_.thp_pagesize != 0))
{
uintptr_t top = ALIGN_UP ((uintptr_t) __curbrk + size,
mp_.thp_pagesize);
size = top - (uintptr_t) __curbrk;
}
else
#endif
size = ALIGN_UP (size, GLRO(dl_pagesize));

/*
Don't try to call MORECORE if argument is so big as to appear
negative. Note that since mmap takes size_t arg, it may succeed
below even if we cannot call MORECORE.
*/

if (size > 0)
{
brk = (char *) (MORECORE (size));
if (brk != (char *) (MORECORE_FAILURE))
madvise_thp (brk, size);
LIBC_PROBE (memory_sbrk_more, 2, brk, size);
}

if (brk == (char *) (MORECORE_FAILURE))
{
/*
If have mmap, try using it as a backup when MORECORE fails or
cannot be used. This is worth doing on systems that have "holes" in
address space, so sbrk cannot extend to give contiguous space, but
space is available elsewhere. Note that we ignore mmap max count
and threshold limits, since the space will not be used as a
segregated mmap region.
*/

char *mbrk = MAP_FAILED;
#if HAVE_TUNABLES
if (mp_.hp_pagesize > 0)
mbrk = sysmalloc_mmap_fallback (&size, nb, old_size,
mp_.hp_pagesize, mp_.hp_pagesize,
mp_.hp_flags, av);
#endif
if (mbrk == MAP_FAILED)
mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize,
MMAP_AS_MORECORE_SIZE, 0, av);
if (mbrk != MAP_FAILED)
{
/* We do not need, and cannot use, another sbrk call to find end */
brk = mbrk;
snd_brk = brk + size;
}
}

if (brk != (char *) (MORECORE_FAILURE))
{
if (mp_.sbrk_base == 0)
mp_.sbrk_base = brk;
av->system_mem += size;

/*
If MORECORE extends previous space, we can likewise extend top size.
*/

if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
set_head (old_top, (size + old_size) | PREV_INUSE);

else if (contiguous (av) && old_size && brk < old_end)
/* Oops! Someone else killed our space.. Can't touch anything. */
malloc_printerr ("break adjusted to free malloc space");

/*
Otherwise, make adjustments:

* If the first time through or noncontiguous, we need to call sbrk
just to find out where the end of memory lies.

* We need to ensure that all returned chunks from malloc will meet
MALLOC_ALIGNMENT

* If there was an intervening foreign sbrk, we need to adjust sbrk
request size to account for fact that we will not be able to
combine new space with existing space in old_top.

* Almost all systems internally allocate whole pages at a time, in
which case we might as well use the whole last page of request.
So we allocate enough more memory to hit a page boundary now,
which in turn causes future contiguous calls to page-align.
*/

else
{
front_misalign = 0;
end_misalign = 0;
correction = 0;
aligned_brk = brk;

/* handle contiguous cases */
if (contiguous (av))
{
/* Count foreign sbrk as system_mem. */
if (old_size)
av->system_mem += brk - old_end;

/* Guarantee alignment of first new chunk made from this space */

front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
if (front_misalign > 0)
{
/*
Skip over some bytes to arrive at an aligned position.
We don't need to specially mark these wasted front bytes.
They will never be accessed anyway because
prev_inuse of av->top (and any chunk created from its start)
is always true after initialization.
*/

correction = MALLOC_ALIGNMENT - front_misalign;
aligned_brk += correction;
}

/*
If this isn't adjacent to existing space, then we will not
be able to merge with old_top space, so must add to 2nd request.
*/

correction += old_size;

/* Extend the end address to hit a page boundary */
end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;

assert (correction >= 0);
snd_brk = (char *) (MORECORE (correction));

/*
If can't allocate correction, try to at least find out current
brk. It might be enough to proceed without failing.

Note that if second sbrk did NOT fail, we assume that space
is contiguous with first sbrk. This is a safe assumption unless
program is multithreaded but doesn't use locks and a foreign sbrk
occurred between our first and second calls.
*/

if (snd_brk == (char *) (MORECORE_FAILURE))
{
correction = 0;
snd_brk = (char *) (MORECORE (0));
}
else
madvise_thp (snd_brk, correction);
}

/* handle non-contiguous cases */
else
{
if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
/* MORECORE/mmap must correctly align */
assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
else
{
front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
if (front_misalign > 0)
{
/*
Skip over some bytes to arrive at an aligned position.
We don't need to specially mark these wasted front bytes.
They will never be accessed anyway because
prev_inuse of av->top (and any chunk created from its start)
is always true after initialization.
*/

aligned_brk += MALLOC_ALIGNMENT - front_misalign;
}
}

/* Find out current end of memory */
if (snd_brk == (char *) (MORECORE_FAILURE))
{
snd_brk = (char *) (MORECORE (0));
}
}

/* Adjust top based on results of second sbrk */
if (snd_brk != (char *) (MORECORE_FAILURE))
{
av->top = (mchunkptr) aligned_brk;
set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
av->system_mem += correction;

/*
If not the first time through, we either have a
gap due to foreign sbrk or a non-contiguous region. Insert a
double fencepost at old_top to prevent consolidation with space
we don't own. These fenceposts are artificial chunks that are
marked as inuse and are in any case too small to use. We need
two to make sizes and alignments work out.
*/

if (old_size != 0)
{
/*
Shrink old_top to insert fenceposts, keeping size a
multiple of MALLOC_ALIGNMENT. We know there is at least
enough space in old_top to do this.
*/
old_size = (old_size - 2 * CHUNK_HDR_SZ) & ~MALLOC_ALIGN_MASK;
set_head (old_top, old_size | PREV_INUSE);

/*
Note that the following assignments completely overwrite
old_top when old_size was previously MINSIZE. This is
intentional. We need the fencepost, even if old_top otherwise gets
lost.
*/
set_head (chunk_at_offset (old_top, old_size),
CHUNK_HDR_SZ | PREV_INUSE);
set_head (chunk_at_offset (old_top,
old_size + CHUNK_HDR_SZ),
CHUNK_HDR_SZ | PREV_INUSE);

/* If possible, release the rest. */
if (old_size >= MINSIZE)
{
_int_free (av, old_top, 1);
}
}
}
}
}
} /* if (av != &main_arena) */

if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
av->max_system_mem = av->system_mem;
check_malloc_state (av);

/* finally, do the allocation */
p = av->top;
size = chunksize (p);

/* check that one of the above allocation paths succeeded */
if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
{
remainder_size = size - nb;
remainder = chunk_at_offset (p, nb);
av->top = remainder;
set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
set_head (remainder, remainder_size | PREV_INUSE);
check_malloced_chunk (av, p, nb);
return chunk2mem (p);
}

/* catch all failure paths */
__set_errno (ENOMEM);
return 0;
}

systrim

  • Reduces memory usage by giving the unused tail of the top chunk (everything beyond pad) back to the system.

  • It calls MORECORE with a negative argument to release the memory, ignores the return value, and then calls MORECORE (0) again to find out where the end of the heap actually is now.

static int
systrim (size_t pad, mstate av)
{
long top_size; /* Amount of top-most memory */
long extra; /* Amount to release */
long released; /* Amount actually released */
char *current_brk; /* address returned by pre-check sbrk call */
char *new_brk; /* address returned by post-check sbrk call */
long top_area;

top_size = chunksize (av->top);

top_area = top_size - MINSIZE - 1;
if (top_area <= pad)
return 0;

/* Release in pagesize units and round down to the nearest page. */
#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
if (__glibc_unlikely (mp_.thp_pagesize != 0))
extra = ALIGN_DOWN (top_area - pad, mp_.thp_pagesize);
else
#endif
extra = ALIGN_DOWN (top_area - pad, GLRO(dl_pagesize));

if (extra == 0)
return 0;

/*
Only proceed if end of memory is where we last set it.
This avoids problems if there were foreign sbrk calls.
*/
current_brk = (char *) (MORECORE (0));
if (current_brk == (char *) (av->top) + top_size)
{
/*
Attempt to release memory. We ignore MORECORE return value,
and instead call again to find out where new end of memory is.
This avoids problems if first call releases less than we asked,
or if failure somehow altered brk value. (We could still
encounter problems if it altered brk in some very bad way,
but the only thing we can do is adjust anyway, which will cause
some downstream failure.)
*/

MORECORE (-extra);
new_brk = (char *) (MORECORE (0));

LIBC_PROBE (memory_sbrk_less, 2, new_brk, extra);

if (new_brk != (char *) MORECORE_FAILURE)
{
released = (long) (current_brk - new_brk);

if (released != 0)
{
/* Success. Adjust top. */
av->system_mem -= released;
set_head (av->top, (top_size - released) | PREV_INUSE);
check_malloc_state (av);
return 1;
}
}
}
return 0;
}
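From the application side, systrim is what ultimately runs when free() leaves the main-arena top chunk above M_TRIM_THRESHOLD, or when malloc_trim() is called explicitly. A small illustrative snippet using the standard glibc tuning APIs (the threshold value here is chosen arbitrarily):

#include <malloc.h>

int main (void)
{
  /* Trim more aggressively: give pages back once the top chunk
     exceeds 64 KiB instead of the default 128 KiB. */
  mallopt (M_TRIM_THRESHOLD, 64 * 1024);

  /* Explicitly release whatever can be released right now. */
  malloc_trim (0);
  return 0;
}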

arena operations

_int_new_arena

  • Creates a new heap with new_heap and initializes the arena (malloc_state) bookkeeping that lives at the start of that heap.
static mstate
_int_new_arena (size_t size)
{
mstate a;
heap_info *h;
char *ptr;
unsigned long misalign;

h = new_heap (size + (sizeof (*h) + sizeof (*a) + MALLOC_ALIGNMENT),
mp_.top_pad);
if (!h)
{
/* Maybe size is too large to fit in a single heap. So, just try
to create a minimally-sized arena and let _int_malloc() attempt
to deal with the large request via mmap_chunk(). */
h = new_heap (sizeof (*h) + sizeof (*a) + MALLOC_ALIGNMENT, mp_.top_pad);
if (!h)
return 0;
}
a = h->ar_ptr = (mstate) (h + 1);
malloc_init_state (a);
a->attached_threads = 1;
/*a->next = NULL;*/
a->system_mem = a->max_system_mem = h->size;

/* Set up the top chunk, with proper alignment. */
ptr = (char *) (a + 1);
misalign = (unsigned long) chunk2mem (ptr) & MALLOC_ALIGN_MASK;
if (misalign > 0)
ptr += MALLOC_ALIGNMENT - misalign;
top (a) = (mchunkptr) ptr;
set_head (top (a), (((char *) h + h->size) - ptr) | PREV_INUSE);

LIBC_PROBE (memory_arena_new, 2, a, size);
mstate replaced_arena = thread_arena;
thread_arena = a;
__libc_lock_init (a->mutex);

__libc_lock_lock (list_lock);

/* Add the new arena to the global list. */
a->next = main_arena.next;
/* FIXME: The barrier is an attempt to synchronize with read access
in reused_arena, which does not acquire list_lock while
traversing the list. */
atomic_write_barrier ();
main_arena.next = a;

__libc_lock_unlock (list_lock);

__libc_lock_lock (free_list_lock);
detach_arena (replaced_arena);
__libc_lock_unlock (free_list_lock);

/* Lock this arena. NB: Another thread may have been attached to
this arena because the arena is now accessible from the
main_arena.next list and could have been picked by reused_arena.
This can only happen for the last arena created (before the arena
limit is reached). At this point, some arena has to be attached
to two threads. We could acquire the arena lock before list_lock
to make it less likely that reused_arena picks this new arena,
but this could result in a deadlock with
__malloc_fork_lock_parent. */

__libc_lock_lock (a->mutex);

return a;
}

Obtaining an arena

  • free_list is a global singly linked list of arenas that currently have no attached threads; get_free_list() (sketched after arena_get2 below) tries to reuse one of these before a new arena is created.
static mstate
arena_get2 (size_t size, mstate avoid_arena)
{
mstate a;

static size_t narenas_limit;

a = get_free_list ();
if (a == NULL)
{
/* Nothing immediately available, so generate a new arena. */
if (narenas_limit == 0)
{
if (mp_.arena_max != 0)
narenas_limit = mp_.arena_max;
else if (narenas > mp_.arena_test)
{
int n = __get_nprocs_sched ();

if (n >= 1)
narenas_limit = NARENAS_FROM_NCORES (n);
else
/* We have no information about the system. Assume two
cores. */
narenas_limit = NARENAS_FROM_NCORES (2);
}
}
repeat:;
size_t n = narenas;
/* NB: the following depends on the fact that (size_t)0 - 1 is a
very large number and that the underflow is OK. If arena_max
is set the value of arena_test is irrelevant. If arena_test
is set but narenas is not yet larger or equal to arena_test
narenas_limit is 0. There is no possibility for narenas to
be too big for the test to always fail since there is not
enough address space to create that many arenas. */
if (__glibc_unlikely (n <= narenas_limit - 1))
{
if (catomic_compare_and_exchange_bool_acq (&narenas, n + 1, n))
goto repeat;
a = _int_new_arena (size);
if (__glibc_unlikely (a == NULL))
catomic_decrement (&narenas);
}
else
a = reused_arena (avoid_arena);
}
return a;
}
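get_free_list() is the helper tried first: it pops an arena off the free_list of detached arenas, attaches the current thread to it and locks it. A condensed sketch of what it does (paraphrased, not the verbatim arena.c code):

static mstate
get_free_list (void)
{
  mstate replaced_arena = thread_arena;
  mstate result = free_list;
  if (result != NULL)
    {
      __libc_lock_lock (free_list_lock);
      result = free_list;
      if (result != NULL)
        {
          free_list = result->next_free;
          result->attached_threads = 1;     /* arena now belongs to us */
          detach_arena (replaced_arena);    /* drop the old arena, if any */
        }
      __libc_lock_unlock (free_list_lock);

      if (result != NULL)
        {
          __libc_lock_lock (result->mutex);
          thread_arena = result;
        }
    }
  return result;
}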

/* If we don't have the main arena, then maybe the failure is due to running
out of mmapped areas, so we can try allocating on the main arena.
Otherwise, it is likely that sbrk() has failed and there is still a chance
to mmap(), so try one of the other arenas. */
static mstate
arena_get_retry (mstate ar_ptr, size_t bytes)
{
LIBC_PROBE (memory_arena_retry, 2, bytes, ar_ptr);
if (ar_ptr != &main_arena)
{
__libc_lock_unlock (ar_ptr->mutex);
ar_ptr = &main_arena;
__libc_lock_lock (ar_ptr->mutex);
}
else
{
__libc_lock_unlock (ar_ptr->mutex);
ar_ptr = arena_get2 (bytes, ar_ptr);
}

return ar_ptr;
}
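For completeness, the arena_get macro used at the top of __libc_malloc simply reuses the thread's cached arena when there is one and falls back to arena_get2 otherwise; roughly (paraphrased from arena.c):

#define arena_get(ptr, size) do {              \
      ptr = thread_arena;                      \
      arena_lock (ptr, size);                  \
    } while (0)

#define arena_lock(ptr, size) do {             \
      if (ptr)                                 \
        __libc_lock_lock (ptr->mutex);         \
      else                                     \
        ptr = arena_get2 ((size), NULL);       \
    } while (0)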

heap_trim

  1. If the top chunk spans an entire (non-first) heap, the whole heap can go away: in a loop, the fencepost at the end of the previous heap is located, the top chunk is moved back into the previous heap, and the now-empty heap is released with __munmap.
  2. Otherwise, compute the amount of memory to release (extra) from the top chunk, preserving pad and MINSIZE, rounded down to the nearest page boundary.
  3. If extra is 0 there is nothing to trim, and 0 is returned to signal that nothing was released.
  4. shrink_heap then returns the tail of the heap to the system:
  • If the heap may actually be shrunk (check_may_shrink_heap), the extra space is re-mapped with MMAP as PROT_NONE, making it inaccessible and letting the kernel reclaim it.
  • Otherwise the extra space is only marked as unneeded with __madvise(MADV_DONTNEED).
/* Shrink a heap.  */
// Give the surplus memory back by re-mapping it as inaccessible pages
// (or by advising the kernel that it is no longer needed)
static int
shrink_heap (heap_info *h, long diff)
{
long new_size;

new_size = (long) h->size - diff;
if (new_size < (long) sizeof (*h))
return -1;

/* Try to re-map the extra heap space freshly to save memory, and make it
inaccessible. See malloc-sysdep.h to know when this is true. */
if (__glibc_unlikely (check_may_shrink_heap ()))
{
if ((char *) MMAP ((char *) h + new_size, diff, PROT_NONE,
MAP_FIXED) == (char *) MAP_FAILED)
return -2;

h->mprotect_size = new_size;
}
else
__madvise ((char *) h + new_size, diff, MADV_DONTNEED);
/*fprintf(stderr, "shrink %p %08lx\n", h, new_size);*/

h->size = new_size;
LIBC_PROBE (memory_heap_less, 2, h, h->size);
return 0;
}

/* Delete a heap. */

static int
heap_trim (heap_info *heap, size_t pad)
{
mstate ar_ptr = heap->ar_ptr;
mchunkptr top_chunk = top (ar_ptr), p;
heap_info *prev_heap;
long new_size, top_size, top_area, extra, prev_size, misalign;
size_t max_size = heap_max_size ();

/* Can this heap go away completely? */
while (top_chunk == chunk_at_offset (heap, sizeof (*heap)))
{
prev_heap = heap->prev;
prev_size = prev_heap->size - (MINSIZE - 2 * SIZE_SZ);
p = chunk_at_offset (prev_heap, prev_size);
/* fencepost must be properly aligned. */
misalign = ((long) p) & MALLOC_ALIGN_MASK;
p = chunk_at_offset (prev_heap, prev_size - misalign);
assert (chunksize_nomask (p) == (0 | PREV_INUSE)); /* must be fencepost */
p = prev_chunk (p);
new_size = chunksize (p) + (MINSIZE - 2 * SIZE_SZ) + misalign;
assert (new_size > 0 && new_size < (long) (2 * MINSIZE));
if (!prev_inuse (p))
new_size += prev_size (p);
assert (new_size > 0 && new_size < max_size);
if (new_size + (max_size - prev_heap->size) < pad + MINSIZE
+ heap->pagesize)
break;
ar_ptr->system_mem -= heap->size;
LIBC_PROBE (memory_heap_free, 2, heap, heap->size);
if ((char *) heap + max_size == aligned_heap_area)
aligned_heap_area = NULL;
__munmap (heap, max_size);
heap = prev_heap;
if (!prev_inuse (p)) /* consolidate backward */
{
p = prev_chunk (p);
unlink_chunk (ar_ptr, p);
}
assert (((unsigned long) ((char *) p + new_size) & (heap->pagesize - 1))
== 0);
assert (((char *) p + new_size) == ((char *) heap + heap->size));
top (ar_ptr) = top_chunk = p;
set_head (top_chunk, new_size | PREV_INUSE);
/*check_chunk(ar_ptr, top_chunk);*/
}

/* Uses similar logic for per-thread arenas as the main arena with systrim
and _int_free by preserving the top pad and rounding down to the nearest
page. */
top_size = chunksize (top_chunk);
if ((unsigned long)(top_size) <
(unsigned long)(mp_.trim_threshold))
return 0;

top_area = top_size - MINSIZE - 1;
if (top_area < 0 || (size_t) top_area <= pad)
return 0;

/* Release in pagesize units and round down to the nearest page. */
extra = ALIGN_DOWN(top_area - pad, heap->pagesize);
if (extra == 0)
return 0;

/* Try to shrink. */
if (shrink_heap (heap, extra) != 0)
return 0;

ar_ptr->system_mem -= extra;

/* Success. Adjust top accordingly. */
set_head (top_chunk, (top_size - extra) | PREV_INUSE);
/*check_chunk(ar_ptr, top_chunk);*/
return 1;
}

Core internal functions

__libc_malloc

  • Calls ptmalloc_init() on first use to initialize the allocator, then tries the per-thread tcache: if the tcache bin for the (padded) request size is non-empty, a chunk is returned from it immediately.
  • In a single-threaded process, _int_malloc is called directly on the main arena (main_arena) and the returned pointer is tagged (tag_new_usable); in a multi-threaded process, a usable arena is obtained first with arena_get.
  • If that _int_malloc call fails, arena_get_retry is used to pick another arena and _int_malloc is tried once more.
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;

_Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
"PTRDIFF_MAX is not more than half of SIZE_MAX");

if (!__malloc_initialized)
ptmalloc_init ();
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
if (!checked_request2size (bytes, &tbytes))
{
__set_errno (ENOMEM);
return NULL;
}
size_t tc_idx = csize2tidx (tbytes);

MAYBE_INIT_TCACHE ();

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins
&& tcache
&& tcache->counts[tc_idx] > 0)
{
victim = tcache_get (tc_idx);
return tag_new_usable (victim);
}
DIAG_POP_NEEDS_COMMENT;
#endif

if (SINGLE_THREAD_P)
{
victim = tag_new_usable (_int_malloc (&main_arena, bytes));
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
}

arena_get (ar_ptr, bytes);

victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}

if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);

victim = tag_new_usable (victim);

assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
}
libc_hidden_def (__libc_malloc)
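The tcache fast path at the top of __libc_malloc works on two small per-thread structures, roughly as follows (paraphrased from malloc.c; TCACHE_MAX_BINS defaults to 64, and the key field is what free() uses to detect double frees):

typedef struct tcache_entry
{
  struct tcache_entry *next;   /* singly linked list, safe-linking applied */
  uintptr_t key;               /* marks chunks currently held in a tcache */
} tcache_entry;

typedef struct tcache_perthread_struct
{
  uint16_t counts[TCACHE_MAX_BINS];        /* chunks per bin */
  tcache_entry *entries[TCACHE_MAX_BINS];  /* bin heads */
} tcache_perthread_struct;

/* tcache_get pops the head of bin tc_idx, un-obfuscates its next
   pointer and clears key so the chunk no longer looks cached. */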

__libc_free

  • Converts the user pointer mem into the corresponding chunk pointer p.
  • If the chunk was allocated through mmap, it is released with munmap_chunk.
    • If dynamic thresholds are enabled, the mmap and trim thresholds may be raised first, based on the size being freed.
  • If the chunk was not mmapped:
    • Initialize the thread cache if necessary (MAYBE_INIT_TCACHE).
    • Re-tag the memory region as belonging to the allocator (tag_region).
    • Look up the arena that owns the chunk (arena_for_chunk).
    • Call _int_free to release the chunk.

void
__libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */

if (mem == 0) /* free(0) has no effect */
return;

/* Quickly check that the freed pointer matches the tag for the memory.
This gives a useful double-free detection. */
if (__glibc_unlikely (mtag_enabled))
*(volatile char *)mem;

int err = errno;

p = mem2chunk (mem);

if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting.
Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold
&& chunksize_nomask (p) > mp_.mmap_threshold
&& chunksize_nomask (p) <= DEFAULT_MMAP_THRESHOLD_MAX)
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);
}
else
{
MAYBE_INIT_TCACHE ();

/* Mark the chunk as belonging to the library again. */
(void)tag_region (chunk2mem (p), memsize (p));

ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}

__set_errno (err);
}
libc_hidden_def (__libc_free)

_int_malloc

  1. If there is no usable arena (av == NULL), call sysmalloc directly.

  2. If the padded request size nb is in the fastbin range (<= global_max_fast), look for a chunk in the matching fast bin; on a hit it is returned at once, otherwise continue with step 3.

  3. If nb is a small-bin size (below MIN_LARGE_SIZE: 512 B on 32-bit, 1024 B on 64-bit), look in the corresponding small bin; if a chunk is found, allocation is done.

  4. For a large request, or when the small bins had nothing suitable:

    • malloc_consolidate walks the fast bins, coalesces neighbouring free chunks and links the results into the unsorted bin.
    • The unsorted bin is then traversed:
    • a chunk that fits exactly, or a last_remainder that can be split for a small request, is returned immediately;
    • every other chunk is sorted into its small bin or large bin. When the traversal is done, continue with step 5.

  5. For a large request: search the large bins, walking the size-ordered list from its smaller end until the first chunk big enough is found; split it, return the requested part and put the remainder back into the unsorted bin.

  6. If neither the fast bins nor the normal bins produced a chunk, check whether the top chunk is large enough; if so, split the request off the top chunk.

  7. If even the top chunk cannot satisfy the request, it has to be grown through sysmalloc:

    • in the main arena, requests below the mmap threshold (128 KB by default) are satisfied by extending the break with brk(); requests at or above the threshold are served with mmap;
    • in a non-main arena, mmap is used to grow the current heap or to map a new one.

  The padded size nb used throughout these steps is computed as shown below.
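Throughout _int_malloc (and sysmalloc), nb is not the user's byte count but the padded chunk size produced by request2size(): the request plus one size word, rounded up to MALLOC_ALIGNMENT, with MINSIZE as a lower bound. Paraphrased from malloc.c:

#define request2size(req)                                   \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)          \
   ? MINSIZE                                                \
   : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* Example on 64-bit (SIZE_SZ = 8, MALLOC_ALIGNMENT = 16):
   malloc(24) -> nb = 32   (24 + 8 rounded up to 16)
   malloc(25) -> nb = 48
   malloc(0)  -> nb = 32   (MINSIZE)  */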

_int_free

  1. If tcache is in use and the chunk's size maps to a tcache bin that is not yet full, the chunk is simply stored in the tcache and _int_free returns.

  2. Freeing a null pointer does nothing (handled in __libc_free).

  3. If the chunk was mapped with mmap, it is released with munmap() (munmap_chunk, also handled in __libc_free).

  4. If the chunk borders the top chunk, it is merged into the top chunk directly; continue with step 8.

  5. If the chunk is larger than max_fast, it is coalesced with any free neighbours and placed on the unsorted bin:

  • a. if it did not merge into the top chunk, freeing ends here;
  • b. if the coalesced chunk ended up adjacent to the top chunk, it is merged into top; continue with step 8.

  6. If the chunk is smaller than max_fast, it is pushed onto the matching fast bin. Fast-bin chunks keep their in-use bit set, so no coalescing happens at this point and freeing normally ends here; deferred coalescing is described in step 7.

  7. When a freed chunk (after coalescing) reaches FASTBIN_CONSOLIDATION_THRESHOLD (64 KB), malloc_consolidate is triggered: every fast-bin chunk is taken out, merged with its free neighbours and moved to the unsorted bin; chunks that end up next to the top chunk are merged into it. Continue with step 8.

  8. If the top chunk now exceeds the trim threshold (128 KB by default), the main arena tries to return part of the top chunk to the operating system (systrim); a non-main arena calls heap_trim instead. A sketch of the fast-bin push from step 6 follows the code below.
if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate(av);

if (av == &main_arena) {
#ifndef MORECORE_CANNOT_TRIM
if ((unsigned long)(chunksize(av->top)) >=
(unsigned long)(mp_.trim_threshold))
systrim(mp_.top_pad, av);
#endif
} else {
/* Always try heap_trim(), even if the top chunk is not
large, because the corresponding heap might go away. */
heap_info *heap = heap_for_ptr(top(av));

assert(heap->ar_ptr == av);
heap_trim(heap, mp_.top_pad);
}
} else {
munmap_chunk (p);
}
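For step 6, the fast-bin push itself is only a few lines. Below is a simplified sketch of the single-threaded path (paraphrased, not verbatim source; PROTECT_PTR is the safe-linking pointer obfuscation introduced in glibc 2.32):

  if ((unsigned long) size <= (unsigned long) get_max_fast ())
    {
      unsigned int idx = fastbin_index (size);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr old = *fb;

      /* Catch an immediate double free of the current bin head. */
      if (__glibc_unlikely (old == p))
        malloc_printerr ("double free or corruption (fasttop)");

      /* LIFO push; fd is stored XORed with the chunk's own address >> 12. */
      p->fd = PROTECT_PTR (&p->fd, old);
      *fb = p;
    }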