1 // Written in the D programming language.
2 /**
3 `AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
4 and preserving a low degree of fragmentation by means of aligned allocations.
5 
6 Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/aligned_block_list.d)
7 */
8 module std.experimental.allocator.building_blocks.aligned_block_list;
9 
10 import std.experimental.allocator.common;
11 import std.experimental.allocator.building_blocks.null_allocator;
12 
13 // Common function implementation for thread local and shared AlignedBlockList
14 private mixin template AlignedBlockListImpl(bool isShared)
15 {
16     import std.traits : hasMember;
17     import std.typecons : Ternary;
18 
19     static if (isShared)
20     import core.internal.spinlock : SpinLock;
21 
22 private:
23     // Doubly linked list of 'AlignedBlockNode'
24     // Each node contains an `Allocator` followed by its payload
25     static struct AlignedBlockNode
26     {
27         AlignedBlockNode* next, prev;
28         Allocator bAlloc;
29 
30         static if (isShared)
31         {
32             shared(size_t) bytesUsed;
33             // Since the lock is not taken when allocating, this acts like a refcount
34             // keeping the node alive
35             uint keepAlive;
36         }
37         else
38         {
39             size_t bytesUsed;
40         }
41     }
42 
43     // Root of the internal doubly linked list
44     AlignedBlockNode* root;
45 
46     // Number of active nodes
47     uint numNodes;
48 
49     // If the numNodes exceeds this limit, we will start deallocating nodes
50     enum uint maxNodes = 64;
51 
52     // This lock is always taken when changing the list
53     // To improve performance, the lock is not taken when the allocation logic is called
54     static if (isShared)
55     SpinLock lock = SpinLock(SpinLock.Contention.brief);
56 
57     // Moves a node to the front of the list, allowing for quick allocations
58     void moveToFront(AlignedBlockNode* tmp)
59     {
60         auto localRoot = cast(AlignedBlockNode*) root;
61         if (tmp == localRoot)
62             return;
63 
64         if (tmp.prev) tmp.prev.next = tmp.next;
65         if (tmp.next) tmp.next.prev = tmp.prev;
66         if (localRoot) localRoot.prev = tmp;
67         tmp.next = localRoot;
68         tmp.prev = null;
69 
70         root = cast(typeof(root)) tmp;
71     }
72 
73     // Removes a node from the list, including its payload
74     // The payload is deallocated by calling 'parent.deallocate'
75     void removeNode(AlignedBlockNode* tmp)
76     {
77         auto next = tmp.next;
78         if (tmp.prev) tmp.prev.next = tmp.next;
79         if (tmp.next) tmp.next.prev = tmp.prev;
80         parent.deallocate((cast(void*) tmp)[0 .. theAlignment]);
81 
82         if (tmp == cast(AlignedBlockNode*) root)
83             root = cast(typeof(root)) next;
84 
85         static if (isShared)
86         {
87             import core.atomic : atomicOp;
88             atomicOp!"-="(numNodes, 1);
89         }
90         else
91         {
92             numNodes--;
93         }
94     }
95 
96     // If the nodes do not have available space, a new node is created
97     // by drawing memory from the parent allocator with aligned allocations.
98     // The new node is inserted at the front of the list
99     bool insertNewNode()
100     {
101         void[] buf = parent.alignedAllocate(theAlignment, theAlignment);
102         if (buf is null)
103             return false;
104 
105         auto localRoot = cast(AlignedBlockNode*) root;
106         auto newNode = cast(AlignedBlockNode*) buf;
107 
108         // The first part of the allocation represent the node contents
109         // followed by the actual payload
110         ubyte[] payload = cast(ubyte[]) buf[AlignedBlockNode.sizeof .. $];
111         newNode.bAlloc = Allocator(payload);
112 
113         newNode.next = localRoot;
114         newNode.prev = null;
115         if (localRoot)
116             localRoot.prev = newNode;
117         root = cast(typeof(root)) newNode;
118 
119         static if (isShared)
120         {
121             import core.atomic : atomicOp;
122             atomicOp!"+="(numNodes, 1);
123         }
124         else
125         {
126             numNodes++;
127         }
128 
129         return true;
130     }
131 
132 public:
133     static if (stateSize!ParentAllocator) ParentAllocator parent;
134     else alias parent = ParentAllocator.instance;
135 
136     enum ulong alignment = Allocator.alignment;
137 
138     // Since all memory is drawn from ParentAllocator, we can
139     // forward this to the parent
140     static if (hasMember!(ParentAllocator, "owns"))
141     Ternary owns(void[] b)
142     {
143         return parent.owns(b);
144     }
145 
146     // Use `theAlignment` to find the node which allocated this block
147     bool deallocate(void[] b)
148     {
149         if (b is null)
150             return true;
151 
152         // Round buffer to nearest `theAlignment` multiple to quickly find
153         // the `parent` `AlignedBlockNode`
154         enum ulong mask = ~(theAlignment - 1);
155         ulong ptr = ((cast(ulong) b.ptr) & mask);
156         AlignedBlockNode *node = cast(AlignedBlockNode*) ptr;
157         if (node.bAlloc.deallocate(b))
158         {
159             static if (isShared)
160             {
161                 import core.atomic : atomicOp;
162                 atomicOp!"-="(node.bytesUsed, b.length);
163             }
164             else
165             {
166                 node.bytesUsed -= b.length;
167             }
168             return true;
169         }
170         return false;
171     }
172 
173     // Allocate works only if memory can be provided via `alignedAllocate` from the parent
174     static if (hasMember!(ParentAllocator, "alignedAllocate"))
175     void[] allocate(size_t n)
176     {
177         static if (isShared)
178         import core.atomic : atomicOp, atomicLoad;
179 
180         if (n == 0 || n > theAlignment)
181             return null;
182 
183         static if (isShared)
184         {
185             lock.lock();
186             scope(exit) lock.unlock();
187         }
188 
189         auto tmp = cast(AlignedBlockNode*) root;
190 
191         // Iterate through list and find first node which has memory available
192         while (tmp)
193         {
194             auto next = tmp.next;
195             static if (isShared)
196             {
197                 // Allocations can happen outside the lock
198                 // Make sure nobody deletes this node while using it
199                 tmp.keepAlive++;
200                 if (next) next.keepAlive++;
201                 lock.unlock();
202             }
203 
204             auto result = tmp.bAlloc.allocate(n);
205             if (result.length == n)
206             {
207                 // Success
208                 static if (isShared)
209                 {
210                     atomicOp!"+="(tmp.bytesUsed, n);
211                     lock.lock();
212                 }
213                 else
214                 {
215                     tmp.bytesUsed += n;
216                 }
217 
218                 // Most likely this node has memory for more allocations
219                 // Move it to the front
220                 moveToFront(tmp);
221 
222                 static if (isShared)
223                 {
224                     tmp.keepAlive--;
225                     if (next) next.keepAlive--;
226                 }
227 
228                 return result;
229             }
230 
231             // This node can now be removed if necessary
232             static if (isShared)
233             {
234                 lock.lock();
235                 tmp.keepAlive--;
236                 if (next) next.keepAlive--;
237             }
238 
239             if (!next)
240                 break;
241 
242             tmp = next;
243             next = tmp.next;
244 
245             // If there are too many nodes, free memory by removing empty nodes
246             static if (isShared)
247             {
248                 if (atomicLoad(numNodes) > maxNodes &&
249                     atomicLoad(tmp.bytesUsed) == 0 &&
250                     tmp.keepAlive == 0)
251                 {
252                     removeNode(tmp);
253                 }
254             }
255             else
256             {
257                 if (numNodes > maxNodes && tmp.bytesUsed == 0)
258                 {
259                     removeNode(tmp);
260                 }
261             }
262 
263             tmp = next;
264         }
265 
266         // Cannot create new AlignedBlockNode. Most likely the ParentAllocator ran out of resources
267         if (!insertNewNode())
268             return null;
269 
270         tmp = cast(typeof(tmp)) root;
271         void[] result = tmp.bAlloc.allocate(n);
272 
273         static if (isShared)
274         {
275             atomicOp!"+="(root.bytesUsed, result.length);
276         }
277         else
278         {
279             root.bytesUsed += result.length;
280         }
281 
282         return result;
283     }
284 
285     // goodAllocSize should not use state
286     size_t goodAllocSize(const size_t n)
287     {
288         Allocator a = null;
289         return a.goodAllocSize(n);
290     }
291 }
292 
293 /**
294 `AlignedBlockList` represents a wrapper around a chain of allocators, allowing for fast deallocations
295 and preserving a low degree of fragmentation.
296 The allocator holds internally a doubly linked list of `Allocator` objects, which will serve allocations
297 in a most-recently-used fashion. Most recent allocators used for `allocate` calls, will be
298 moved to the front of the list.
299 
300 Although allocations are in theory served in linear searching time, `deallocate` calls take
301 $(BIGOH 1) time, by using aligned allocations. `ParentAllocator` must implement `alignedAllocate`
302 and it must be able to allocate `theAlignment` bytes at the same alignment. Each aligned allocation
303 done by `ParentAllocator` will contain metadata for an `Allocator`, followed by its payload.
304 
305 Params:
306     Allocator = the allocator which is used to manage each node; it must have a constructor which receives
307         `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
308     ParentAllocator = each node draws memory from the parent allocator; it must support `alignedAllocate`
309     theAlignment = alignment of each block and at the same time length of each node
310 */
311 struct AlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
312 {
313     version (StdDdoc)
314     {
315         import std.typecons : Ternary;
316         import std.traits : hasMember;
317 
318         /**
319         Returns a chunk of memory of size `n`
320         It finds the first node in the `AlignedBlockNode` list which has available memory,
321         and moves it to the front of the list.
322 
323         All empty nodes which cannot return new memory, are removed from the list.
324 
325         Params:
326             n = bytes to allocate
327         Returns:
328             A chunk of memory of the required length or `null` on failure or
329         */
330         static if (hasMember!(ParentAllocator, "alignedAllocate"))
331         void[] allocate(size_t n);
332 
333         /**
334         Deallocates the buffer `b` given as parameter. Deallocations take place in constant
335         time, regardless of the number of nodes in the list. `b.ptr` is rounded down
336         to the nearest multiple of the `alignment` to quickly find the corresponding
337         `AlignedBlockNode`.
338 
339         Params:
340             b = buffer candidate for deallocation
341         Returns:
342             `true` on success and `false` on failure
343         */
344         bool deallocate(void[] b);
345 
346         /**
347         Returns `Ternary.yes` if the buffer belongs to the parent allocator and
348         `Ternary.no` otherwise.
349 
350         Params:
351             b = buffer tested if owned by this allocator
352         Returns:
353             `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
354         */
355         static if (hasMember!(ParentAllocator, "owns"))
356         Ternary owns(void[] b);
357     }
358     else
359     {
360         import std.math.traits : isPowerOf2;
361         static assert(isPowerOf2(alignment));
362         mixin AlignedBlockListImpl!false;
363     }
364 }
365 
366 ///
367 @system unittest
368 {
369     import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
370     import std.experimental.allocator.building_blocks.segregator : Segregator;
371     import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
372     import std.typecons : Ternary;
373 
374     /*
375     In this example we use 'AlignedBlockList' in conjunction with other allocators
376     in order to create a more complex allocator.
377 
378     The 'SuperAllocator' uses a 'Segregator' to distribute allocations to sub-allocators,
379     based on the requested size.
380 
381     Each sub-allocator is represented by an 'AlignedBlockList' of 'BitmappedBlocks'.
382     Each 'AlignedBlockList' draws memory from a root allocator which in this case is an 'AscendingPageAllocator'
383 
384     Such an allocator not only provides good performance, but also a low degree of memory fragmentation.
385     */
386     alias SuperAllocator = Segregator!(
387         32,
388         AlignedBlockList!(BitmappedBlock!32, AscendingPageAllocator*, 1 << 12),
389         Segregator!(
390 
391         64,
392         AlignedBlockList!(BitmappedBlock!64, AscendingPageAllocator*, 1 << 12),
393         Segregator!(
394 
395         128,
396         AlignedBlockList!(BitmappedBlock!128, AscendingPageAllocator*, 1 << 12),
397         AscendingPageAllocator*
398     )));
399 
400     SuperAllocator a;
401     auto pageAlloc = AscendingPageAllocator(128 * 4096);
402 
403     // Set the parent allocator for all the sub allocators
404     a.allocatorForSize!256 = &pageAlloc;
405     a.allocatorForSize!128.parent = &pageAlloc;
406     a.allocatorForSize!64.parent = &pageAlloc;
407     a.allocatorForSize!32.parent = &pageAlloc;
408 
409     enum testNum = 10;
410     void[][testNum] buf;
411 
412     // Allocations of size 32 will go to the first 'AlignedBlockList'
413     foreach (j; 0 .. testNum)
414     {
415         buf[j] = a.allocate(32);
416         assert(buf[j].length == 32);
417 
418         // This is owned by the first 'AlignedBlockList'
419         assert(a.allocatorForSize!32.owns(buf[j]) == Ternary.yes);
420     }
421 
422     // Free the memory
423     foreach (j; 0 .. testNum)
424         assert(a.deallocate(buf[j]));
425 
426     // Allocations of size 64 will go to the second 'AlignedBlockList'
427     foreach (j; 0 .. testNum)
428     {
429         buf[j] = a.allocate(64);
430         assert(buf[j].length == 64);
431 
432         // This is owned by the second 'AlignedBlockList'
433         assert(a.allocatorForSize!64.owns(buf[j]) == Ternary.yes);
434     }
435 
436     // Free the memory
437     foreach (j; 0 .. testNum)
438         assert(a.deallocate(buf[j]));
439 
440     // Allocations of size 128 will go to the third 'AlignedBlockList'
441     foreach (j; 0 .. testNum)
442     {
443         buf[j] = a.allocate(128);
444         assert(buf[j].length == 128);
445 
446         // This is owned by the third 'AlignedBlockList'
447         assert(a.allocatorForSize!128.owns(buf[j]) == Ternary.yes);
448     }
449 
450     // Free the memory
451     foreach (j; 0 .. testNum)
452         assert(a.deallocate(buf[j]));
453 
454     // Allocations which exceed 128, will go to the 'AscendingPageAllocator*'
455     void[] b = a.allocate(256);
456     assert(b.length == 256);
457     a.deallocate(b);
458 }
459 
460 /**
461 `SharedAlignedBlockList` is the threadsafe version of `AlignedBlockList`.
462 The `Allocator` template parameter must refer a shared allocator.
463 Also, `ParentAllocator` must be a shared allocator, supporting `alignedAllocate`.
464 
465 Params:
466     Allocator = the shared allocator which is used to manage each node; it must have a constructor which receives
467         `ubyte[]` and it must not have any parent allocators, except for the `NullAllocator`
468     ParentAllocator = each node draws memory from the parent allocator; it must be shared and support `alignedAllocate`
469     theAlignment = alignment of each block and at the same time length of each node
470 */
471 shared struct SharedAlignedBlockList(Allocator, ParentAllocator, ulong theAlignment = (1 << 21))
472 {
473     version (StdDdoc)
474     {
475         import std.typecons : Ternary;
476         import std.traits : hasMember;
477 
478         /**
479         Returns a chunk of memory of size `n`
480         It finds the first node in the `AlignedBlockNode` list which has available memory,
481         and moves it to the front of the list.
482 
483         All empty nodes which cannot return new memory, are removed from the list.
484 
485         Params:
486             n = bytes to allocate
487         Returns:
488             A chunk of memory of the required length or `null` on failure or
489         */
490         static if (hasMember!(ParentAllocator, "alignedAllocate"))
491         void[] allocate(size_t n);
492 
493         /**
494         Deallocates the buffer `b` given as parameter. Deallocations take place in constant
495         time, regardless of the number of nodes in the list. `b.ptr` is rounded down
496         to the nearest multiple of the `alignment` to quickly find the corresponding
497         `AlignedBlockNode`.
498 
499         Params:
500             b = buffer candidate for deallocation
501         Returns:
502             `true` on success and `false` on failure
503         */
504         bool deallocate(void[] b);
505 
506         /**
507         Returns `Ternary.yes` if the buffer belongs to the parent allocator and
508         `Ternary.no` otherwise.
509 
510         Params:
511             b = buffer tested if owned by this allocator
512         Returns:
513             `Ternary.yes` if owned by this allocator and `Ternary.no` otherwise
514         */
515         static if (hasMember!(ParentAllocator, "owns"))
516         Ternary owns(void[] b);
517     }
518     else
519     {
520         import std.math.traits : isPowerOf2;
521         static assert(isPowerOf2(alignment));
522         mixin AlignedBlockListImpl!true;
523     }
524 }
525 
526 ///
527 @system unittest
528 {
529     import std.experimental.allocator.building_blocks.region : SharedBorrowedRegion;
530     import std.experimental.allocator.building_blocks.ascending_page_allocator : SharedAscendingPageAllocator;
531     import std.experimental.allocator.building_blocks.null_allocator : NullAllocator;
532     import core.thread : ThreadGroup;
533 
534     enum numThreads = 8;
535     enum size = 2048;
536     enum maxIter = 10;
537 
538     /*
539     In this example we use 'SharedAlignedBlockList' together with
540     'SharedBorrowedRegion', in order to create a fast, thread-safe allocator.
541     */
542     alias SuperAllocator = SharedAlignedBlockList!(
543             SharedBorrowedRegion!(1),
544             SharedAscendingPageAllocator,
545             4096);
546 
547     SuperAllocator a;
548     // The 'SuperAllocator' will draw memory from a 'SharedAscendingPageAllocator'
549     a.parent = SharedAscendingPageAllocator(4096 * 1024);
550 
551     // Launch 'numThreads', each performing allocations
552     void fun()
553     {
554         foreach (i; 0 .. maxIter)
555         {
556             void[] b = a.allocate(size);
557             assert(b.length == size);
558         }
559     }
560 
561     auto tg = new ThreadGroup;
562     foreach (i; 0 .. numThreads)
563     {
564         tg.create(&fun);
565     }
566     tg.joinAll();
567 }
568 
569 version (StdUnittest)
570 {
571     static void testrw(void[] b)
572     {
573         ubyte* buf = cast(ubyte*) b.ptr;
574         size_t len = (b.length).roundUpToMultipleOf(4096);
575         for (int i = 0; i < len; i += 4096)
576         {
577             buf[i] =  (cast(ubyte) i % 256);
578             assert(buf[i] == (cast(ubyte) i % 256));
579         }
580     }
581 }
582 
583 @system unittest
584 {
585     import std.experimental.allocator.building_blocks.region;
586     import std.experimental.allocator.building_blocks.ascending_page_allocator;
587     import std.random;
588     import std.algorithm.sorting : sort;
589     import core.thread : ThreadGroup;
590     import core.internal.spinlock : SpinLock;
591 
592     enum pageSize = 4096;
593     enum numThreads = 10;
594     enum maxIter = 20;
595     enum totalAllocs = maxIter * numThreads;
596     size_t count = 0;
597     SpinLock lock = SpinLock(SpinLock.Contention.brief);
598 
599     alias SuperAllocator = SharedAlignedBlockList!(
600             SharedBorrowedRegion!(1),
601             SharedAscendingPageAllocator,
602             1 << 16);
603     void[][totalAllocs] buf;
604 
605     SuperAllocator a;
606     a.parent = SharedAscendingPageAllocator(4096 * 1024);
607 
608     void fun()
609     {
610         auto rnd = Random(1000);
611 
612         foreach (i; 0 .. maxIter)
613         {
614             auto size = uniform(1, pageSize + 1, rnd);
615             void[] b = a.allocate(size);
616             assert(b.length == size);
617             testrw(b);
618 
619             lock.lock();
620             buf[count++] = b;
621             lock.unlock();
622         }
623     }
624     auto tg = new ThreadGroup;
625     foreach (i; 0 .. numThreads)
626     {
627         tg.create(&fun);
628     }
629     tg.joinAll();
630 
631     sort!((a, b) => a.ptr < b.ptr)(buf[0 .. totalAllocs]);
632     foreach (i; 0 .. totalAllocs - 1)
633     {
634         assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
635     }
636 
637     foreach (i; 0 .. totalAllocs)
638     {
639         assert(a.deallocate(buf[totalAllocs - 1 - i]));
640     }
641 }
642 
643 @system unittest
644 {
645     import std.experimental.allocator.building_blocks.ascending_page_allocator : AscendingPageAllocator;
646     import std.experimental.allocator.building_blocks.segregator : Segregator;
647     import std.experimental.allocator.building_blocks.bitmapped_block : BitmappedBlock;
648     import std.random;
649 
650     alias SuperAllocator = Segregator!(
651         256,
652         AlignedBlockList!(BitmappedBlock!256, AscendingPageAllocator*, 1 << 16),
653         Segregator!(
654 
655         512,
656         AlignedBlockList!(BitmappedBlock!512, AscendingPageAllocator*, 1 << 16),
657         Segregator!(
658 
659         1024,
660         AlignedBlockList!(BitmappedBlock!1024, AscendingPageAllocator*, 1 << 16),
661         Segregator!(
662 
663         2048,
664         AlignedBlockList!(BitmappedBlock!2048, AscendingPageAllocator*, 1 << 16),
665         AscendingPageAllocator*
666     ))));
667 
668     SuperAllocator a;
669     auto pageAlloc = AscendingPageAllocator(4096 * 4096);
670     a.allocatorForSize!4096 = &pageAlloc;
671     a.allocatorForSize!2048.parent = &pageAlloc;
672     a.allocatorForSize!1024.parent = &pageAlloc;
673     a.allocatorForSize!512.parent = &pageAlloc;
674     a.allocatorForSize!256.parent = &pageAlloc;
675 
676     auto rnd = Random(1000);
677 
678     size_t maxIter = 10;
679     enum testNum = 10;
680     void[][testNum] buf;
681     int maxSize = 8192;
682     foreach (i; 0 .. maxIter)
683     {
684         foreach (j; 0 .. testNum)
685         {
686             auto size = uniform(1, maxSize + 1, rnd);
687             buf[j] = a.allocate(size);
688             assert(buf[j].length == size);
689             testrw(buf[j]);
690         }
691 
692         randomShuffle(buf[]);
693 
694         foreach (j; 0 .. testNum)
695         {
696             assert(a.deallocate(buf[j]));
697         }
698     }
699 }