1 // Written in the D programming language.
2 /**
3 Source: $(PHOBOSSRC std/experimental/allocator/building_blocks/bitmapped_block.d)
4 */
5 module std.experimental.allocator.building_blocks.bitmapped_block;
6 
7 import std.experimental.allocator.building_blocks.null_allocator;
8 import std.experimental.allocator.common;
9 import std.typecons : Flag, Yes, No;
10 
11 
12 // Common implementation for shared and non-shared versions of the BitmappedBlock
13 private mixin template BitmappedBlockImpl(bool isShared, bool multiBlock)
14 {
15     import std.conv : text;
16     import std.traits : hasMember;
17     import std.typecons : Ternary;
18     import std.typecons : tuple, Tuple;
19 
20     static if (isShared && multiBlock)
21     import core.internal.spinlock : SpinLock;
22 
23     static assert(theBlockSize > 0 && theAlignment.isGoodStaticAlignment);
24     static assert(theBlockSize == chooseAtRuntime ||
25         theBlockSize % theAlignment == 0, "Block size must be a multiple of the alignment");
26 
    static if (theBlockSize != chooseAtRuntime)
    {
        // Block size fixed at compile time; expose it as a manifest constant.
        alias blockSize = theBlockSize;
    }
    else
    {
        // It is the caller's responsibility to synchronize this with
        // allocate/deallocate in shared environments
        /// Returns: the block size configured at runtime, in bytes.
        @property uint blockSize() { return _blockSize; }
        /// Sets the block size. May only be called while the control bitmap is
        /// still empty, and `s` must be a multiple of `alignment`.
        @property void blockSize(uint s)
        {
            static if (multiBlock)
            {
                // Cast strips any `shared` qualifier so BitVector.length can be
                // read; callers synchronize externally per the note above.
                assert((cast(BitVector) _control).length == 0 && s % alignment == 0);
            }
            else
            {
                assert(_control.length == 0 && s % alignment == 0);
            }
            _blockSize = s;
        }
        // Backing storage for the runtime-chosen block size, in bytes.
        private uint _blockSize;
    }
50 
    static if (is(ParentAllocator == NullAllocator))
    {
        // No parent allocator: memory arrives via the `ubyte[]` constructor.
        // platformAlignment is the figure used by totalAllocation's estimate.
        private enum parentAlignment = platformAlignment;
    }
    else
    {
        private alias parentAlignment = ParentAllocator.alignment;
        // Control words are ulongs carved from the front of the parent's
        // allocation, so the parent must align at least as strictly as ulong.
        static assert(parentAlignment >= ulong.alignof);
    }

    /// Alignment guaranteed for each allocated block.
    alias alignment = theAlignment;

    static if (stateSize!ParentAllocator)
    {
        // Stateful parent: each BitmappedBlock carries its own instance.
        ParentAllocator parent;
    }
    else
    {
        // Stateless parent: all instances share the canonical singleton.
        alias parent = ParentAllocator.instance;
    }

    // Total number of blocks managed (= number of meaningful control bits).
    private size_t _blocks;
    // Client-visible memory; holds at least _blocks * blockSize bytes.
    private void[] _payload;
    // Memoized search hint: index of the leftmost control word that may still
    // contain a zero bit (see adjustStartIdx).
    private size_t _startIdx;

    // For multiblock, '_control' is a BitVector, otherwise just a regular ulong[]
    static if (multiBlock)
    {
        // Keeps track of first block which has never been used in an allocation.
        // All blocks which are located right to the '_freshBit', should have never been
        // allocated
        private ulong _freshBit;
        private BitVector _control;
    }
    else
    {
        private ulong[] _control;
    }

    static if (multiBlock && isShared)
    {
        // Guards all bitmap manipulation in the shared multi-block variant.
        SpinLock lock = SpinLock(SpinLock.Contention.brief);
    }
94 
95     pure nothrow @safe @nogc
96     private size_t totalAllocation(size_t capacity)
97     {
98         auto blocks = capacity.divideRoundUp(blockSize);
99         auto leadingUlongs = blocks.divideRoundUp(64);
100         import std.algorithm.comparison : min;
101         immutable initialAlignment = min(parentAlignment,
102             1U << min(31U, trailingZeros(leadingUlongs * 8)));
103         auto maxSlack = alignment <= initialAlignment
104             ? 0
105             : alignment - initialAlignment;
106         return leadingUlongs * 8 + maxSlack + blockSize * blocks;
107     }
108 
    /**
    Constructs the allocator over `data`, which supplies storage for both the
    control bitmap (at the front) and the payload (aligned, after it).
    Params: data = backing memory; must be aligned to `size_t.alignof` or null.
    */
    this(ubyte[] data)
    {
        immutable a = data.ptr.effectiveAlignment;
        assert(a >= size_t.alignof || !data.ptr,
            "Data must be aligned properly");

        // First estimate: each block costs blockSize * 8 payload bits plus
        // one control bit.
        immutable ulong totalBits = data.length * 8;
        immutable ulong bitsPerBlock = blockSize * 8 + 1;
        _blocks = totalBits / bitsPerBlock;

        // Reality is a bit more complicated, iterate until a good number of
        // blocks found. (Control words round up to whole ulongs and the
        // payload start rounds up to `alignment`, so the estimate may not fit.)
        size_t localBlocks;
        for (localBlocks = _blocks; localBlocks; --localBlocks)
        {
            immutable controlWords = localBlocks.divideRoundUp(64);
            auto payload = data[controlWords * 8 .. $].roundStartToMultipleOf(
                alignment);
            if (payload.length < localBlocks * blockSize)
            {
                // Overestimated
                continue;
            }

            // Need the casts for shared versions
            static if (multiBlock)
            {
                _control = cast(typeof(_control)) BitVector((cast(ulong*) data.ptr)[0 .. controlWords]);
                (cast(BitVector) _control)[] = 0;
            }
            else
            {
                _control = (cast(typeof(_control.ptr)) data.ptr)[0 .. controlWords];
                _control[] = 0;
            }

            _payload = cast(typeof(_payload)) payload;
            break;
        }

        // Zero when no feasible block count was found (data too small).
        _blocks = cast(typeof(_blocks)) localBlocks;
    }
151 
    // Available only when the block size is chosen at runtime: records the
    // block size, then delegates to the `ubyte[]` constructor above.
    static if (chooseAtRuntime == theBlockSize)
    this(ubyte[] data, uint blockSize)
    {
        this._blockSize = blockSize;
        this(data);
    }
158 
    // Convenience constructor for a stateless parent: acquires enough memory
    // from `parent` to serve at least `capacity` bytes.
    static if (!is(ParentAllocator == NullAllocator) && !stateSize!ParentAllocator)
    this(size_t capacity)
    {
        size_t toAllocate = totalAllocation(capacity);
        auto data = cast(ubyte[])(parent.allocate(toAllocate));
        this(data);
        // totalAllocation must have overestimated enough to cover `capacity`.
        assert(_blocks * blockSize >= capacity);
    }
167 
168     static if (!is(ParentAllocator == NullAllocator) && stateSize!ParentAllocator)
169     this(ParentAllocator parent, size_t capacity)
170     {
171         this.parent = parent;
172         size_t toAllocate = totalAllocation(capacity);
173         auto data = cast(ubyte[])(parent.allocate(toAllocate));
174         this(data);
175     }
176 
    // Runtime block size + stateless parent: records the block size, then
    // delegates to the capacity constructor.
    static if (!is(ParentAllocator == NullAllocator) &&
        chooseAtRuntime == theBlockSize &&
        !stateSize!ParentAllocator)
    this(size_t capacity, uint blockSize)
    {
        this._blockSize = blockSize;
        this(capacity);
    }
185 
    // Runtime block size + stateful parent: records the block size, then
    // delegates to the (parent, capacity) constructor.
    static if (!is(ParentAllocator == NullAllocator) &&
        chooseAtRuntime == theBlockSize &&
        stateSize!ParentAllocator)
    this(ParentAllocator parent, size_t capacity, uint blockSize)
    {
        this._blockSize = blockSize;
        this(parent, capacity);
    }
194 
    // Returns the entire region (control words + padding + payload) to the
    // parent. Compiled in only when a parent exists and can deallocate.
    static if (!is(ParentAllocator == NullAllocator)
        && hasMember!(ParentAllocator, "deallocate"))
    ~this()
    {
        // multiblock bitmapped blocks use a BitVector
        static if (multiBlock)
        {
            void* start = cast(void*) _control.rep.ptr;
        }
        else
        {
            void* start = cast(void*) _control.ptr;
        }
        // The parent's buffer spans from the first control word through the
        // end of the payload, including any alignment padding in between.
        void* end = cast(void*) (_payload.ptr + _payload.length);
        parent.deallocate(start[0 .. end - start]);
    }
211 
212     pure nothrow @safe @nogc
213     size_t goodAllocSize(size_t n)
214     {
215         return n.roundUpToMultipleOf(blockSize);
216     }
217 
218     // Implementation of the 'multiBlock' BitmappedBlock
219     // For the shared version, the methods are protected by a common lock
220     static if (multiBlock)
221     {
        /*
        Adjusts the memoized _startIdx to the leftmost control word that has at
        least one zero bit. Assumes all control words to the left of $(D
        _control[_startIdx]) are already occupied.
        */
        private void adjustStartIdx()
        {
            // Skip over fully occupied words (all 64 bits set).
            while (_startIdx < _control.rep.length && _control.rep[_startIdx] == ulong.max)
            {
                static if (isShared)
                {
                    // Shared demands atomic increment, however this is protected
                    // by a lock. Regular increment is fine
                    auto localStart = _startIdx + 1;
                    _startIdx = localStart;
                }
                else
                {
                    ++_startIdx;
                }
            }
        }
244 
        /*
        Based on the latest allocated bit, 'newBit', it adjusts '_freshBit'
        (the high-water mark past which no block has ever been allocated).
        */
        pure nothrow @safe @nogc
        private void adjustFreshBit(const ulong newBit)
        {
            import std.algorithm.comparison : max;
            static if (isShared)
            {
                // Read-modify-write through a local to avoid atomics on the
                // shared field; callers hold the lock.
                auto localFreshBit = max(newBit, _freshBit);
                _freshBit = localFreshBit;
            }
            else
            {
                _freshBit = max(newBit, _freshBit);
            }
        }
262 
        /*
        Returns the blocks corresponding to the control bits starting at word index
        wordIdx and bit index msbIdx (MSB=0) for a total of howManyBlocks.
        Returns null for an empty range or when the range runs past the payload.
        */
        @trusted
        private void[] blocksFor(this _)(size_t wordIdx, uint msbIdx, size_t howManyBlocks)
        {
            assert(msbIdx <= 63);
            // Block index is wordIdx * 64 + msbIdx; scale by blockSize for bytes.
            const start = (wordIdx * 64 + msbIdx) * blockSize;
            const end = start + blockSize * howManyBlocks;
            if (start == end) return null;
            // The cast drops any `shared` qualifier from the payload slice.
            if (end <= _payload.length) return cast(void[]) _payload[start .. end];
            // This could happen if we have more control bits than available memory.
            // That's possible because the control bits are rounded up to fit in
            // 64-bit words.
            return null;
        }
280 
        // Shared variant: serializes on the spin lock, then forwards to the
        // common implementation. Not `pure` because locking is impure.
        static if (isShared)
        nothrow @safe @nogc
        void[] allocate(const size_t s)
        {
            lock.lock();
            scope(exit) lock.unlock();

            return allocateImpl(s);
        }

        /// Allocates `s` bytes, rounded up internally to whole blocks.
        /// Returns: a slice of exactly `s` bytes, or null on failure.
        static if (!isShared)
        pure nothrow @safe @nogc
        void[] allocate(const size_t s)
        {
            return allocateImpl(s);
        }
297 
298 
        // If shared, this is protected by a lock inside 'allocate'
        // Finds and marks free blocks for an `s`-byte request; returns a slice
        // trimmed to exactly `s` bytes, or null on failure.
        pure nothrow @trusted @nogc
        private void[] allocateImpl(const size_t s)
        {
            const blocks = s.divideRoundUp(blockSize);
            void[] result;

        Lswitch:
            switch (blocks)
            {
            case 1:
                // inline code here for speed
                // find the next available block
                foreach (i; _startIdx .. _control.rep.length)
                {
                    const w = _control.rep[i];
                    if (w == ulong.max) continue;
                    // leadingOnes(w) is the index (from MSB) of the first 0 bit.
                    uint j = leadingOnes(w);
                    assert(j < 64, "Invalid number of blocks");
                    assert((_control.rep[i] & ((1UL << 63) >> j)) == 0, "Corrupted bitmap");
                    static if (isShared)
                    {
                        // Need the cast because shared does not recognize the lock
                        *(cast(ulong*) &_control._rep[i]) |= (1UL << 63) >> j;
                    }
                    else
                    {
                        _control.rep[i] |= (1UL << 63) >> j;
                    }
                    if (i == _startIdx)
                    {
                        // This word may have just filled up; move the hint.
                        adjustStartIdx();
                    }
                    result = blocksFor(i, j, 1);
                    break Lswitch;
                }
                goto case 0; // fall through
            case 0:
                return null;
            case 2: .. case 64:
                // Fits in one control word or straddles two adjacent words.
                result = smallAlloc(cast(uint) blocks);
                break;
            default:
                result = hugeAlloc(blocks);
                break;
            }
            if (result)
            {
                // Record the high-water mark of ever-allocated blocks.
                adjustFreshBit((result.ptr - _payload.ptr) / blockSize + blocks);
            }
            return result.ptr ? result.ptr[0 .. s] : null;
        }
351 
        /// Allocates a block-granular buffer of at least `s` bytes using only
        /// blocks that have never been allocated (those at or past `_freshBit`),
        /// skipping the bitmap search. Returns null when not enough fresh
        /// blocks remain. Note: the result is not trimmed to exactly `s` bytes.
        @trusted void[] allocateFresh(const size_t s)
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            const blocks = s.divideRoundUp(blockSize);

            void[] result = blocksFor(cast(size_t) (_freshBit / 64),
                cast(uint) (_freshBit % 64), blocks);
            if (result)
            {
                // Mark the range allocated; the cast strips `shared` (the lock,
                // when present, is already held).
                (cast(BitVector) _control)[_freshBit .. _freshBit + blocks] = 1;
                static if (isShared)
                {
                    ulong localFreshBit = _freshBit;
                    localFreshBit += blocks;
                    _freshBit = localFreshBit;
                }
                else
                {
                    _freshBit += blocks;
                }
            }
            return result;
        }
380 
        /// Allocates `n` bytes aligned at `a` (a power of two). The shared
        /// variant holds the spin lock for the whole operation.
        void[] alignedAllocate(size_t n, uint a)
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            return alignedAllocateImpl(n, a);
        }
391 
392         // If shared, this is protected by a lock inside 'alignedAllocate'
393         private void[] alignedAllocateImpl(size_t n, uint a)
394         {
395             import std.math.traits : isPowerOf2;
396             assert(a.isPowerOf2);
397             if (a <= alignment) return allocate(n);
398 
399             // Overallocate to make sure we can get an aligned block
400             auto b = allocateImpl((n + a - alignment).roundUpToMultipleOf(blockSize));
401             if (!b.ptr) return null;
402             auto result = b.roundStartToMultipleOf(a);
403             assert(result.length >= n);
404             result = result.ptr[0 .. n]; // final result
405 
406             // Free any blocks that might be slack at the beginning
407             auto slackHeadingBlocks = (result.ptr - b.ptr) / blockSize;
408             if (slackHeadingBlocks)
409             {
410                 deallocateImpl(b[0 .. slackHeadingBlocks * blockSize]);
411             }
412 
413             // Free any blocks that might be slack at the end
414             auto slackTrailingBlocks = ((b.ptr + b.length)
415                 - (result.ptr + result.length)) / blockSize;
416             if (slackTrailingBlocks)
417             {
418                 deallocateImpl(b[$ - slackTrailingBlocks * blockSize .. $]);
419             }
420 
421             return result;
422         }
423 
        /*
        Tries to allocate "blocks" blocks at the exact position indicated by the
        position wordIdx/msbIdx (msbIdx counts from MSB, i.e. MSB has index 0). If
        it succeeds, fills "result" with the result and returns tuple(size_t.max,
        0). Otherwise, returns a tuple with the next position to search.
        A return of tuple(size_t.max - 1, 0) signals out-of-memory: the control
        bits exist but no payload backs them.
        */
        private Tuple!(size_t, uint) allocateAt(size_t wordIdx, uint msbIdx,
                size_t blocks, ref void[] result)
        {
            assert(blocks > 0);
            assert(wordIdx < _control.rep.length);
            assert(msbIdx <= 63);
            void[] tmpResult;
            result = null;
            if (msbIdx + blocks <= 64)
            {
                // Allocation should fit this control word
                static if (isShared)
                {
                    // Read-modify-write via a local; the caller holds the lock.
                    ulong localControl = _control.rep[wordIdx];
                    bool didSetBit = setBitsIfZero(localControl,
                        cast(uint) (64 - msbIdx - blocks), 63 - msbIdx);
                    _control.rep[wordIdx] = localControl;
                }
                else
                {
                    bool didSetBit = setBitsIfZero(_control.rep[wordIdx],
                        cast(uint) (64 - msbIdx - blocks), 63 - msbIdx);
                }
                if (didSetBit)
                {
                    tmpResult = blocksFor(wordIdx, msbIdx, blocks);
                    if (!tmpResult)
                    {
                        // No payload backs these bits; roll the bits back.
                        static if (isShared)
                        {
                            localControl = _control.rep[wordIdx];
                            resetBits(localControl,
                                cast(uint) (64 - msbIdx - blocks), 63 - msbIdx);
                            _control.rep[wordIdx] = localControl;
                        }
                        else
                        {
                            resetBits(_control.rep[wordIdx],
                                cast(uint) (64 - msbIdx - blocks), 63 - msbIdx);
                        }
                        return tuple(size_t.max - 1, 0u);
                    }
                    result = tmpResult;
                    tmpResult = null;
                    return tuple(size_t.max, 0u);
                }
                // Can't allocate, make a suggestion
                return msbIdx + blocks == 64
                    ? tuple(wordIdx + 1, 0u)
                    : tuple(wordIdx, cast(uint) (msbIdx + blocks));
            }
            // Allocation spans two control words or more
            immutable mask = ulong.max >> msbIdx;
            if (_control.rep[wordIdx] & mask)
            {
                // We can't allocate the rest of this control word,
                // return a suggestion.
                return tuple(wordIdx + 1, 0u);
            }
            // We can allocate the rest of this control word, but we first need to
            // make sure we can allocate the tail.
            if (wordIdx + 1 == _control.rep.length)
            {
                // No more memory
                return tuple(_control.rep.length, 0u);
            }
            auto hint = allocateAt(wordIdx + 1, 0, blocks - 64 + msbIdx, result);
            if (hint[0] == size_t.max)
            {
                // The tail was claimed recursively; now claim this word's rest.
                tmpResult = blocksFor(wordIdx, msbIdx, blocks);
                if (!tmpResult)
                {
                    return tuple(size_t.max - 1, 0u);
                }
                static if (isShared)
                {
                    // Dont want atomics, because this is protected by 'lock'
                    ulong localControl = _control.rep[wordIdx];
                    localControl |= mask;
                    _control.rep[wordIdx] = localControl;
                }
                else
                {
                    _control.rep[wordIdx] |= mask;
                }
                result = tmpResult;
                tmpResult = null;
                return tuple(size_t.max, 0u);
            }
            // Failed, return a suggestion that skips this whole run.
            return hint;
        }
522 
        /* Allocates as many blocks as possible at the end of the blocks indicated
        by wordIdx. Returns the number of blocks allocated. */
        // NOTE(review): `ulong.max >> available` sets the low 64 - available
        // bits, which also covers bits above the free tail; smallAlloc marks a
        // free tail of `available` blocks with `(1UL << available) - 1` instead.
        // Looks inconsistent — confirm intent before relying on this helper
        // (it has no callers visible in this chunk of the module).
        private uint allocateAtTail(size_t wordIdx)
        {
            assert(wordIdx < _control.rep.length);
            // Free blocks at the word's tail are its trailing zero bits.
            const available = trailingZeros(_control.rep[wordIdx]);
            static if (isShared)
            {
                // Read-modify-write via a local; caller holds the lock.
                ulong localControl = _control.rep[wordIdx];
                localControl |= ulong.max >> available;
                _control.rep[wordIdx] = localControl;
            }
            else
            {
                _control.rep[wordIdx] |= ulong.max >> available;
            }
            return available;
        }
541 
        // Allocates 2 to 64 contiguous blocks; the run either fits inside one
        // control word or straddles the boundary between two adjacent words.
        pure nothrow @safe @nogc
        private void[] smallAlloc(uint blocks) return scope
        {
            assert(blocks >= 2 && blocks <= 64);
            void[] result;
            foreach (i; _startIdx .. _control.rep.length)
            {
                // Test within the current 64-bit word
                const v = _control.rep[i];
                if (v == ulong.max) continue;
                // findContigOnes on ~v locates a run of `blocks` free bits.
                auto j = findContigOnes(~v, blocks);
                if (j < 64)
                {
                    // yay, found stuff
                    result = blocksFor(i, j, blocks);
                    if (result)
                    {
                        static if (isShared)
                        {
                            // Read-modify-write via a local; caller holds the lock.
                            ulong localControl = _control.rep[i];
                            setBits(localControl, 64 - j - blocks, 63 - j);
                            _control.rep[i] = localControl;
                        }
                        else
                        {
                            setBits(_control.rep[i], 64 - j - blocks, 63 - j);
                        }
                    }
                    return result;
                }
                // Next, try allocations that cross a word
                auto available = trailingZeros(v);
                if (available == 0) continue;
                if (i + 1 >= _control.rep.length) break;
                assert(available < blocks); // otherwise we should have found it
                auto needed = blocks - available;
                assert(needed > 0 && needed < 64);
                result = blocksFor(i, 64 - available, blocks);
                if (result && allocateAtFront(i + 1, needed))
                {
                    // Claim the free tail of word i (its low `available` bits).
                    static if (isShared)
                    {
                        ulong localControl = _control.rep[i];
                        localControl |= (1UL << available) - 1;
                        _control.rep[i] = localControl;
                    }
                    else
                    {
                        _control.rep[i] |= (1UL << available) - 1;
                    }
                    return result;
                }
            }
            return null;
        }
597 
        // Allocates more than 64 contiguous blocks by scanning the whole
        // bitmap for a long-enough run of zero bits.
        pure nothrow @trusted @nogc
        private void[] hugeAlloc(size_t blocks) return scope
        {
            assert(blocks > 64);
            if (_startIdx == _control._rep.length)
            {
                // The search hint says every control word is full.
                assert((cast(BitVector) _control).allAre1);
                return null;
            }

            // Search from the hint onward; the casts strip `shared` (callers
            // of the shared variant hold the lock).
            auto i = (cast(BitVector)_control).findZeros(blocks, _startIdx * 64);
            if (i == i.max || i + blocks > _blocks) return null;
            // Allocate those bits
            (cast(BitVector) _control)[i .. i + blocks] = 1;
            return cast(void[]) _payload[cast(size_t) (i * blockSize)
                .. cast(size_t) ((i + blocks) * blockSize)];
        }
615 
616         // Rounds sizeInBytes to a multiple of blockSize.
617         private size_t bytes2blocks(size_t sizeInBytes)
618         {
619             return (sizeInBytes + blockSize - 1) / blockSize;
620         }
621 
        /* Allocates given blocks at the beginning blocks indicated by wordIdx.
        Returns true if allocation was possible, false otherwise. */
        private bool allocateAtFront(size_t wordIdx, uint blocks)
        {
            assert(wordIdx < _control.rep.length && blocks >= 1 && blocks <= 64);
            // mask has the low (64 - blocks) bits set; the word's leading
            // `blocks` bits are all free iff the word compares <= mask.
            const mask = (1UL << (64 - blocks)) - 1;
            if (_control.rep[wordIdx] > mask) return false;
            static if (isShared)
            {
                // Read-modify-write via a local; caller holds the lock.
                ulong localControl = _control.rep[wordIdx];
                localControl |= ~mask;
                _control.rep[wordIdx] = localControl;
            }
            else
            {
                _control.rep[wordIdx] |= ~mask;
            }
            return true;
        }
641 
        // Since the lock is not pure, only the single threaded 'expand' is pure
        static if (isShared)
        {
            /// Expands `b` in place by `delta` bytes, holding the spin lock
            /// for the duration. Returns: true on success.
            nothrow @trusted @nogc
            bool expand(ref void[] b, immutable size_t delta)
            {
                lock.lock();
                scope(exit) lock.unlock();

                return expandImpl(b, delta);
            }
        }
        else
        {
            /// Expands `b` in place by `delta` bytes. Returns: true on success.
            pure nothrow @trusted @nogc
            bool expand(ref void[] b, immutable size_t delta)
            {
                return expandImpl(b, delta);
            }
        }
662 
        // If shared, this is protected by a lock inside 'expand'
        pure nothrow @trusted @nogc
        private bool expandImpl(ref void[] b, immutable size_t delta)
        {
            // Dispose with trivial corner cases
            if (b is null || delta == 0) return delta == 0;

            /* To simplify matters, refuse to expand buffers that don't start at a block start (this may be the case for blocks allocated with alignedAllocate).
            */
            if ((b.ptr - _payload.ptr) % blockSize) return false;

            const blocksOld = bytes2blocks(b.length);
            const blocksNew = bytes2blocks(b.length + delta);
            assert(blocksOld <= blocksNew);

            // Possibly we have enough slack at the end of the block!
            if (blocksOld == blocksNew)
            {
                b = b.ptr[0 .. b.length + delta];
                return true;
            }

            assert((b.ptr - _payload.ptr) % blockSize == 0);
            const blockIdx = (b.ptr - _payload.ptr) / blockSize;
            const blockIdxAfter = blockIdx + blocksOld;

            // Try the maximum
            const wordIdx = blockIdxAfter / 64,
                msbIdx = cast(uint) (blockIdxAfter % 64);
            void[] p;
            // Request the extra blocks exactly at the end of `b`; anything
            // other than success means in-place expansion is impossible.
            auto hint = allocateAt(wordIdx, msbIdx,  blocksNew - blocksOld, p);
            if (hint[0] != size_t.max)
            {
                return false;
            }
            // Expansion successful
            assert(p.ptr == b.ptr + blocksOld * blockSize);
            b = b.ptr[0 .. b.length + delta];
            adjustFreshBit(blockIdx + blocksNew);
            return true;
        }
704 
        /// Resizes `b` to `newSize` bytes, moving it if it cannot be resized
        /// in place. The shared variant holds the spin lock throughout.
        @system bool reallocate(ref void[] b, size_t newSize)
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            return reallocateImpl(b, newSize);
        }
715 
        // If shared, this is protected by a lock inside 'reallocate'
        private @system bool reallocateImpl(ref void[] b, size_t newSize)
        {
            // Generic grow-or-move fallback: try in-place expansion first,
            // otherwise allocate anew, copy, and free the old buffer.
            static bool slowReallocate(Allocator)(ref Allocator a, ref void[] b, size_t s)
            {
                if (b.length == s) return true;
                if (b.length <= s && a.expandImpl(b, s - b.length)) return true;
                auto newB = a.allocateImpl(s);
                if (newB.length != s) return false;
                if (newB.length <= b.length) newB[] = b[0 .. newB.length];
                else newB[0 .. b.length] = b[];
                a.deallocateImpl(b);
                b = newB;
                return true;
            }

            // Null buffer: behaves as a plain allocation.
            if (!b.ptr)
            {
                b = allocateImpl(newSize);
                return b.length == newSize;
            }
            if (newSize == 0)
            {
                deallocateImpl(b);
                b = null;
                return true;
            }
            if (newSize < b.length)
            {
                // Shrink. Will shrink in place by deallocating the trailing part.
                auto newCapacity = bytes2blocks(newSize) * blockSize;
                deallocateImpl(b[newCapacity .. $]);
                b = b[0 .. newSize];
                return true;
            }
            // Go the slow route
            return slowReallocate(this, b, newSize);
        }
754 
        /// Resizes `b` to `newSize` bytes with alignment `a`, moving it if
        /// needed. The shared variant holds the spin lock throughout.
        @system bool alignedReallocate(ref void[] b, size_t newSize, uint a)
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            return alignedReallocateImpl(b, newSize, a);
        }
765 
        // If shared, this is protected by a lock inside 'alignedReallocate'
        private @system bool alignedReallocateImpl(ref void[] b, size_t newSize, uint a)
        {
            // Like slowReallocate, but only reuses `b` in place when it is
            // already aligned at `a`; otherwise moves to a new aligned buffer.
            static bool slowAlignedReallocate(Allocator)(ref Allocator alloc,
                    ref void[] b, size_t s, uint a)
            {
                if (b.length <= s && b.ptr.alignedAt(a)
                    && alloc.expandImpl(b, s - b.length)) return true;

                auto newB = alloc.alignedAllocateImpl(s, a);
                if (newB.length != s) return false;
                if (newB.length <= b.length) newB[] = b[0 .. newB.length];
                else newB[0 .. b.length] = b[];
                alloc.deallocateImpl(b);
                b = newB;
                return true;
            }

            if (newSize == 0)
            {
                deallocateImpl(b);
                b = null;
                return true;
            }
            // Go the slow route
            return slowAlignedReallocate(this, b, newSize, a);
        }
793 
        /// Returns `b`'s blocks to the free pool. The shared variant holds the
        /// spin lock throughout. Returns: true (deallocating null succeeds).
        nothrow @nogc
        bool deallocate(void[] b)
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            return deallocateImpl(b);
        }
805 
        // If shared, this is protected by a lock inside 'deallocate'
        //
        // Clears the bitmap bits covering `b`. The incoming pointer may sit
        // inside a block (alignedAllocate can hand out interior pointers), so
        // the range is first widened to whole blocks. The bitmap is MSB-first:
        // block k of a control word maps to bit 63 - (k % 64), which is why
        // the masks below are built with shifts from `ulong.max`.
        // NOTE(review): the shared branches do plain (non-atomic)
        // read-modify-write on control words; that is safe only because the
        // caller already holds the lock.
        nothrow @nogc
        private bool deallocateImpl(void[] b)
        {
            // Freeing null is a successful no-op.
            if (b is null) return true;

            // Locate position
            immutable pos = b.ptr - _payload.ptr;
            immutable blockIdx = pos / blockSize;

            // Adjust pointer, might be inside a block due to alignedAllocate
            void* begin = cast(void*) (_payload.ptr + blockIdx * blockSize),
                end = cast(void*) (b.ptr + b.length);
            b = begin[0 .. end - begin];
            // Round up size to multiple of block size
            auto blocks = b.length.divideRoundUp(blockSize);

            // Get into details
            auto wordIdx = blockIdx / 64, msbIdx = cast(uint) (blockIdx % 64);
            // Freeing can only lower the first-free search hint, never raise it.
            if (_startIdx > wordIdx) _startIdx = wordIdx;

            // Three stages: heading bits, full words, leftover bits
            if (msbIdx)
            {
                // Stage 1: the freed run starts mid-word at bit offset msbIdx
                // (counted from the MSB).
                if (blocks + msbIdx <= 64)
                {
                    // Whole run fits in this word; clear its bits and finish.
                    static if (isShared)
                    {
                        ulong localControl = _control.rep[wordIdx];
                        resetBits(localControl,
                            cast(uint) (64 - msbIdx - blocks),
                            63 - msbIdx);
                        _control.rep[wordIdx] = localControl;
                    }
                    else
                    {
                        resetBits(_control.rep[wordIdx],
                            cast(uint) (64 - msbIdx - blocks),
                            63 - msbIdx);
                    }
                    return true;
                }
                else
                {
                    // Run spills past this word: clear everything from msbIdx
                    // to the end of the word, then continue word-aligned.
                    static if (isShared)
                    {
                        ulong localControl = _control.rep[wordIdx];
                        localControl &= ulong.max << (64 - msbIdx);
                        _control.rep[wordIdx] = localControl;
                    }
                    else
                    {
                        _control.rep[wordIdx] &= ulong.max << (64 - msbIdx);
                    }
                    blocks -= 64 - msbIdx;
                    ++wordIdx;
                    msbIdx = 0;
                }
            }

            // Stage 2: reset one word at a time
            for (; blocks >= 64; blocks -= 64)
            {
                _control.rep[wordIdx++] = 0;
            }

            // Stage 3: deal with leftover bits, if any
            assert(wordIdx <= _control.rep.length);
            if (blocks)
            {
                // Clear the top `blocks` bits of the final word.
                static if (isShared)
                {
                    ulong localControl = _control.rep[wordIdx];
                    localControl &= ulong.max >> blocks;
                    _control.rep[wordIdx] = localControl;
                }
                else
                {
                    _control.rep[wordIdx] &= ulong.max >> blocks;
                }
            }
            return true;
        }
889 
        // Since the lock is not pure, only the single threaded version is pure
        static if (isShared)
        {
            /// Marks every block free under the lock and resets the search
            /// hint. Always succeeds; does not return memory to the parent.
            nothrow @nogc
            bool deallocateAll()
            {
                lock.lock();
                scope(exit) lock.unlock();

                (cast(BitVector) _control)[] = 0;
                _startIdx = 0;
                return true;
            }
        }
        else
        {
            /// Marks every block free and resets the search hint.
            /// Always succeeds; does not return memory to the parent.
            pure nothrow @nogc
            bool deallocateAll()
            {
                _control[] = 0;
                _startIdx = 0;
                return true;
            }
        }
914 
        // Since the lock is not pure, only the single threaded version is pure
        static if (isShared)
        {
            /// Returns `Ternary.yes` iff no block is currently allocated;
            /// never `Ternary.unknown`. Takes the lock for a consistent view.
            nothrow @safe @nogc
            Ternary empty()
            {
                lock.lock();
                scope(exit) lock.unlock();

                return emptyImpl();
            }
        }
        else
        {
            /// Returns `Ternary.yes` iff every bit of the bitmap is 0;
            /// never `Ternary.unknown`.
            pure nothrow @safe @nogc
            Ternary empty()
            {
                return Ternary(_control.allAre0());
            }
        }
935 
        /// Unsynchronized emptiness check shared by `empty` and `allocateAll`;
        /// in the shared instantiation both callers hold the lock first.
        pure nothrow @trusted @nogc
        private Ternary emptyImpl()
        {
            return Ternary((cast(BitVector) _control).allAre0());
        }
941 
942         // Debug helper
943         debug(StdBitmapped)
944         private void dump()
945         {
946             import std.stdio : writefln, writeln;
947 
948             ulong controlLen = (cast(BitVector) _control).length;
949             writefln("%s @ %s {", typeid(this), cast(void*) (cast(BitVector) _control)._rep.ptr);
950             scope(exit) writeln("}");
951             assert(_payload.length >= blockSize * _blocks);
952             assert(controlLen >= _blocks);
953             writefln("  _startIdx=%s; blockSize=%s; blocks=%s",
954                 _startIdx, blockSize, _blocks);
955             if (!controlLen) return;
956             uint blockCount = 1;
957             bool inAllocatedStore = (cast(BitVector) _control)[0];
958             void* start = cast(void*) _payload.ptr;
959             for (size_t i = 1;; ++i)
960             {
961                 if (i >= _blocks || (cast(BitVector) _control)[i] != inAllocatedStore)
962                 {
963                     writefln("  %s block at 0x%s, length: %s (%s*%s)",
964                         inAllocatedStore ? "Busy" : "Free",
965                         cast(void*) start,
966                         blockCount * blockSize,
967                         blockCount, blockSize);
968                     if (i >= _blocks) break;
969                     assert(i < controlLen);
970                     inAllocatedStore = (cast(BitVector) _control)[i];
971                     start = cast(void*) (_payload.ptr + blockCount * blockSize);
972                     blockCount = 1;
973                 }
974                 else
975                 {
976                     ++blockCount;
977                 }
978             }
979         }
980 
        /// Claims the whole payload in one shot, but only when no block is
        /// currently allocated; otherwise returns `null` without allocating
        /// (no attempt is made to find the largest free run).
        void[] allocateAll() return scope
        {
            static if (isShared)
            {
                lock.lock();
                scope(exit) lock.unlock();
            }

            // emptyImpl is safe here: in the shared case we hold the lock.
            if (emptyImpl != Ternary.yes) return null;
            (cast(BitVector) _control)[] = 1;
            return cast(void[]) _payload;
        }
993     } // Finish Yes.multiblock implementation specifics
994     else
995     {
        /// Lock-free single-block allocation: finds the first clear bit in
        /// the bitmap and claims it with a CAS. Returns `null` when `s`
        /// requires more than one block or no block is free.
        static if (isShared)
        pure nothrow @trusted @nogc
        void[] allocate(const size_t s)
        {
            import core.atomic : cas, atomicLoad, atomicOp;
            import core.bitop : bsr;
            import std.range : iota;
            import std.algorithm.iteration : map;
            import std.array : array;

            // This instantiation never serves more than one block per request.
            if (s.divideRoundUp(blockSize) != 1)
                return null;

            // First zero bit position for all values in the 0 - 255 range
            // for fast lookup
            static immutable ubyte[255] firstZero = iota(255U).map!
                (x => (7 - (bsr((~x) & 0x000000ff)))).array;

            foreach (size_t i; 0 .. _control.length)
            {
                ulong controlVal, newControlVal, bitIndex;
                // Optimistic-concurrency loop: load the word, compute the new
                // value with one extra bit set, retry if another thread raced.
                do
                {
                    bitIndex = 0;
                    newControlVal = 0;
                    controlVal = atomicLoad(_control[i]);

                    // skip all control words which have all bits set
                    if (controlVal == ulong.max)
                        break;

                    // fast lookup of first byte which has at least one zero bit
                    foreach (byteIndex; 0 .. 8)
                    {
                        ulong mask = (0xFFUL << (8 * (7 - byteIndex)));
                        if ((mask & controlVal) != mask)
                        {
                            ubyte byteVal = cast(ubyte) ((mask & controlVal) >> (8 * (7 - byteIndex)));
                            bitIndex += firstZero[byteVal];
                            // MSB-first layout: block bitIndex is bit 63 - bitIndex.
                            newControlVal = controlVal | (1UL << (63 - bitIndex));
                            break;
                        }
                        bitIndex += 8;
                    }
                } while (!cas(&_control[i], controlVal, newControlVal));

                auto blockIndex = bitIndex + 64 * i;
                // Reject bits in the last word that lie past the real block
                // count; controlVal == ulong.max means this word was full.
                if (controlVal != ulong.max && blockIndex < _blocks)
                {
                    size_t payloadBlockStart = cast(size_t) blockIndex * blockSize;
                    return cast(void[]) _payload[payloadBlockStart .. payloadBlockStart + s];
                }
            }

            return null;
        }
1052 
        /// Single-block fast path: scans the bitmap for the first clear bit,
        /// sets it, and returns the matching block. Returns `null` when `s`
        /// needs more than one block or nothing is free.
        static if (!isShared)
        pure nothrow @trusted @nogc
        void[] allocate(const size_t s)
        {
            import core.bitop : bsr;
            import std.range : iota;
            import std.algorithm.iteration : map;
            import std.array : array;

            // This instantiation never serves more than one block per request.
            if (s.divideRoundUp(blockSize) != 1)
                return null;

            // First zero bit position for all values in the 0 - 255 range
            // for fast lookup
            static immutable ubyte[255] firstZero = iota(255U).map!
                (x => (7 - (bsr((~x) & 0x000000ff)))).array;

            // Rotate the starting word so successive calls begin their scans
            // at different control words.
            _startIdx = (_startIdx + 1) % _control.length;
            foreach (size_t idx; 0 .. _control.length)
            {
                size_t i = (idx + _startIdx) % _control.length;
                size_t bitIndex = 0;
                // skip all control words which have all bits set
                if (_control[i] == ulong.max)
                    continue;

                // fast lookup of first byte which has at least one zero bit
                foreach (byteIndex; 0 .. 8)
                {
                    ulong mask = (0xFFUL << (8 * (7 - byteIndex)));
                    if ((mask & _control[i]) != mask)
                    {
                        ubyte byteVal = cast(ubyte) ((mask & _control[i]) >> (8 * (7 - byteIndex)));
                        bitIndex += firstZero[byteVal];
                        // MSB-first layout: block bitIndex is bit 63 - bitIndex.
                        _control[i] |= (1UL << (63 - bitIndex));
                        break;
                    }
                    bitIndex += 8;
                }

                auto blockIndex = bitIndex + 64 * i;
                // Guard against bits in the last word that lie past _blocks.
                if (blockIndex < _blocks)
                {
                    size_t payloadBlockStart = cast(size_t) blockIndex * blockSize;
                    return cast(void[]) _payload[payloadBlockStart .. payloadBlockStart + s];
                }
            }

            return null;
        }
1103 
1104         nothrow @nogc
1105         bool deallocate(void[] b)
1106         {
1107             static if (isShared)
1108             import core.atomic : atomicOp;
1109 
1110             if (b is null)
1111                 return true;
1112 
1113             auto blockIndex = (b.ptr - _payload.ptr) / blockSize;
1114             auto controlIndex = blockIndex / 64;
1115             auto bitIndex = blockIndex % 64;
1116             static if (isShared)
1117             {
1118                 atomicOp!"&="(_control[controlIndex], ~(1UL << (63 - bitIndex)));
1119             }
1120             else
1121             {
1122                 _control[controlIndex] &= ~(1UL << (63 - bitIndex));
1123             }
1124 
1125             return true;
1126         }
1127 
1128         pure nothrow @trusted @nogc
1129         bool expand(ref void[] b, immutable size_t delta)
1130         {
1131             if (delta == 0)
1132                 return true;
1133 
1134             immutable newLength = delta + b.length;
1135             if (b is null || newLength > blockSize)
1136                 return false;
1137 
1138             b = b.ptr[0 .. newLength];
1139             return true;
1140         }
1141     } // Finish No.multiblock implementation specifics
1142 
1143     pure nothrow @trusted @nogc
1144     Ternary owns(const void[] b) const
1145     {
1146         assert(b || b.length == 0, "Corrupt block.");
1147         return Ternary(b && _payload && (&b[0] >= &_payload[0])
1148                && (&b[0] + b.length) <= (&_payload[0] + _payload.length));
1149     }
1150 }
1151 
1152 /**
1153 `BitmappedBlock` implements a simple heap consisting of one contiguous area
1154 of memory organized in blocks, each of size `theBlockSize`. A block is a unit
1155 of allocation. A bitmap serves as bookkeeping data, more precisely one bit per
1156 block indicating whether that block is currently allocated or not.
1157 
1158 Passing `NullAllocator` as `ParentAllocator` (the default) means user code
1159 manages allocation of the memory block from the outside; in that case
`BitmappedBlock` must be constructed with a `ubyte[]` preallocated block and
has no responsibility regarding the lifetime of its underlying storage.
1162 If another allocator type is passed, `BitmappedBlock` defines a destructor that
1163 uses the parent allocator to release the memory block. That makes the combination of `AllocatorList`,
1164 `BitmappedBlock`, and a back-end allocator such as `MmapAllocator`
1165 a simple and scalable solution for memory allocation.
1166 
1167 There are advantages to storing bookkeeping data separated from the payload
1168 (as opposed to e.g. using `AffixAllocator` to store metadata together with
1169 each allocation). The layout is more compact (overhead is one bit per block),
1170 searching for a free block during allocation enjoys better cache locality, and
1171 deallocation does not touch memory around the payload being deallocated (which
1172 is often cold).
1173 
1174 Allocation requests are handled on a first-fit basis. Although linear in
1175 complexity, allocation is in practice fast because of the compact bookkeeping
1176 representation, use of simple and fast bitwise routines, and caching of the
1177 first available block position. A known issue with this general approach is
1178 fragmentation, partially mitigated by coalescing. Since `BitmappedBlock` does
1179 not need to maintain the allocated size, freeing memory implicitly coalesces
1180 free blocks together. Also, tuning `blockSize` has a considerable impact on
1181 both internal and external fragmentation.
1182 
1183 If the last template parameter is set to `No.multiblock`, the allocator will only serve
1184 allocations which require at most `theBlockSize`. The `BitmappedBlock` has a specialized
1185 implementation for single-block allocations which allows for greater performance,
1186 at the cost of not being able to allocate more than one block at a time.
1187 
1188 The size of each block can be selected either during compilation or at run
1189 time. Statically-known block sizes are frequent in practice and yield slightly
1190 better performance. To choose a block size statically, pass it as the `blockSize`
parameter as in `BitmappedBlock!(4096)`. To choose a block
size at run time, use `BitmappedBlock!(chooseAtRuntime)` and pass the
block size to the constructor.
1194 
1195 Params:
1196     theBlockSize = the length of a block, which must be a multiple of `theAlignment`
1197 
1198     theAlignment = alignment of each block
1199 
1200     ParentAllocator = allocator from which the `BitmappedBlock` will draw memory.
1201         If set to `NullAllocator`, the storage must be passed via the constructor
1202 
1203     f = `Yes.multiblock` to support allocations spanning across multiple blocks and
1204         `No.multiblock` to support single block allocations.
1205         Although limited by single block allocations, `No.multiblock` will generally
1206         provide higher performance.
1207 */
struct BitmappedBlock(size_t theBlockSize, uint theAlignment = platformAlignment,
   ParentAllocator = NullAllocator, Flag!"multiblock" f = Yes.multiblock)
{
    version (StdDdoc)
    {
        /**
        Constructs a block allocator given a hunk of memory, or a desired capacity
        in bytes.
        $(UL
        $(LI If `ParentAllocator` is $(REF_ALTTEXT `NullAllocator`, NullAllocator, std,experimental,allocator,building_blocks,null_allocator),
        only the constructor taking `data` is defined and the user is responsible for freeing `data` if desired.)
        $(LI Otherwise, both constructors are defined. The `data`-based
        constructor assumes memory has been allocated with the parent allocator.
        The `capacity`-based constructor uses `ParentAllocator` to allocate
        an appropriate contiguous hunk of memory. Regardless of the constructor
        used, the destructor releases the memory by using `ParentAllocator.deallocate`.)
        )
        */
        this(ubyte[] data);

        /// Ditto
        this(ubyte[] data, uint blockSize);

        /// Ditto
        this(size_t capacity);

        /// Ditto
        this(ParentAllocator parent, size_t capacity);

        /// Ditto
        this(size_t capacity, uint blockSize);

        /// Ditto
        this(ParentAllocator parent, size_t capacity, uint blockSize);

        /**
        If `blockSize == chooseAtRuntime`, `BitmappedBlock` offers a read/write
        property `blockSize`. It must be set before any use of the allocator.
        Otherwise (i.e. `theBlockSize` is a legit constant), `blockSize` is
        an alias for `theBlockSize`. Whether constant or variable, must also be
        a multiple of `alignment`. This constraint is `assert`ed statically
        and dynamically.
        */
        alias blockSize = theBlockSize;

        /**
        The _alignment offered is user-configurable statically through parameter
        `theAlignment`, defaulted to `platformAlignment`.
        */
        alias alignment = theAlignment;

        /**
        The _parent allocator. Depending on whether `ParentAllocator` holds state
        or not, this is a member variable or an alias for
        `ParentAllocator.instance`.
        */
        ParentAllocator parent;

        /**
        Returns the actual bytes allocated when `n` bytes are requested, i.e.
        `n.roundUpToMultipleOf(blockSize)`.
        */
        pure nothrow @safe @nogc
        size_t goodAllocSize(size_t n);

        /**
        Returns `Ternary.yes` if `b` belongs to the `BitmappedBlock` object,
        `Ternary.no` otherwise. Never returns `Ternary.unknown`. (This
        method is somewhat tolerant in that it accepts an interior slice.)
        */
        pure nothrow @trusted @nogc
        Ternary owns(const void[] b) const;

        /**
        Expands in place a buffer previously allocated by `BitmappedBlock`.
        If instantiated with `No.multiblock`, the expansion fails if the new length
        exceeds `theBlockSize`.
        */
        pure nothrow @trusted @nogc
        bool expand(ref void[] b, immutable size_t delta);

        /**
        Deallocates a block previously allocated with this allocator.
        */
        nothrow @nogc
        bool deallocate(void[] b);

        /**
        Allocates `s` bytes of memory and returns it, or `null` if memory
        could not be allocated.

        The following information might be of help with choosing the appropriate
        block size. Actual allocation occurs in sizes multiple of the block size.
        Allocating one block is the fastest because only one 0 bit needs to be
        found in the metadata. Allocating 2 through 64 blocks is the next cheapest
        because it affects a maximum of two `ulong` in the metadata.
        Allocations greater than 64 blocks require a multiword search through the
        metadata.

        If instantiated with `No.multiblock`, it performs a search for the first zero
        bit in the bitmap and sets it.
        */
        pure nothrow @trusted @nogc
        void[] allocate(const size_t s);

        /**
        Allocates `s` bytes of memory and returns it, or `null` if memory could not be allocated.
        `allocateFresh` behaves just like allocate, the only difference being that this always
        returns unused(fresh) memory. Although there may still be available space in the `BitmappedBlock`,
        `allocateFresh` could still return null, because all the available blocks have been previously deallocated.
        */
        @trusted void[] allocateFresh(const size_t s);

        /**
        If the `BitmappedBlock` object is empty (has no active allocation), allocates
        all memory within and returns a slice to it. Otherwise, returns `null`
        (i.e. no attempt is made to allocate the largest available block).
        */
        void[] allocateAll();

        /**
        Returns `Ternary.yes` if no memory is currently allocated with this
        allocator, otherwise `Ternary.no`. This method never returns
        `Ternary.unknown`.
        */
        pure nothrow @safe @nogc
        Ternary empty();

        /**
        Forcibly deallocates all memory allocated by this allocator, making it
        available for further allocations. Does not return memory to `ParentAllocator`.
        */
        pure nothrow @nogc
        bool deallocateAll();

        /**
        Reallocates a block previously allocated with `alignedAllocate`. Contractions do not occur in place.
        */
        @system bool alignedReallocate(ref void[] b, size_t newSize, uint a);

        /**
        Reallocates a previously-allocated block. Contractions occur in place.
        */
        @system bool reallocate(ref void[] b, size_t newSize);

        /**
        Allocates a block with specified alignment `a`. The alignment must be a
        power of 2. If `a <= alignment`, function forwards to `allocate`.
        Otherwise, it attempts to overallocate and then adjust the result for
        proper alignment. In the worst case the slack memory is around two blocks.
        */
        void[] alignedAllocate(size_t n, uint a);

        /**
        If `ParentAllocator` is not `NullAllocator` and defines `deallocate`,
        the destructor is defined to deallocate the block held.
        */
        ~this();
    }
    else
    {
        version (StdUnittest)
        @system unittest
        {
            import std.algorithm.comparison : max;
            import std.experimental.allocator.mallocator : AlignedMallocator;
            auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
                                    max(theAlignment, cast(uint) size_t.sizeof)));
            scope(exit) () nothrow @nogc { AlignedMallocator.instance.deallocate(m); }();
            static if (theBlockSize == chooseAtRuntime)
            {
                testAllocator!(() => BitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m, 64));
            }
            else
            {
                testAllocator!(() => BitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m));
            }
        }
        mixin BitmappedBlockImpl!(false, f == Yes.multiblock);
    }
}
1389 
1390 ///
@system unittest
{
    // Create a block allocator on top of a 10KB stack region.
    import std.experimental.allocator.building_blocks.region : InSituRegion;
    import std.traits : hasMember;
    InSituRegion!(10_240, 64) r;
    auto a = BitmappedBlock!(64, 64)(cast(ubyte[])(r.allocateAll()));
    static assert(hasMember!(InSituRegion!(10_240, 64), "allocateAll"));
    // Requests need not be multiples of the block size.
    const b = a.allocate(100);
    assert(b.length == 100);
}
1402 
1403 ///
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.typecons : Flag, Yes;

    enum blockSize = 64;
    enum numBlocks = 10;

    // The 'BitmappedBlock' is implicitly instantiated with Yes.multiblock;
    // memory is drawn from Mallocator via the capacity-based constructor.
    auto a = BitmappedBlock!(blockSize, 8, Mallocator, Yes.multiblock)(numBlocks * blockSize);

    // Instantiated with Yes.multiblock, can allocate more than one block at a time
    void[] buf = a.allocate(2 * blockSize);
    assert(buf.length == 2 * blockSize);
    assert(a.deallocate(buf));

    // Can also allocate less than one block
    buf = a.allocate(blockSize / 2);
    assert(buf.length == blockSize / 2);

    // Expands inside the same block
    assert(a.expand(buf, blockSize / 2));
    assert(buf.length == blockSize);

    // If Yes.multiblock, can expand past the size of a single block
    assert(a.expand(buf, 3 * blockSize));
    assert(buf.length == 4 * blockSize);
    assert(a.deallocate(buf));
}
1433 
1434 ///
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.typecons : Flag, No;

    enum blockSize = 64;
    auto a = BitmappedBlock!(blockSize, 8, Mallocator, No.multiblock)(1024 * blockSize);

    // Since instantiated with No.multiblock, can only allocate at most the block size
    void[] buf = a.allocate(blockSize + 1);
    assert(buf is null);

    buf = a.allocate(blockSize);
    assert(buf.length == blockSize);
    assert(a.deallocate(buf));

    // This is also fine, because it's less than the block size
    buf = a.allocate(blockSize / 2);
    assert(buf.length == blockSize / 2);

    // Can expand the buffer until its length is at most 64
    assert(a.expand(buf, blockSize / 2));
    assert(buf.length == blockSize);

    // Cannot expand anymore: the buffer already fills its single block
    assert(!a.expand(buf, 1));
    assert(a.deallocate(buf));
}
1463 
1464 // Test instantiation with stateful allocators
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.experimental.allocator.building_blocks.region : Region;
    auto r = Region!Mallocator(1024 * 96);
    // chooseAtRuntime: the block size (1024) is supplied to the constructor,
    // along with a pointer to the stateful parent allocator.
    auto a = BitmappedBlock!(chooseAtRuntime, 8, Region!Mallocator*, No.multiblock)(&r, 1024 * 64, 1024);
}
1472 
1473 /**
1474 The threadsafe version of the $(LREF BitmappedBlock).
1475 The semantics of the `SharedBitmappedBlock` are identical to the regular $(LREF BitmappedBlock).
1476 
1477 Params:
1478     theBlockSize = the length of a block, which must be a multiple of `theAlignment`
1479 
1480     theAlignment = alignment of each block
1481 
1482     ParentAllocator = allocator from which the `BitmappedBlock` will draw memory.
1483         If set to `NullAllocator`, the storage must be passed via the constructor
1484 
1485     f = `Yes.multiblock` to support allocations spanning across multiple blocks and
1486         `No.multiblock` to support single block allocations.
1487         Although limited by single block allocations, `No.multiblock` will generally
1488         provide higher performance.
1489 */
1490 shared struct SharedBitmappedBlock(size_t theBlockSize, uint theAlignment = platformAlignment,
1491    ParentAllocator = NullAllocator, Flag!"multiblock" f = Yes.multiblock)
1492 {
1493     version (StdDdoc)
1494     {
1495         /**
1496         Constructs a block allocator given a hunk of memory, or a desired capacity
1497         in bytes.
1498         $(UL
1499         $(LI If `ParentAllocator` is $(REF_ALTTEXT `NullAllocator`, NullAllocator, std,experimental,allocator,building_blocks,null_allocator),
1500         only the constructor taking `data` is defined and the user is responsible for freeing `data` if desired.)
1501         $(LI Otherwise, both constructors are defined. The `data`-based
1502         constructor assumes memory has been allocated with the parent allocator.
1503         The `capacity`-based constructor uses `ParentAllocator` to allocate
1504         an appropriate contiguous hunk of memory. Regardless of the constructor
1505         used, the destructor releases the memory by using `ParentAllocator.deallocate`.)
1506         )
1507         */
1508         this(ubyte[] data);
1509 
1510         /// Ditto
1511         this(ubyte[] data, uint blockSize);
1512 
1513         /// Ditto
1514         this(size_t capacity);
1515 
1516         /// Ditto
1517         this(ParentAllocator parent, size_t capacity);
1518 
1519         /// Ditto
1520         this(size_t capacity, uint blockSize);
1521 
1522         /// Ditto
1523         this(ParentAllocator parent, size_t capacity, uint blockSize);
1524 
1525         /**
1526         If `blockSize == chooseAtRuntime`, `SharedBitmappedBlock` offers a read/write
1527         property `blockSize`. It must be set before any use of the allocator.
1528         Otherwise (i.e. `theBlockSize` is a legit constant), `blockSize` is
1529         an alias for `theBlockSize`. Whether constant or variable, must also be
1530         a multiple of `alignment`. This constraint is `assert`ed statically
1531         and dynamically.
1532         */
1533         alias blockSize = theBlockSize;
1534 
1535         /**
1536         The _alignment offered is user-configurable statically through parameter
1537         `theAlignment`, defaulted to `platformAlignment`.
1538         */
1539         alias alignment = theAlignment;
1540 
1541         /**
1542         The _parent allocator. Depending on whether `ParentAllocator` holds state
1543         or not, this is a member variable or an alias for
1544         `ParentAllocator.instance`.
1545         */
1546         ParentAllocator parent;
1547 
1548         /**
1549         Returns the actual bytes allocated when `n` bytes are requested, i.e.
1550         `n.roundUpToMultipleOf(blockSize)`.
1551         */
1552         pure nothrow @safe @nogc
1553         size_t goodAllocSize(size_t n);
1554 
1555         /**
1556         Returns `Ternary.yes` if `b` belongs to the `SharedBitmappedBlock` object,
        `Ternary.no` otherwise. Never returns `Ternary.unknown`. (This
        method is somewhat tolerant in that it accepts an interior slice.)
1559         */
1560         pure nothrow @trusted @nogc
1561         Ternary owns(const void[] b) const;
1562 
1563         /**
1564         Expands in place a buffer previously allocated by `SharedBitmappedBlock`.
1565         Expansion fails if the new length exceeds the block size.
1566         */
1567         bool expand(ref void[] b, immutable size_t delta);
1568 
1569         /**
1570         Deallocates the given buffer `b`, by atomically setting the corresponding
1571         bit to `0`. `b` must be valid, and cannot contain multiple adjacent `blocks`.
1572         */
1573         nothrow @nogc
1574         bool deallocate(void[] b);
1575 
1576         /**
1577         Allocates `s` bytes of memory and returns it, or `null` if memory
1578         could not be allocated.
1579 
1580         The `SharedBitmappedBlock` cannot allocate more than the given block size.
1581         Allocations are satisfied by searching the first unset bit in the bitmap,
1582         and atomically setting it.
1583         In rare memory pressure scenarios, the allocation could fail.
1584         */
1585         nothrow @trusted @nogc
1586         void[] allocate(const size_t s);
1587 
1588         /**
1589         Allocates s bytes of memory and returns it, or `null` if memory could not be allocated.
1590         `allocateFresh` behaves just like allocate, the only difference being that this always
1591         returns unused(fresh) memory. Although there may still be available space in the `SharedBitmappedBlock`,
1592         `allocateFresh` could still return null, because all the available blocks have been previously deallocated.
1593         */
1594         @trusted void[] allocateFresh(const size_t s);
1595 
1596         /**
1597         If the `SharedBitmappedBlock` object is empty (has no active allocation), allocates
1598         all memory within and returns a slice to it. Otherwise, returns `null`
1599         (i.e. no attempt is made to allocate the largest available block).
1600         */
1601         void[] allocateAll();
1602 
1603         /**
1604         Returns `Ternary.yes` if no memory is currently allocated with this
1605         allocator, otherwise `Ternary.no`. This method never returns
1606         `Ternary.unknown`.
1607         */
1608         nothrow @safe @nogc
1609         Ternary empty();
1610 
1611         /**
1612         Forcibly deallocates all memory allocated by this allocator, making it
1613         available for further allocations. Does not return memory to `ParentAllocator`.
1614         */
1615         nothrow @nogc
1616         bool deallocateAll();
1617 
1618         /**
1619         Reallocates a block previously allocated with `alignedAllocate`. Contractions do not occur in place.
1620         */
1621         @system bool alignedReallocate(ref void[] b, size_t newSize, uint a);
1622 
1623         /**
1624         Reallocates a previously-allocated block. Contractions occur in place.
1625         */
1626         @system bool reallocate(ref void[] b, size_t newSize);
1627 
1628         /**
1629         Allocates a block with specified alignment `a`. The alignment must be a
1630         power of 2. If `a <= alignment`, function forwards to `allocate`.
1631         Otherwise, it attempts to overallocate and then adjust the result for
1632         proper alignment. In the worst case the slack memory is around two blocks.
1633         */
1634         void[] alignedAllocate(size_t n, uint a);
1635 
1636         /**
1637         If `ParentAllocator` is not `NullAllocator` and defines `deallocate`,
1638         the destructor is defined to deallocate the block held.
1639         */
1640         ~this();
1641     }
1642     else
1643     {
1644         version (StdUnittest)
1645         @system unittest
1646         {
1647             import std.algorithm.comparison : max;
1648             import std.experimental.allocator.mallocator : AlignedMallocator;
1649             auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
1650                                     max(theAlignment, cast(uint) size_t.sizeof)));
1651             scope(exit) () nothrow @nogc { AlignedMallocator.instance.deallocate(m); }();
1652             static if (theBlockSize == chooseAtRuntime)
1653             {
1654                 testAllocator!(() => SharedBitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m, 64));
1655             }
1656             else
1657             {
1658                 testAllocator!(() => SharedBitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m));
1659             }
1660         }
1661         mixin BitmappedBlockImpl!(true, f == Yes.multiblock);
1662     }
1663 }
1664 
1665 ///
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.experimental.allocator.common : platformAlignment;
    import std.typecons : Flag, Yes, No;

    // Create 'numThreads' threads, each allocating in parallel a chunk of memory
    static void testAlloc(Allocator)(ref Allocator a, size_t allocSize)
    {
        import core.thread : ThreadGroup;
        import std.algorithm.sorting : sort;
        import core.internal.spinlock : SpinLock;

        // The spinlock only protects the test's own bookkeeping ('buf' and
        // 'count'); the allocator itself is called without any external lock.
        SpinLock lock = SpinLock(SpinLock.Contention.brief);
        enum numThreads = 10;
        void[][numThreads] buf;
        size_t count = 0;

        // Each thread allocates 'allocSize' bytes
        void fun()
        {
            void[] b = a.allocate(allocSize);
            assert(b.length == allocSize);

            lock.lock();
            scope(exit) lock.unlock();

            buf[count] = b;
            count++;
        }

        auto tg = new ThreadGroup;
        foreach (i; 0 .. numThreads)
        {
            tg.create(&fun);
        }
        tg.joinAll();

        // Sorting the allocations made by each thread, we expect the buffers to be
        // adjacent inside the SharedBitmappedBlock
        sort!((a, b) => a.ptr < b.ptr)(buf[0 .. numThreads]);
        foreach (i; 0 .. numThreads - 1)
        {
            // No two allocations may overlap once rounded up to block size.
            assert(buf[i].ptr + a.goodAllocSize(buf[i].length) <= buf[i + 1].ptr);
        }

        // Deallocate everything
        foreach (i; 0 .. numThreads)
        {
            assert(a.deallocate(buf[i]));
        }
    }

    // Exercise both the multi-block and the single-block configurations.
    enum blockSize = 64;
    auto alloc1 = SharedBitmappedBlock!(blockSize, platformAlignment, Mallocator, Yes.multiblock)(1024 * 1024);
    auto alloc2 = SharedBitmappedBlock!(blockSize, platformAlignment, Mallocator, No.multiblock)(1024 * 1024);
    testAlloc(alloc1, 2 * blockSize);
    testAlloc(alloc2, blockSize);
}
1725 
@system unittest
{
    // Exercise the chooseAtRuntime block-size configuration on top of a
    // 10KB stack-allocated region.
    import std.experimental.allocator.building_blocks.region : InSituRegion;
    import std.traits : hasMember;
    static assert(hasMember!(InSituRegion!(10_240, 64), "allocateAll"));
    InSituRegion!(10_240, 64) stackRegion;
    uint runtimeBlockSize = 64;
    auto allocator = BitmappedBlock!(chooseAtRuntime, 64)(cast(ubyte[])(stackRegion.allocateAll()), runtimeBlockSize);
    const chunk = (() pure nothrow @safe @nogc => allocator.allocate(100))();
    assert(chunk.length == 100);
}
1739 
pure @safe unittest
{
    import std.typecons : Ternary;

    // A freshly constructed multi-block BitmappedBlock over a 10KB buffer
    // starts out empty; a successful allocation makes it non-empty.
    auto allocator = (() @trusted => BitmappedBlock!(64, 64, NullAllocator, Yes.multiblock)(new ubyte[10_240]))();
    () nothrow @nogc {
        assert(allocator.empty == Ternary.yes);
        const chunk = allocator.allocate(100);
        assert(chunk.length == 100);
        assert(allocator.empty == Ternary.no);
    }();
}
1752 
@safe unittest
{
    import std.typecons : Ternary;

    // Same smoke test as for the non-shared variant, applied to the
    // thread-safe SharedBitmappedBlock.
    auto allocator = (() @trusted => SharedBitmappedBlock!(64, 64, NullAllocator, Yes.multiblock)(new ubyte[10_240]))();
    assert((() nothrow @safe @nogc => allocator.empty)() == Ternary.yes);
    const chunk = allocator.allocate(100);
    assert(chunk.length == 100);
}
1762 
version (StdUnittest)
@system unittest
{
    // Run the generic allocator conformance suite on a GC-backed instance.
    import std.experimental.allocator.gc_allocator : GCAllocator;
    enum totalBytes = 1024 * 64;
    testAllocator!(() => BitmappedBlock!(64, 8, GCAllocator)(totalBytes));
}
1769 
version (StdUnittest)
@system unittest
{
    // Conformance suite with the block size supplied at run time rather
    // than as a template argument, for both multiblock flavors.
    import std.experimental.allocator.gc_allocator : GCAllocator;
    enum totalBytes = 1024 * 64;
    uint runtimeBlockSize = 64;
    testAllocator!(() => BitmappedBlock!(chooseAtRuntime, 8, GCAllocator, Yes.multiblock)(totalBytes, runtimeBlockSize));
    testAllocator!(() => BitmappedBlock!(chooseAtRuntime, 8, GCAllocator, No.multiblock)(totalBytes, runtimeBlockSize));
}
1779 
version (StdUnittest)
@system unittest
{
    // Conformance suite for the shared variant, both multiblock flavors.
    import std.experimental.allocator.mallocator : Mallocator;
    enum totalBytes = 1024 * 64;
    testAllocator!(() => SharedBitmappedBlock!(64, 8, Mallocator, Yes.multiblock)(totalBytes));
    testAllocator!(() => SharedBitmappedBlock!(64, 8, Mallocator, No.multiblock)(totalBytes));
}
1787 
version (StdUnittest)
@system unittest
{
    // Shared variant with a run-time chosen block size, both flavors.
    import std.experimental.allocator.mallocator : Mallocator;
    enum totalBytes = 1024 * 64;
    uint runtimeBlockSize = 64;
    testAllocator!(() => SharedBitmappedBlock!(chooseAtRuntime, 8, Mallocator, Yes.multiblock)(totalBytes, runtimeBlockSize));
    testAllocator!(() => SharedBitmappedBlock!(chooseAtRuntime, 8, Mallocator, No.multiblock)(totalBytes, runtimeBlockSize));
}
1797 
@system unittest
{
    // Exhaustive allocate/deallocate/expand/reallocate exercise, run for both
    // the shared and the non-shared BitmappedBlock over many combinations of
    // block size ('bs'), heap size ('blocks') and group size ('blocksAtATime').
    static void testAllocateAll(size_t bs, bool isShared = true)(size_t blocks, uint blocksAtATime)
    {
        // Builds the source string of an allocate() call wrapped in the
        // attribute set appropriate for the variant under test (the shared
        // variant's allocate is not pure).
        template attribAllocate(string size)
        {
            static if (isShared)
            {
                const char[] attribAllocate = "(() nothrow @safe @nogc => a.allocate(" ~ size ~ "))()";
            }
            else
            {
                const char[] attribAllocate = "(() pure nothrow @safe @nogc => a.allocate(" ~ size ~ "))()";
            }
        }

        assert(bs);
        import std.typecons : Ternary;
        import std.algorithm.comparison : min;
        import std.experimental.allocator.gc_allocator : GCAllocator;

        static if (isShared)
        {
            auto a = SharedBitmappedBlock!(bs, min(bs, platformAlignment), NullAllocator)(
                cast(ubyte[])(GCAllocator.instance.allocate((blocks * bs * 8 + blocks) / 8)));
        }
        else
        {
            auto a = BitmappedBlock!(bs, min(bs, platformAlignment), NullAllocator)(
                cast(ubyte[])(GCAllocator.instance.allocate((blocks * bs * 8 + blocks) / 8)));
        }

        import std.conv : text;
        // The allocator may carve fewer usable blocks out of the buffer than
        // requested (part of it holds the control bitmap); never more.
        assert(blocks >= a._blocks, text(blocks, " < ", a._blocks));
        blocks = a._blocks;

        // test allocation of 0 bytes
        auto x = mixin(attribAllocate!("0"));
        assert(x is null);
        // test allocation of 1 byte
        x = mixin(attribAllocate!("1"));
        assert(x.length == 1 || blocks == 0);
        assert((() nothrow @nogc => a.deallocateAll())());
        assert(a.empty() == Ternary.yes);
        bool twice = true;

    begin:
        // Fill the heap completely, 'blocksAtATime' blocks per allocation.
        foreach (i; 0 .. blocks / blocksAtATime)
        {
            auto b = mixin(attribAllocate!("bs * blocksAtATime"));
            assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
        }

        // Heap is full: another group must fail; if the group size divides
        // the heap evenly, even a single byte must fail.
        assert(mixin(attribAllocate!("bs * blocksAtATime")) is null);
        if (a._blocks % blocksAtATime == 0)
        {
            assert(mixin(attribAllocate!("1")) is null);
        }

        // Now deallocate all and do it again!
        assert((() nothrow @nogc => a.deallocateAll())());

        // Test deallocation

        auto v = new void[][blocks / blocksAtATime];
        foreach (i; 0 .. blocks / blocksAtATime)
        {
            auto b = mixin(attribAllocate!("bs * blocksAtATime"));
            assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
            v[i] = b;
        }
        assert(mixin(attribAllocate!("bs * blocksAtATime")) is null);
        if (a._blocks % blocksAtATime == 0)
        {
            assert(mixin(attribAllocate!("1")) is null);
        }

        // Free one-by-one, then refill to verify the freed space is reusable.
        foreach (i; 0 .. blocks / blocksAtATime)
        {
            () nothrow @nogc { a.deallocate(v[i]); }();
        }

        foreach (i; 0 .. blocks / blocksAtATime)
        {
            auto b = mixin(attribAllocate!("bs * blocksAtATime"));
            assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
            v[i] = b;
        }

        foreach (i; 0 .. v.length)
        {
            () nothrow @nogc { a.deallocate(v[i]); }();
        }

        // Run the whole fill/free scenario a second time on the same instance.
        if (twice)
        {
            twice = false;
            goto begin;
        }

        assert((() nothrow @nogc => a.deallocateAll())());

        // test expansion
        if (blocks >= blocksAtATime)
        {
            foreach (i; 0 .. blocks / blocksAtATime - 1)
            {
                auto b = mixin(attribAllocate!("bs * blocksAtATime"));
                assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
                // Scribble over the buffer to ensure expand/reallocate do not
                // trip over user data.
                (cast(ubyte[]) b)[] = 0xff;
                static if (isShared)
                {
                    assert((() nothrow @safe @nogc => a.expand(b, blocksAtATime * bs))()
                            , text(i));
                }
                else
                {
                    assert((() pure nothrow @safe @nogc => a.expand(b, blocksAtATime * bs))()
                            , text(i));
                }
                (cast(ubyte[]) b)[] = 0xfe;
                assert(b.length == bs * blocksAtATime * 2, text(i, ": ", b.length));
                // Shrink back to the original size; contraction must succeed.
                a.reallocate(b, blocksAtATime * bs) || assert(0);
                assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
            }
        }
    }

    testAllocateAll!(1)(0, 1);
    testAllocateAll!(1, false)(0, 1);
    testAllocateAll!(1)(8, 1);
    testAllocateAll!(1, false)(8, 1);

    testAllocateAll!(4096)(128, 1);
    testAllocateAll!(4096, false)(128, 1);

    testAllocateAll!(1)(0, 2);
    testAllocateAll!(1)(128, 2);
    testAllocateAll!(4096)(128, 2);

    testAllocateAll!(1, false)(0, 2);
    testAllocateAll!(1, false)(128, 2);
    testAllocateAll!(4096, false)(128, 2);

    testAllocateAll!(1)(0, 4);
    testAllocateAll!(1)(128, 4);
    testAllocateAll!(4096)(128, 4);

    testAllocateAll!(1, false)(0, 4);
    testAllocateAll!(1, false)(128, 4);
    testAllocateAll!(4096, false)(128, 4);

    testAllocateAll!(1)(0, 3);
    testAllocateAll!(1)(24, 3);
    testAllocateAll!(3008)(100, 1);
    testAllocateAll!(3008)(100, 3);

    testAllocateAll!(1, false)(0, 3);
    testAllocateAll!(1, false)(24, 3);
    testAllocateAll!(3008, false)(100, 1);
    testAllocateAll!(3008, false)(100, 3);

    testAllocateAll!(1)(0, 128);
    testAllocateAll!(1)(128 * 1, 128);
    testAllocateAll!(128 * 20)(13 * 128, 128);

    testAllocateAll!(1, false)(0, 128);
    testAllocateAll!(1, false)(128 * 1, 128);
    testAllocateAll!(128 * 20, false)(13 * 128, 128);
}
1968 
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;

    // Verifies allocateFresh: it must only hand out blocks that were never
    // used before. 'count' mirrors the index of the first block expected to
    // be free; '_control[i]' is the per-block allocated bit.
    enum blocks = 10000;
    int count = 0;

    ubyte[] payload = cast(ubyte[]) Mallocator.instance.allocate(blocks * 16);
    auto a = BitmappedBlock!(16, 16)(payload);
    void[][] buf = cast(void[][]) Mallocator.instance.allocate((void[]).sizeof * blocks);

    // A zero-byte fresh allocation yields null and flips no control bit.
    assert(!a.allocateFresh(0));
    assert(!a._control[0]);

    void[] b = a.allocate(256 * 16);
    assert(b.length == 256 * 16);
    count += 256;

    assert(!a._control[count]);
    b = a.allocateFresh(16);
    assert(b.length == 16);
    count++;
    assert(a._control[count - 1]);

    b = a.allocateFresh(16 * 300);
    assert(b.length == 16 * 300);
    count += 300;

    // All blocks handed out so far are marked; the next one is not.
    for (int i = 0; i < count; i++)
        assert(a._control[i]);
    assert(!a._control[count]);

    assert(a.expand(b, 313 * 16));
    count += 313;

    for (int i = 0; i < count; i++)
        assert(a._control[i]);
    assert(!a._control[count]);

    b = a.allocate(64 * 16);
    assert(b.length == 64 * 16);
    count += 64;

    b = a.allocateFresh(16);
    assert(b.length == 16);
    count++;

    for (int i = 0; i < count; i++)
        assert(a._control[i]);
    assert(!a._control[count]);

    // After deallocateAll, every control bit is clear again...
    assert(a.deallocateAll());
    for (int i = 0; i < a._blocks; i++)
        assert(!a._control[i]);

    // ...but allocateFresh must still skip the previously-used prefix and
    // carve its blocks from untouched territory.
    b = a.allocateFresh(257 * 16);
    assert(b.length == 257 * 16);
    for (int i = 0; i < count; i++)
        assert(!a._control[i]);
    for (int i = count; i < count + 257; i++)
        assert(a._control[i]);
    count += 257;
    assert(!a._control[count]);

    // Fill the remainder of the heap with regular allocations.
    while (true)
    {
        b = a.allocate(16);
        if (!b)
            break;
        assert(b.length == 16);
    }

    // No fresh blocks remain once the heap has been fully touched.
    assert(!a.allocateFresh(16));
    assert(a.deallocateAll());

    assert(a.allocate(16).length == 16);
    assert(!a.allocateFresh(16));
}
2047 
2048 
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.random;

    // Randomized stress test: interleave allocate/allocateFresh/expand/
    // deallocate, then fill the heap to capacity and check the control
    // bitmap is fully set, then free everything and check it is fully clear.
    static void testAlloc(Allocator)()
    {
        auto numBlocks = [1, 64, 256];
        enum blocks = 10000;
        int iter = 0;

        ubyte[] payload = cast(ubyte[]) Mallocator.instance.allocate(blocks * 16);
        auto a = Allocator(payload);
        void[][] buf = cast(void[][]) Mallocator.instance.allocate((void[]).sizeof * blocks);

        // Default-seeded Random, so the sequence is deterministic per run.
        auto rnd = Random();
        while (iter < blocks)
        {
            int event = uniform(0, 2, rnd);
            int doExpand = uniform(0, 2, rnd);
            int allocSize = numBlocks[uniform(0, 3, rnd)] * 16;
            int expandSize = numBlocks[uniform(0, 3, rnd)] * 16;
            int doDeallocate = uniform(0, 2, rnd);

            // Randomly pick the ordinary or the fresh allocation path.
            if (event) buf[iter] = a.allocate(allocSize);
            else buf[iter] = a.allocateFresh(allocSize);

            if (!buf[iter])
                break;
            assert(buf[iter].length == allocSize);

            auto oldSize = buf[iter].length;
            if (doExpand && a.expand(buf[iter], expandSize))
                assert(buf[iter].length == expandSize + oldSize);

            if (doDeallocate)
            {
                assert(a.deallocate(buf[iter]));
                buf[iter] = null;
            }

            iter++;
        }

        // Exhaust whatever capacity is left with single-block allocations.
        while (iter < blocks)
        {
            buf[iter++] = a.allocate(16);
            if (!buf[iter - 1])
                break;
            assert(buf[iter - 1].length == 16);
        }

        // Heap is full: every control bit must be set.
        for (size_t i = 0; i < a._blocks; i++)
            assert((cast(BitVector) a._control)[i]);

        assert(!a.allocate(16));
        for (size_t i = 0; i < iter; i++)
        {
            if (buf[i])
                assert(a.deallocate(buf[i]));
        }

        // Everything freed: every control bit must be clear.
        for (size_t i = 0; i < a._blocks; i++)
            assert(!(cast(BitVector) a._control)[i]);
    }

    testAlloc!(BitmappedBlock!(16, 16))();
    testAlloc!(SharedBitmappedBlock!(16, 16))();
}
2118 
2119 // Test totalAllocation and goodAllocSize
nothrow @safe @nogc unittest
{
    // goodAllocSize rounds a request up to a whole number of blocks, and
    // totalAllocation reports at least that much for each configuration.
    BitmappedBlock!(8, 8, NullAllocator) small;
    assert(small.goodAllocSize(1) == 8);
    assert(small.totalAllocation(1) >= 8);
    assert(small.totalAllocation(64) >= 64);
    assert(small.totalAllocation(8 * 64) >= 8 * 64);
    assert(small.totalAllocation(8 * 63) >= 8 * 63);
    assert(small.totalAllocation(8 * 64 + 1) >= 8 * 65);

    BitmappedBlock!(64, 8, NullAllocator) medium;
    assert(medium.goodAllocSize(1) == 64);
    assert(medium.totalAllocation(1) >= 64);
    assert(medium.totalAllocation(64 * 64) >= 64 * 64);

    BitmappedBlock!(4096, 4096, NullAllocator) large;
    assert(large.goodAllocSize(1) == 4096);
    assert(large.totalAllocation(1) >= 4096);
    assert(large.totalAllocation(64 * 4096) >= 64 * 4096);
    assert(large.totalAllocation(64 * 4096 + 1) >= 65 * 4096);
}
2141 
2142 // Test owns
@system unittest
{
    import std.experimental.allocator.gc_allocator : GCAllocator;
    import std.typecons : Ternary;

    // owns must answer yes for memory the allocator handed out and no for
    // foreign pointers, including null.
    auto allocator = BitmappedBlock!(64, 8, GCAllocator)(1024 * 64);
    const void[] block = (() pure nothrow @safe @nogc => allocator.allocate(42))();

    assert((() nothrow @safe @nogc => allocator.owns(block))() == Ternary.yes);
    assert((() nothrow @safe @nogc => allocator.owns(null))() == Ternary.no);
}
2154 
2155 // BitmappedBlockWithInternalPointers
2156 /**
2157 
2158 A `BitmappedBlock` with additional structure for supporting `resolveInternalPointer`.
2159 To that end, `BitmappedBlockWithInternalPointers` adds a
2160 bitmap (one bit per block) that marks object starts. The bitmap itself has
2161 variable size and is allocated together with regular allocations.
2162 
2163 The time complexity of `resolveInternalPointer` is $(BIGOH k), where `k`
2164 is the size of the object within which the internal pointer is looked up.
2165 
2166 */
struct BitmappedBlockWithInternalPointers(
    size_t theBlockSize, uint theAlignment = platformAlignment,
    ParentAllocator = NullAllocator)
{
    import std.conv : text;
    import std.typecons : Ternary;

    static if (!stateSize!ParentAllocator)
    version (StdUnittest)
    @system unittest
    {
        import std.experimental.allocator.mallocator : AlignedMallocator;
        auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
            theAlignment));
        scope(exit) () nothrow @nogc { AlignedMallocator.instance.deallocate(m); }();
        testAllocator!(() => BitmappedBlockWithInternalPointers(m));
    }

    // state {
    private BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator) _heap;
    // One bit per block; a set bit marks the first block of an allocation,
    // letting resolveInternalPointer walk left to find the object start.
    private BitVector _allocStart;
    // }

    /**
    Constructors accepting desired capacity or a preallocated buffer, similar
    in semantics to those of `BitmappedBlock`.
    */
    static if (!stateSize!ParentAllocator)
    this(ubyte[] data)
    {
        _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)(data);
    }

    static if (stateSize!ParentAllocator)
    this(ParentAllocator parent, ubyte[] data)
    {
        _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)(data);
        _heap.parent = parent;
    }

    /// Ditto
    static if (!is(ParentAllocator == NullAllocator) && !stateSize!ParentAllocator)
    this(size_t capacity)
    {
        // Add room for the _allocStart vector
        _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)
            (capacity + capacity.divideRoundUp(64));
    }

    /// Ditto
    static if (!is(ParentAllocator == NullAllocator) && stateSize!ParentAllocator)
    this(ParentAllocator parent, size_t capacity)
    {
        // Add room for the _allocStart vector
        _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)
            (parent, capacity + capacity.divideRoundUp(64));
    }

    // Makes sure there's enough room for _allocStart
    // Grows the start-bit vector (stored inside _heap itself, via reallocate)
    // so that it covers at least `len` blocks. Returns false if the heap
    // cannot accommodate the larger vector.
    @safe
    private bool ensureRoomForAllocStart(size_t len)
    {
        if (_allocStart.length >= len) return true;
        // Must ensure there's room
        immutable oldLength = _allocStart.rep.length;
        immutable bits = len.roundUpToMultipleOf(64);
        void[] b = _allocStart.rep;
        if ((() @trusted => !_heap.reallocate(b, bits / 8))()) return false;
        assert(b.length * 8 == bits);
        _allocStart = BitVector((() @trusted => cast(ulong[]) b)());
        assert(_allocStart.rep.length * 64 == bits);
        // Newly added words are filled with ones. NOTE(review): presumably so
        // not-yet-tracked territory reads as "allocation start" and bounds the
        // backward search in resolveInternalPointer — confirm.
        _allocStart.rep[oldLength .. $] = ulong.max;
        return true;
    }

    /**
    Allocator primitives.
    */
    alias alignment = theAlignment;

    /// Ditto
    pure nothrow @safe @nogc
    size_t goodAllocSize(size_t n)
    {
        return n.roundUpToMultipleOf(_heap.blockSize);
    }

    /// Ditto
    void[] allocate(size_t bytes)
    {
        auto r = _heap.allocate(bytes);
        if (!r.ptr) return r;
        // Index of the first block of this allocation, and its block count.
        immutable block = (() @trusted => (r.ptr - _heap._payload.ptr) / _heap.blockSize)();
        immutable blocks =
            (r.length + _heap.blockSize - 1) / _heap.blockSize;
        if (!ensureRoomForAllocStart(block + blocks))
        {
            // Failed, free r and bailout
            () @trusted { _heap.deallocate(r); r = null; }();
            return null;
        }
        assert(block < _allocStart.length);
        assert(block + blocks <= _allocStart.length);
        // Mark the _allocStart bits
        assert(blocks > 0);
        // First block of the allocation gets a 1, interior blocks get 0.
        _allocStart[block] = 1;
        _allocStart[block + 1 .. block + blocks] = 0;
        assert(block + blocks == _allocStart.length
            || _allocStart[block + blocks] == 1);
        return r;
    }

    /// Ditto
    void[] allocateAll()
    {
        auto r = _heap.allocateAll();
        if (!r.ptr) return r;
        // Carve space at the end for _allocStart
        // NOTE(review): only 8 bytes (64 bits) are carved here, i.e. start
        // bits for at most 64 blocks — verify this suffices for heaps with
        // more than 64 blocks.
        auto p = alignDownTo(r.ptr + r.length - 8, ulong.alignof);
        r = r[0 .. p - r.ptr];
        // Initialize _allocStart
        _allocStart = BitVector(cast(ulong[]) p[0 .. 8]);
        _allocStart[] = 0;
        immutable block = (r.ptr - _heap._payload.ptr) / _heap.blockSize;
        assert(block < _allocStart.length);
        _allocStart[block] = 1;
        return r;
    }

    /// Ditto
    bool expand(ref void[] b, size_t bytes)
    {
        if (!bytes) return true;
        if (b is null) return false;
        immutable oldBlocks =
            (b.length + _heap.blockSize - 1) / _heap.blockSize;
        assert(oldBlocks);
        immutable newBlocks =
            (b.length + bytes + _heap.blockSize - 1) / _heap.blockSize;
        assert(newBlocks >= oldBlocks);
        immutable block = (() @trusted => (b.ptr - _heap._payload.ptr) / _heap.blockSize)();
        assert(_allocStart[block]);
        if (!ensureRoomForAllocStart(block + newBlocks)
                || !_heap.expand(b, bytes))
        {
            return false;
        }
        // Zero only the expanded bits
        _allocStart[block + oldBlocks .. block + newBlocks] = 0;
        assert(_allocStart[block]);
        return true;
    }

    /// Ditto
    bool deallocate(void[] b)
    {
        // No need to touch _allocStart here - except for the first bit, it's
        // meaningless in freed memory. The first bit is already 1.
        return _heap.deallocate(b);
        // TODO: one smart thing to do is reduce memory occupied by
        // _allocStart if we're freeing the rightmost block.
    }

    /// Ditto
    nothrow @safe @nogc
    Ternary resolveInternalPointer(const void* p, ref void[] result)
    {
        // Reject pointers outside the heap's payload outright.
        if ((() @trusted => _heap._payload
                    && (p < &_heap._payload[0]
                        || p >= &_heap._payload[0] + _heap._payload.length))())
        {
            return Ternary.no;
        }
        // Find block start
        auto block = (() @trusted => (p - &_heap._payload[0]) / _heap.blockSize)();
        if (block >= _allocStart.length) return Ternary.no;
        // Within an allocation, must find the 1 just to the left of it
        auto i = _allocStart.find1Backward(block);
        if (i == i.max) return Ternary.no;
        // The allocation spans from start bit i up to the next start bit j.
        auto j = _allocStart.find1(i + 1);
        result = (() @trusted => _heap._payload.ptr[cast(size_t) (_heap.blockSize * i)
                                                    .. cast(size_t) (_heap.blockSize * j)])();
        return Ternary.yes;
    }

    /// Ditto
    Ternary empty()
    {
        return _heap.empty;
    }

    // Currently unused
    private void markAllAsUnused()
    {
        // Mark all deallocated memory with 1 so we minimize damage created by
        // false pointers. TODO: improve speed.
        foreach (i, ref e; _allocStart.rep)
        {
            // Set to 1 all bits in _allocStart[i] that were 0 in control, and
            // leave the others unchanged.
            // (0, 0) => 1; (0, 1) => 0; (1, 0) => 1; (1, 1) => 1
            e |= ~_heap._control.rep[i];
        }
        // Now zero all control bits
        _heap._control[] = 0;
        // EXCEPT for the _allocStart block itself
        markAsUsed(_allocStart.rep);
    }

    // Currently unused
    // Re-marks the blocks underlying `b` as allocated in the control bitmap;
    // returns false if the first block was already marked.
    private bool markAsUsed(void[] b)
    {
        // Locate position
        immutable pos = b.ptr - _heap._payload.ptr;
        assert(pos % _heap.blockSize == 0);
        auto blockIdx = pos / _heap.blockSize;
        if (_heap._control[blockIdx]) return false;
        // Round up size to multiple of block size
        auto blocks = b.length.divideRoundUp(_heap.blockSize);
        _heap._control[blockIdx .. blockIdx + blocks] = 1;
        return true;
    }

    // Currently unused
    private void doneMarking()
    {
        // Nothing to do, what's free stays free.
    }
}
2396 
@system unittest
{
    import std.typecons : Ternary;

    auto h = BitmappedBlockWithInternalPointers!(4096)(new ubyte[4096 * 1024]);
    assert((() nothrow @safe @nogc => h.empty)() == Ternary.yes);
    auto b = (() pure nothrow @safe @nogc => h.allocate(123))();
    assert(b.length == 123);
    assert((() nothrow @safe @nogc => h.empty)() == Ternary.no);

    // A pointer into the middle of an allocation resolves to a slice that
    // covers the whole allocation (rounded up to block granularity).
    void[] p;
    void* offset = &b[0] + 17;
    assert((() nothrow @safe @nogc => h.resolveInternalPointer(offset, p))() == Ternary.yes);
    assert(p.ptr is b.ptr);
    assert(p.length >= b.length);
    b = (() pure nothrow @safe @nogc => h.allocate(4096))();

    // An exactly block-sized allocation resolves to itself, from its first
    // byte and from an interior byte alike.
    offset = &b[0];
    assert((() nothrow @safe @nogc => h.resolveInternalPointer(offset, p))() == Ternary.yes);
    assert(p is b);

    offset = &b[0] + 11;
    assert((() nothrow @safe @nogc => h.resolveInternalPointer(offset, p))() == Ternary.yes);
    assert(p is b);

    // A pointer outside the heap resolves to no and leaves 'result' intact.
    void[] unchanged = p;
    offset = &b[0] - 40_970;
    assert((() nothrow @safe @nogc => h.resolveInternalPointer(offset, p))() == Ternary.no);
    assert(p is unchanged);

    // After expanding into a second block, a pointer into the new tail still
    // resolves to the start of the same allocation.
    assert((() @safe => h.expand(b, 1))());
    assert(b.length == 4097);
    offset = &b[0] + 4096;
    assert((() nothrow @safe @nogc => h.resolveInternalPointer(offset, p))() == Ternary.yes);
    assert(p.ptr is b.ptr);

    // Ensure deallocate inherits from parent
    () nothrow @nogc { h.deallocate(b); }();
}
2436 
// goodAllocSize rounds any request up to a whole block (4096 bytes here),
// and must be callable as pure nothrow @safe @nogc.
@system unittest
{
    auto h = BitmappedBlockWithInternalPointers!(4096)(new ubyte[4096 * 1024]);
    assert((() pure nothrow @safe @nogc => h.goodAllocSize(1))() == 4096);
}
2442 
// Test instantiation with stateful allocators: the parent allocator is a
// Region (which carries state and cannot be a shared singleton), passed by
// pointer as the third template argument.
@system unittest
{
    import std.experimental.allocator.mallocator : Mallocator;
    import std.experimental.allocator.building_blocks.region : Region;
    auto r = Region!Mallocator(1024 * 1024);
    auto h = BitmappedBlockWithInternalPointers!(4096, 8, Region!Mallocator*)(&r, 4096 * 1024);
}
2451 
2452 /**
2453 Returns the number of most significant ones before a zero can be found in `x`.
2454 If `x` contains no zeros (i.e. is equal to `ulong.max`), returns 64.
2455 */
2456 pure nothrow @safe @nogc
2457 private uint leadingOnes(ulong x)
2458 {
2459     import core.bitop : bsr;
2460     const x_ = ~x;
2461     return x_ == 0 ? 64 : (63 - bsr(x_));
2462 }
2463 
// leadingOnes: all-zeros, all-ones, and mixed patterns whose leading run
// lengths differ from their total popcount.
@safe unittest
{
    assert(leadingOnes(0) == 0);
    assert(leadingOnes(~0UL) == 64);
    assert(leadingOnes(0xF000_0000_0000_0000) == 4);
    assert(leadingOnes(0xE400_0000_0000_0000) == 3);
    assert(leadingOnes(0xC700_0200_0000_0000) == 2);
    assert(leadingOnes(0x8000_0030_0000_0000) == 1);
    assert(leadingOnes(0x2000_0000_0000_0000) == 0);
}
2474 
2475 /**
2476 Finds a run of contiguous ones in `x` of length at least `n`.
2477 */
2478 pure nothrow @safe @nogc
2479 private uint findContigOnes(ulong x, uint n)
2480 {
2481     while (n > 1)
2482     {
2483         immutable s = n >> 1;
2484         x &= x << s;
2485         n -= s;
2486     }
2487     return leadingOnes(~x);
2488 }
2489 
// findContigOnes: saturated input (run found at 0 for any n), empty input
// (no run, returns 64), and sparse patterns at known offsets.
@safe unittest
{
    assert(findContigOnes(0x0000_0000_0000_0300, 2) == 54);

    assert(findContigOnes(~0UL, 1) == 0);
    assert(findContigOnes(~0UL, 2) == 0);
    assert(findContigOnes(~0UL, 32) == 0);
    assert(findContigOnes(~0UL, 64) == 0);
    assert(findContigOnes(0UL, 1) == 64);

    assert(findContigOnes(0x4000_0000_0000_0000, 1) == 1);
    assert(findContigOnes(0x0000_0F00_0000_0000, 4) == 20);
}
2503 
2504 /*
2505 Unconditionally sets the bits from lsb through msb in w to zero.
2506 */
2507 pure nothrow @safe @nogc
2508 private void setBits(ref ulong w, uint lsb, uint msb)
2509 {
2510     assert(lsb <= msb && msb < 64);
2511     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
2512     w |= mask;
2513 }
2514 
// setBits: whole-word range, range excluding bit 0, overlapping range on a
// nonzero word, and a single-bit range.
@safe unittest
{
    ulong w;
    w = 0; setBits(w, 0, 63); assert(w == ulong.max);
    w = 0; setBits(w, 1, 63); assert(w == ulong.max - 1);
    w = 6; setBits(w, 0, 1); assert(w == 7);
    w = 6; setBits(w, 3, 3); assert(w == 14);
}
2523 
/* If the bits of `w` from lsb through msb (inclusive) are all zero, sets
them to one and returns true. Otherwise leaves `w` unchanged and returns
false.
*/
pure nothrow @safe @nogc
private bool setBitsIfZero(ref ulong w, uint lsb, uint msb)
{
    assert(lsb <= msb && msb < 64);
    // Mask with ones exactly at positions lsb .. msb.
    const span = (ulong.max << lsb) & (ulong.max >> (63 - msb));
    if ((w & span) != 0)
    {
        // At least one bit in the range is already set; refuse to touch w.
        return false;
    }
    w |= span;
    return true;
}
2536 
// Clears (assigns zero to) the bits of w from lsb through msb, inclusive.
pure nothrow @safe @nogc
private void resetBits(ref ulong w, uint lsb, uint msb)
{
    assert(lsb <= msb && msb < 64);
    // Keep everything strictly below lsb or strictly above msb — this is
    // the De Morgan complement of the range mask setBits uses.
    immutable keep = ~(ulong.max << lsb) | ~(ulong.max >> (63 - msb));
    w &= keep;
}
2545 
2546 /*
2547 Bit disposition is MSB=0 (leftmost, big endian).
2548 */
2549 private struct BitVector
2550 {
2551     ulong[] _rep;
2552 
2553     auto rep(this _)() { return _rep; }
2554 
2555     pure nothrow @safe @nogc
2556     this(ulong[] data) { _rep = data; }
2557 
2558     pure nothrow @safe @nogc
2559     void opSliceAssign(bool b) { _rep[] = b ? ulong.max : 0; }
2560 
2561     pure nothrow @safe @nogc
2562     void opSliceAssign(bool b, ulong x, ulong y)
2563     {
2564         assert(x <= y && y <= _rep.length * 64);
2565         if (x == y) return;
2566         --y;
2567         assert(x / 64 <= size_t.max);
2568         immutable i1 = cast(size_t) (x / 64);
2569         immutable uint b1 = 63 - x % 64;
2570         assert(y / 64 <= size_t.max);
2571         immutable i2 = cast(size_t) (y / 64);
2572         immutable uint b2 = 63 - y % 64;
2573         assert(i1 <= i2 && i2 < _rep.length);
2574         if (i1 == i2)
2575         {
2576             // Inside the same word
2577             assert(b1 >= b2);
2578             if (b) setBits(_rep[i1], b2, b1);
2579             else resetBits(_rep[i1], b2, b1);
2580         }
2581         else
2582         {
2583             // Spans multiple words
2584             assert(i1 < i2);
2585             if (b) setBits(_rep[i1], 0, b1);
2586             else resetBits(_rep[i1], 0, b1);
2587             _rep[i1 + 1 .. i2] = (b ? ulong.max : 0);
2588             if (b) setBits(_rep[i2], b2, 63);
2589             else resetBits(_rep[i2], b2, 63);
2590         }
2591     }
2592 
2593     pure nothrow @safe @nogc
2594     bool opIndex(ulong x)
2595     {
2596         assert(x < length);
2597         return (_rep[cast(size_t) (x / 64)]
2598             & (0x8000_0000_0000_0000UL >> (x % 64))) != 0;
2599     }
2600 
2601     pure nothrow @safe @nogc
2602     void opIndexAssign(bool b, ulong x)
2603     {
2604         assert(x / 64 <= size_t.max);
2605         immutable i = cast(size_t) (x / 64);
2606         immutable j = 0x8000_0000_0000_0000UL >> (x % 64);
2607         if (b) _rep[i] |= j;
2608         else _rep[i] &= ~j;
2609     }
2610 
2611     pure nothrow @safe @nogc
2612     ulong length() const
2613     {
2614         return _rep.length * 64;
2615     }
2616 
2617     /* Returns the index of the first 1 to the right of i (including i itself),
2618     or length if not found.
2619     */
2620     pure nothrow @safe @nogc
2621     ulong find1(ulong i)
2622     {
2623         assert(i < length);
2624         assert(i / 64 <= size_t.max);
2625         auto w = cast(size_t) (i / 64);
2626         immutable b = i % 64; // 0 through 63, 0 when i == 0
2627         immutable mask = ulong.max >> b;
2628         if (auto current = _rep[w] & mask)
2629         {
2630             // Great, found
2631             return w * 64 + leadingOnes(~current);
2632         }
2633         // The current word doesn't have the solution, find the leftmost 1
2634         // going to the right.
2635         for (++w; w < _rep.length; ++w)
2636         {
2637             if (auto current = _rep[w])
2638             {
2639                 return w * 64 + leadingOnes(~current);
2640             }
2641         }
2642         return length;
2643     }
2644 
2645     /* Returns the index of the first 1 to the left of i (including i itself),
2646     or ulong.max if not found.
2647     */
2648     pure nothrow @safe @nogc
2649     ulong find1Backward(ulong i)
2650     {
2651         assert(i < length);
2652         auto w = cast(size_t) (i / 64);
2653         immutable b = 63 - (i % 64); // 0 through 63, 63 when i == 0
2654         immutable mask = ~((1UL << b) - 1);
2655         assert(mask != 0);
2656         // First, let's see if the current word has a bit larger than ours.
2657         if (auto currentWord = _rep[w] & mask)
2658         {
2659             // Great, this word contains the result.
2660             return w * 64 + 63 - currentWord.trailingZeros;
2661         }
2662         // The current word doesn't have the solution, find the rightmost 1
2663         // going to the left.
2664         while (w >= 1)
2665         {
2666             --w;
2667             if (auto currentWord = _rep[w])
2668                 return w * 64 + (63 - currentWord.trailingZeros);
2669         }
2670         return ulong.max;
2671     }
2672 
2673     /// Are all bits zero?
2674     pure nothrow @safe @nogc
2675     bool allAre0() const
2676     {
2677         foreach (w; _rep) if (w) return false;
2678         return true;
2679     }
2680 
2681     /// Are all bits one?
2682     pure nothrow @safe @nogc
2683     bool allAre1() const
2684     {
2685         foreach (w; _rep) if (w != ulong.max) return false;
2686         return true;
2687     }
2688 
2689     pure nothrow @safe @nogc
2690     ulong findZeros(immutable size_t howMany, ulong start)
2691     {
2692         assert(start < length);
2693         assert(howMany > 64);
2694         auto i = cast(size_t) (start / 64);
2695         while (_rep[i] & 1)
2696         {
2697             // No trailing zeros in this word, try the next one
2698             if (++i == _rep.length) return ulong.max;
2699             start = i * 64;
2700         }
2701         // Adjust start to have only trailing zeros after it
2702         auto prefixLength = 64;
2703         while (_rep[i] & (ulong.max >> (64 - prefixLength)))
2704         {
2705             assert(prefixLength > 0);
2706             --prefixLength;
2707             ++start;
2708         }
2709 
2710         assert(howMany > prefixLength);
2711         auto needed = howMany - prefixLength;
2712         for (++i; needed >= 64; needed -= 64, ++i)
2713         {
2714             if (i >= _rep.length) return ulong.max;
2715             if (_rep[i] != 0) return findZeros(howMany, i * 64);
2716         }
2717         // Leftover < 64 bits
2718         assert(needed < 64);
2719         if (!needed) return start;
2720         if (i >= _rep.length) return ulong.max;
2721         if (leadingOnes(~_rep[i]) >= needed) return start;
2722         return findZeros(howMany, i * 64);
2723     }
2724 }
2725 
// BitVector: indexing, slice assignment across word boundaries, and the
// forward/backward search primitives.
@safe unittest
{
    auto v = BitVector(new ulong[10]);
    assert(v.length == 640);

    // Single-bit write and readback.
    v[] = 0;
    v[53] = 1;
    assert(v[52] == 0);
    assert(v[53] == 1);
    assert(v[54] == 0);

    // Slice assignment within one word; the upper bound is exclusive.
    v[] = 0;
    v[53 .. 55] = 1;
    assert(v[52] == 0);
    assert(v[53] == 1);
    assert(v[54] == 1);
    assert(v[55] == 0);

    // Slice assignment spanning a word boundary (bits 2..64).
    v[] = 0;
    v[2 .. 65] = 1;
    assert(v.rep[0] == 0x3FFF_FFFF_FFFF_FFFF);
    assert(v.rep[1] == 0x8000_0000_0000_0000);
    assert(v.rep[2] == 0);

    // find1Backward on an empty vector finds nothing.
    v[] = 0;
    assert(v.find1Backward(0) == ulong.max);
    assert(v.find1Backward(43) == ulong.max);
    assert(v.find1Backward(83) == ulong.max);

    v[0] = 1;
    assert(v.find1Backward(0) == 0);
    assert(v.find1Backward(43) == 0);
    import std.conv : text;
    assert(v.find1Backward(83) == 0, text(v.find1Backward(83)));

    // The backward search is inclusive of its starting index.
    v[0] = 0;
    v[101] = 1;
    assert(v.find1Backward(0) == ulong.max);
    assert(v.find1Backward(43) == ulong.max);
    assert(v.find1Backward(83) == ulong.max);
    assert(v.find1Backward(100) == ulong.max);
    assert(v.find1Backward(101) == 101);
    assert(v.find1Backward(553) == 101);

    // Empty and full-range slice assignments are accepted.
    v[0 .. v.length] = 0;
    v[v.length .. v.length] = 0;
    v[0 .. 0] = 0;

    // find1 scans forward, also inclusive of its starting index.
    v[] = 0;
    assert(v.find1(0) == v.length);
    v[139] = 1;
    assert(v.find1(0) == 139);
    assert(v.find1(100) == 139);
    assert(v.find1(138) == 139);
    assert(v.find1(139) == 139);
    assert(v.find1(140) == v.length);

    // findZeros on an all-zero vector finds a run wherever asked, and
    // reports ulong.max when the request exceeds the remaining bits.
    v[] = 0;
    assert(v.findZeros(100, 0) == 0);
    foreach (i; 0 .. 500)
        assert(v.findZeros(100, i) == i, text(v.findZeros(100, i), " != ", i));
    assert(v.findZeros(540, 99) == 99);
    assert(v.findZeros(99, 540) == 540);
    assert(v.findZeros(540, 100) == 100);
    assert(v.findZeros(640, 0) == 0);
    assert(v.findZeros(641, 1) == ulong.max);
    assert(v.findZeros(641, 100) == ulong.max);
}