// STXXL 1.2.1 -- include/stxxl/bits/containers/priority_queue.h
1 /***************************************************************************
2  * include/stxxl/bits/containers/priority_queue.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 1999 Peter Sanders <sanders@mpi-sb.mpg.de>
7  * Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev@mpi-sb.mpg.de>
8  * Copyright (C) 2007 Johannes Singler <singler@ira.uka.de>
9  * Copyright (C) 2007, 2008 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
10  *
11  * Distributed under the Boost Software License, Version 1.0.
12  * (See accompanying file LICENSE_1_0.txt or copy at
13  * http://www.boost.org/LICENSE_1_0.txt)
14  **************************************************************************/
15 
16 #ifndef STXXL_PRIORITY_QUEUE_HEADER
17 #define STXXL_PRIORITY_QUEUE_HEADER
18 
#include <algorithm>
#include <cassert>
#include <functional>
#include <iterator>
#include <list>
#include <vector>

#include <stxxl/bits/mng/mng.h>
#include <stxxl/bits/mng/prefetch_pool.h>
#include <stxxl/bits/mng/write_pool.h>
#include <stxxl/bits/common/tmeta.h>
27 
28 
29 __STXXL_BEGIN_NAMESPACE
30 
34 
37 namespace priority_queue_local
38 {
/** Fixed-capacity binary max-heap (w.r.t. _Compare) kept in internal memory.
 *
 *  The interface mirrors std::priority_queue, but the backing sequence is
 *  pre-allocated to @c capacity elements and the fill level is tracked in
 *  @c N, so push/pop never reallocate.  The caller must not exceed the
 *  capacity; debug builds now assert on misuse (overfull push, empty
 *  top/pop) instead of silently invoking undefined behavior.
 */
template <typename _Tp, typename _Sequence = std::vector<_Tp>,
          typename _Compare = std::less<typename _Sequence::value_type> >
class internal_priority_queue
{
    // concept requirements
    typedef typename _Sequence::value_type _Sequence_value_type;

public:
    typedef typename _Sequence::value_type value_type;
    typedef typename _Sequence::reference reference;
    typedef typename _Sequence::const_reference const_reference;
    typedef typename _Sequence::size_type size_type;
    typedef _Sequence container_type;

protected:
    // See queue::c for notes on these names.
    _Sequence c;   // pre-sized storage, heap-ordered in [0, N)
    _Compare comp; // strict weak ordering; top() is the maximum w.r.t. comp
    size_type N;   // current number of stored elements

public:
    //! Create an empty queue with room for \c capacity elements.
    explicit
    internal_priority_queue(size_type capacity)
        : c(capacity), N(0)
    { }

    //! Returns true if the queue contains no elements.
    bool
    empty() const
    { return N == 0; }

    //! Returns the number of elements currently stored.
    size_type
    size() const
    { return N; }

    //! Read-only reference to the largest element (w.r.t. comp).
    //! Precondition: !empty().
    const_reference
    top() const
    {
        assert(N > 0); // top() on an empty queue is undefined
        return c.front();
    }

    //! Insert @c __x into the heap.
    //! Precondition: size() < capacity (storage is fixed-size).
    void
    push(const value_type & __x)
    {
        assert(N < c.size()); // guard the fixed-size buffer
        c[N] = __x;
        ++N;
        std::push_heap(c.begin(), c.begin() + N, comp);
    }

    //! Remove the largest element.
    //! Precondition: !empty().
    void
    pop()
    {
        assert(N > 0); // pop() on an empty queue is undefined
        std::pop_heap(c.begin(), c.begin() + N, comp);
        --N;
    }

    //! Write all N elements in decreasing order (w.r.t. comp) to \c target.
    //! Note: this destroys the heap order of \c c; callers are expected
    //! to clear() afterwards.
    void sort_to(value_type * target)
    {
        std::sort(c.begin(), c.begin() + N, comp);
        std::reverse_copy(c.begin(), c.begin() + N, target);
    }

    //! Remove all contained elements (storage is kept).
    void clear()
    {
        N = 0;
    }
};
147 
148 
150 // auxiliary functions
151 
152 // merge sz element from the two sentinel terminated input
153 // sequences from0 and from1 to "to"
154 // advance from0 and from1 accordingly
155 // require: at least sz nonsentinel elements available in from0, from1
156 // require: to may overwrite one of the sources as long as
157 // *(fromx + sz) is before the end of fromx
158  template <class InputIterator, class OutputIterator, class Cmp_>
159  void merge_iterator(
160  InputIterator & from0,
161  InputIterator & from1,
162  OutputIterator to, unsigned_type sz, Cmp_ cmp)
163  {
164  OutputIterator done = to + sz;
165 
166  while (to != done)
167  {
168  if (cmp(*from0, *from1))
169  {
170  *to = *from1;
171  ++from1;
172  }
173  else
174  {
175  *to = *from0;
176  ++from0;
177  }
178  ++to;
179  }
180  }
181 
182 // merge sz element from the three sentinel terminated input
183 // sequences from0, from1 and from2 to "to"
184 // advance from0, from1 and from2 accordingly
185 // require: at least sz nonsentinel elements available in from0, from1 and from2
186 // require: to may overwrite one of the sources as long as
187 // *(fromx + sz) is before the end of fromx
// Three-way merge implemented as a goto state machine: the label
// s<a><b><c> encodes the currently known ordering, i.e. sequence <a>
// holds the next element to output, followed by <b> and <c>.
template <class InputIterator, class OutputIterator, class Cmp_>
void merge3_iterator(
    InputIterator & from0,
    InputIterator & from1,
    InputIterator & from2,
    OutputIterator to, unsigned_type sz, Cmp_ cmp)
{
    OutputIterator done = to + sz;

    // initial three-way comparison selects the starting state
    if (cmp(*from1, *from0)) {
        if (cmp(*from2, *from1)) {
            goto s012;
        }
        else {
            if (cmp(*from0, *from2)) {
                goto s201;
            }
            else {
                goto s021;
            }
        }
    } else {
        if (cmp(*from2, *from1)) {
            if (cmp(*from2, *from0)) {
                goto s102;
            }
            else {
                goto s120;
            }
        } else {
            goto s210;
        }
    }

// Emit one element from sequence a, then re-establish the state by
// comparing the new head of a against the heads of b and c.
#define Merge3Case(a, b, c) \
    s ## a ## b ## c : \
    if (to == done) \
        return;\
    *to = *from ## a; \
    ++to; \
    ++from ## a; \
    if (cmp(*from ## b, *from ## a)) \
        goto s ## a ## b ## c;\
    if (cmp(*from ## c, *from ## a)) \
        goto s ## b ## a ## c;\
    goto s ## b ## c ## a;

    // the order is chosen in such a way that
    // four of the trailing gotos can be eliminated by the optimizer
    Merge3Case(0, 1, 2);
    Merge3Case(1, 2, 0);
    Merge3Case(2, 0, 1);
    Merge3Case(1, 0, 2);
    Merge3Case(0, 2, 1);
    Merge3Case(2, 1, 0);

#undef Merge3Case
}
246 
247 
248 // merge sz element from the four sentinel terminated input
249 // sequences from0, from1, from2 and from3 to "to"
250 // advance from0, from1, from2 and from3 accordingly
251 // require: at least sz nonsentinel elements available in from0, from1, from2 and from3
252 // require: to may overwrite one of the sources as long as
253 // *(fromx + sz) is before the end of fromx
// Four-way merge, same state-machine technique as merge3_iterator:
// the label s<a><b><c><d> encodes the known ordering of the four
// current sequence heads.
template <class InputIterator, class OutputIterator, class Cmp_>
void merge4_iterator(
    InputIterator & from0,
    InputIterator & from1,
    InputIterator & from2,
    InputIterator & from3,
    OutputIterator to, unsigned_type sz, Cmp_ cmp)
{
    OutputIterator done = to + sz;

// Jump to state abcd iff the heads satisfy a <= b <= c <= d
// (in the inverted-comparator sense used by this file).
#define StartMerge4(a, b, c, d) \
    if ((!cmp(*from ## a, *from ## b)) && (!cmp(*from ## b, *from ## c)) && (!cmp(*from ## c, *from ## d))) \
        goto s ## a ## b ## c ## d;

    // b>a c>b d>c
    // a<b b<c c<d
    // a<=b b<=c c<=d
    // !(a>b) !(b>c) !(c>d)

    // exactly one of the 24 permutations matches, so one goto fires
    StartMerge4(0, 1, 2, 3);
    StartMerge4(1, 2, 3, 0);
    StartMerge4(2, 3, 0, 1);
    StartMerge4(3, 0, 1, 2);

    StartMerge4(0, 3, 1, 2);
    StartMerge4(3, 1, 2, 0);
    StartMerge4(1, 2, 0, 3);
    StartMerge4(2, 0, 3, 1);

    StartMerge4(0, 2, 3, 1);
    StartMerge4(2, 3, 1, 0);
    StartMerge4(3, 1, 0, 2);
    StartMerge4(1, 0, 2, 3);

    StartMerge4(2, 0, 1, 3);
    StartMerge4(0, 1, 3, 2);
    StartMerge4(1, 3, 2, 0);
    StartMerge4(3, 2, 0, 1);

    StartMerge4(3, 0, 2, 1);
    StartMerge4(0, 2, 1, 3);
    StartMerge4(2, 1, 3, 0);
    StartMerge4(1, 3, 0, 2);

    StartMerge4(1, 0, 3, 2);
    StartMerge4(0, 3, 2, 1);
    StartMerge4(3, 2, 1, 0);
    StartMerge4(2, 1, 0, 3);

// Emit one element from sequence a, then binary-search the new head
// of a into the known order of b, c, d to pick the next state.
#define Merge4Case(a, b, c, d) \
    s ## a ## b ## c ## d : \
    if (to == done) \
        return;\
    *to = *from ## a; \
    ++to; \
    ++from ## a; \
    if (cmp(*from ## c, *from ## a)) \
    { \
        if (cmp(*from ## b, *from ## a)) \
            goto s ## a ## b ## c ## d;\
        else \
            goto s ## b ## a ## c ## d;\
    } \
    else \
    { \
        if (cmp(*from ## d, *from ## a)) \
            goto s ## b ## c ## a ## d;\
        else \
            goto s ## b ## c ## d ## a;\
    }

    Merge4Case(0, 1, 2, 3);
    Merge4Case(1, 2, 3, 0);
    Merge4Case(2, 3, 0, 1);
    Merge4Case(3, 0, 1, 2);

    Merge4Case(0, 3, 1, 2);
    Merge4Case(3, 1, 2, 0);
    Merge4Case(1, 2, 0, 3);
    Merge4Case(2, 0, 3, 1);

    Merge4Case(0, 2, 3, 1);
    Merge4Case(2, 3, 1, 0);
    Merge4Case(3, 1, 0, 2);
    Merge4Case(1, 0, 2, 3);

    Merge4Case(2, 0, 1, 3);
    Merge4Case(0, 1, 3, 2);
    Merge4Case(1, 3, 2, 0);
    Merge4Case(3, 2, 0, 1);

    Merge4Case(3, 0, 2, 1);
    Merge4Case(0, 2, 1, 3);
    Merge4Case(2, 1, 3, 0);
    Merge4Case(1, 3, 0, 2);

    Merge4Case(1, 0, 3, 2);
    Merge4Case(0, 3, 2, 1);
    Merge4Case(3, 2, 1, 0);
    Merge4Case(2, 1, 0, 3);

#undef StartMerge4
#undef Merge4Case
}
358 
359 
365  template <typename Tp_, unsigned_type max_size_>
367  {
368  typedef Tp_ value_type;
369  typedef unsigned_type size_type;
370  enum { max_size = max_size_ };
371 
372  size_type size_;
373  value_type array[max_size];
374 
375  public:
376  internal_bounded_stack() : size_(0) { }
377 
378  void push(const value_type & x)
379  {
380  assert(size_ < max_size);
381  array[size_++] = x;
382  }
383 
384  const value_type & top() const
385  {
386  assert(size_ > 0);
387  return array[size_ - 1];
388  }
389 
390  void pop()
391  {
392  assert(size_ > 0);
393  --size_;
394  }
395 
396  void clear()
397  {
398  size_ = 0;
399  }
400 
401  size_type size() const
402  {
403  return size_;
404  }
405 
406  bool empty() const
407  {
408  return size_ == 0;
409  }
410  };
411 
412 
/** External merger: merges up to Arity_ sorted external-memory
 *  sequences (each a chain of blocks) using a loser tree.
 *  Cmp_ follows the STXXL convention: cmp(a, b) true means a has the
 *  lower priority, and cmp.min_value() is the sentinel that terminates
 *  every sequence.
 */
template <class BlockType_,
          class Cmp_,
          unsigned Arity_,
          class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY>
class ext_merger : private noncopyable
{
public:
    typedef stxxl::uint64 size_type;
    typedef BlockType_ block_type;
    typedef typename block_type::bid_type bid_type;
    typedef typename block_type::value_type value_type;
    typedef Cmp_ comparator_type;
    typedef AllocStr_ alloc_strategy;
    typedef value_type Element;

    // KNKMAX / 2 < arity <= KNKMAX
    // KNKMAX is arity rounded up to the next power of two, so the
    // loser tree is always a complete binary tree.
    enum { arity = Arity_, KNKMAX = 1UL << (LOG2<Arity_>::ceil) };

    // Reinterpret the shared one-element sentinel block as an ordinary
    // block pointer.  NOTE(review): the typedef of sentinel_block_type
    // is not visible in this excerpt -- presumably a one-element block
    // type; verify in the full file.
    block_type * convert_block_pointer(sentinel_block_type * arg)
    {
        return reinterpret_cast<block_type *>(arg);
    }

protected:
    comparator_type cmp;

    // a is the sentinel iff !(min_value < a), i.e. a <= min_value
    bool is_sentinel(const Element & a) const
    {
        return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value()
    }

    // a is a real element iff min_value < a
    bool not_sentinel(const Element & a) const
    {
        return cmp(cmp.min_value(), a); // a > cmp.min_value()
    }
453 
    // State of one input sequence: the block currently held in memory,
    // the cursor position inside it, and the list of block IDs forming
    // the rest of the sequence on disk.
    struct sequence_state : private noncopyable
    {
        unsigned_type current;      //current index
        block_type * block;         //current block
        std::list<bid_type> * bids; //list of blocks forming this sequence
        comparator_type cmp;
        ext_merger * merger;        // back pointer to the owning merger (for pool access)
        bool allocated;             // true while this slot holds a live segment

        //! Current element of the sequence.
        const value_type & operator * () const
        {
            return (*block)[current];
        }

        sequence_state() : bids(NULL), allocated(false)
        { }

        ~sequence_state()
        {
            STXXL_VERBOSE2("ext_merger sequence_state::~sequence_state()");
            // return the blocks of an unfinished sequence to the block manager
            if (bids != NULL)
            {
                block_manager * bm = block_manager::get_instance();
                bm->delete_blocks(bids->begin(), bids->end());
                delete bids;
            }
        }

        //! Turn the sequence into an empty one: its single visible
        //! element becomes the sentinel.
        void make_inf()
        {
            current = 0;
            (*block)[0] = cmp.min_value();
        }

        bool is_sentinel(const Element & a) const
        {
            return !(cmp(cmp.min_value(), a));
        }

        bool not_sentinel(const Element & a) const
        {
            return cmp(cmp.min_value(), a);
        }

        void swap(sequence_state & obj)
        {
            if (&obj != this)
            {
                std::swap(current, obj.current);
                std::swap(block, obj.block);
                std::swap(bids, obj.bids);
                assert(merger == obj.merger); // states always belong to the same merger
                std::swap(allocated, obj.allocated);
            }
        }

        //! Advance to the next element, loading the next block and
        //! hinting the one after it when a block border is crossed.
        sequence_state & operator ++ ()
        {
            assert(not_sentinel((*block)[current]));
            assert(current < block->size);

            ++current;

            if (current == block->size)
            {
                STXXL_VERBOSE2("ext_merger sequence_state operator++ crossing block border ");
                // go to the next block
                assert(bids != NULL);
                if (bids->empty()) // if there is no next block
                {
                    STXXL_VERBOSE2("ext_merger sequence_state operator++ it was the last block in the sequence ");
                    delete bids;
                    bids = NULL;
                    make_inf();
                }
                else
                {
                    STXXL_VERBOSE2("ext_merger sequence_state operator++ there is another block ");
                    bid_type bid = bids->front();
                    bids->pop_front();
                    if (!(bids->empty()))
                    {
                        STXXL_VERBOSE2("ext_merger sequence_state operator++ one more block exists in a sequence: " <<
                                       "flushing this block in write cache (if not written yet) and giving hint to prefetcher");
                        bid_type next_bid = bids->front();
                        //Hint next block of sequence.
                        //This is mandatory to ensure proper synchronization between prefetch pool and write pool.
                        merger->p_pool->hint(next_bid, *(merger->w_pool));
                    }
                    merger->p_pool->read(block, bid)->wait();
                    STXXL_VERBOSE2("first element of read block " << bid << " " << *(block->begin()) << " cached in " << block);
                    block_manager::get_instance()->delete_block(bid);
                    current = 0;
                }
            }
            return *this;
        }
    };
553 
554 
    // loser tree node: a value paired with the index of the segment
    // it came from
    struct Entry
    {
        value_type key;      // Key of Loser element (winner for 0)
        unsigned_type index; // the number of losing segment
    };

    size_type size_;    // total number of elements stored
    unsigned_type logK; // log of current tree size
    unsigned_type k;    // invariant (k == 1 << logK), always a power of two
    // only entries 0 .. arity-1 may hold actual sequences, the other
    // entries arity .. KNKMAX-1 are sentinels to make the size of the tree
    // a power of two always

    // stack of empty segment indices
    // NOTE(review): the declaration of the free_segments member is not
    // visible in this excerpt; it is used below as a bounded stack.

    // upper levels of loser trees
    // entry[0] contains the winner info
    Entry entry[KNKMAX];

    // leaf information
    // note that Knuth uses indices k..k-1
    // while we use 0..k-1
    sequence_state states[KNKMAX]; // sequence including current position, dereference gives current element

    prefetch_pool<block_type> * p_pool; // pool delivering prefetched blocks
    write_pool<block_type> * w_pool;    // pool buffering blocks being written out

    sentinel_block_type sentinel_block; // single shared block holding only the sentinel
585 
public:
    //! Construct an empty merger; pools must be supplied via
    //! set_pools() before any external I/O happens.
    ext_merger() :
        size_(0), logK(0), k(1), p_pool(0), w_pool(0)
    {
        init();
    }

    // NOTE(review): the first line of this constructor's signature
    // (the prefetch_pool parameter) is not visible in this excerpt.
        write_pool<block_type> * w_pool_) :
        size_(0), logK(0), k(1),
        p_pool(p_pool_),
        w_pool(w_pool_)
    {
        init();
    }

    virtual ~ext_merger()
    {
        STXXL_VERBOSE1("ext_merger::~ext_merger()");
        // only the first arity states own their blocks; the remaining
        // slots alias the shared sentinel block and must not be deleted
        for (unsigned_type i = 0; i < arity; ++i)
        {
            delete states[i].block;
        }
    }

    //! Install (or replace) the prefetch and write pools used for I/O.
    void set_pools(prefetch_pool<block_type> * p_pool_,
                   write_pool<block_type> * w_pool_)
    {
        p_pool = p_pool_;
        w_pool = w_pool_;
    }
617 
private:
    //! Shared initialization: set up all leaf states as empty
    //! (sentinel) sequences, mark slot 0 free, and build the loser
    //! tree over the all-empty leaves.
    void init()
    {
        STXXL_VERBOSE2("ext_merger::init()");
        assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering

        sentinel_block[0] = cmp.min_value();

        for (unsigned_type i = 0; i < KNKMAX; ++i)
        {
            states[i].merger = this;
            // only the first arity slots get real blocks; the rest
            // permanently alias the shared sentinel block
            if (i < arity)
                states[i].block = new block_type;
            else
                states[i].block = convert_block_pointer(&(sentinel_block));

            states[i].make_inf();
        }

        assert(k == 1);
        free_segments.push(0); //total state: one free sequence

        rebuildLoserTree();
        assert(is_sentinel(*states[entry[0].index]));
    }
643 
    // rebuild loser tree information from the values in current
    void rebuildLoserTree()
    {
        unsigned_type winner = initWinner(1);
        entry[0].index = winner;
        entry[0].key = *(states[winner]);
    }


    // given any values in the leaves this
    // routine recomputes upper levels of the tree
    // from scratch in linear time
    // initialize entry[root].index and the subtree rooted there
    // return winner index
    unsigned_type initWinner(unsigned_type root)
    {
        if (root >= k) { // leaf reached
            return root - k;
        } else {
            // recursively determine the winners of both subtrees;
            // the node stores the loser, the winner is passed upward
            unsigned_type left = initWinner(2 * root);
            unsigned_type right = initWinner(2 * root + 1);
            Element lk = *(states[left]);
            Element rk = *(states[right]);
            if (!(cmp(lk, rk))) { // right subtree loses
                entry[root].index = right;
                entry[root].key = rk;
                return left;
            } else {
                entry[root].index = left;
                entry[root].key = lk;
                return right;
            }
        }
    }
678 
    // first go up the tree all the way to the root
    // hand down old winner for the respective subtree
    // based on new value, and old winner and loser
    // update each node on the path to the root top down.
    // This is implemented recursively
    void update_on_insert(
        unsigned_type node,
        const Element & newKey,
        unsigned_type newIndex,
        Element * winnerKey,
        unsigned_type * winnerIndex, // old winner
        unsigned_type * mask)        // 1 << (ceil(log KNK) - dist-from-root)
    {
        if (node == 0) { // winner part of root
            *mask = 1 << (logK - 1);
            *winnerKey = entry[0].key;
            *winnerIndex = entry[0].index;
            // the root may be beaten directly by the new key
            if (cmp(entry[node].key, newKey))
            {
                entry[node].key = newKey;
                entry[node].index = newIndex;
            }
        } else {
            // recurse to the root first, then fix up top-down
            update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
            Element loserKey = entry[node].key;
            unsigned_type loserIndex = entry[node].index;
            if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees
                if (cmp(loserKey, newKey)) { // newKey will have influence here
                    if (cmp(*winnerKey, newKey)) { // old winner loses here
                        entry[node].key = *winnerKey;
                        entry[node].index = *winnerIndex;
                    } else { // new entry loses here
                        entry[node].key = newKey;
                        entry[node].index = newIndex;
                    }
                }
                *winnerKey = loserKey;
                *winnerIndex = loserIndex;
            }
            // note that nothing needs to be done if
            // the winner came from the same subtree
            // a) newKey <= winnerKey => even more reason for the other tree to lose
            // b) newKey > winnerKey => the old winner will beat the new
            // entry further down the tree
            // also the same old winner is handed down the tree

            *mask >>= 1; // next level
        }
    }
728 
    // make the tree two times as wide
    void doubleK()
    {
        STXXL_VERBOSE1("ext_merger::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size());
        assert(k > 0);
        assert(k < arity);
        assert(free_segments.empty()); // stack was free (probably not needed)

        // make all new entries free
        // and push them on the free stack
        for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards
        {
            states[i].make_inf();
            // slots beyond arity stay permanent sentinels, never free
            if (i < arity)
                free_segments.push(i);
        }

        // double the size
        k *= 2;
        logK++;

        STXXL_VERBOSE1("ext_merger::doubleK (after) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " arity=" << arity << " #free=" << free_segments.size());
        assert(!free_segments.empty());

        // recompute loser tree information
        rebuildLoserTree();
    }


    // compact nonempty segments in the left half of the tree
    void compactTree()
    {
        STXXL_VERBOSE1("ext_merger::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
        assert(logK > 0);

        // compact all nonempty segments to the left

        unsigned_type to = 0;
        for (unsigned_type from = 0; from < k; from++)
        {
            if (!is_segment_empty(from))
            {
                assert(is_segment_allocated(from));
                if (from != to) {
                    assert(!is_segment_allocated(to));
                    states[to].swap(states[from]);
                }
                ++to;
            }
        }

        // half degree as often as possible
        while (k > 1 && to <= (k / 2)) {
            k /= 2;
            logK--;
        }

        // overwrite garbage and compact the stack of free segment indices
        free_segments.clear(); // none free
        for ( ; to < k; to++) {
            assert(!is_segment_allocated(to));
            states[to].make_inf();
            if (to < arity)
                free_segments.push(to);
        }

        STXXL_VERBOSE1("ext_merger::compactTree (after) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
        assert(k > 0);

        // recompute loser tree information
        rebuildLoserTree();
    }
801 
802 
#if 0
    // disabled: swapping mergers would also require swapping the pool
    // pointers consistently (see commented lines below)
    void swap(ext_merger & obj)
    {
        std::swap(cmp, obj.cmp);
        std::swap(free_segments, obj.free_segments);
        std::swap(size_, obj.size_);
        std::swap(logK, obj.logK);
        std::swap(k, obj.k);
        swap_1D_arrays(entry, obj.entry, KNKMAX);
        swap_1D_arrays(states, obj.states, KNKMAX);

        // std::swap(p_pool,obj.p_pool);
        // std::swap(w_pool,obj.w_pool);
    }
#endif

public:
    //! Rough upper bound on the internal memory held by this merger
    //! (one block per potential sequence slot).
    unsigned_type mem_cons() const // only rough estimation
    {
        return (arity * block_type::raw_size);
    }
824 
    // delete the (length = end-begin) smallest elements and write them to "begin..end"
    // empty segments are deallocated
    // require:
    // - there are at least length elements
    // - segments are ended by sentinels
    template <class OutputIterator>
    void multi_merge(OutputIterator begin, OutputIterator end)
    {
        size_type length = end - begin;

        STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s), length = " << length);

        if (length == 0)
            return;

        assert(k > 0);
        assert(length <= size_);

        //Hint first non-internal (actually second) block of each sequence.
        //This is mandatory to ensure proper synchronization between prefetch pool and write pool.
        for (unsigned_type i = 0; i < k; ++i)
        {
            if (states[i].bids != NULL && !states[i].bids->empty())
                p_pool->hint(states[i].bids->front(), *w_pool);
        }

        // dispatch on tree size: k in {1,2,4} uses specialized mergers,
        // logK in [3,10] a compile-time unrolled loser tree, larger k
        // the generic loser tree
        switch (logK) {
        case 0:
            assert(k == 1);
            assert(entry[0].index == 0);
            assert(free_segments.empty());
            //memcpy(to, states[0], length * sizeof(Element));
            //std::copy(states[0],states[0]+length,to);
            for (size_type i = 0; i < length; ++i, ++(states[0]), ++begin)
                *begin = *(states[0]);

            entry[0].key = **states;
            if (is_segment_empty(0))
                deallocate_segment(0);

            break;
        case 1:
            assert(k == 2);
            merge_iterator(states[0], states[1], begin, length, cmp);
            rebuildLoserTree();
            if (is_segment_empty(0) && is_segment_allocated(0))
                deallocate_segment(0);

            if (is_segment_empty(1) && is_segment_allocated(1))
                deallocate_segment(1);

            break;
        case 2:
            assert(k == 4);
            // a 3-way merge suffices if the fourth segment is already empty
            if (is_segment_empty(3))
                merge3_iterator(states[0], states[1], states[2], begin, length, cmp);
            else
                merge4_iterator(states[0], states[1], states[2], states[3], begin, length, cmp);
            rebuildLoserTree();
            if (is_segment_empty(0) && is_segment_allocated(0))
                deallocate_segment(0);

            if (is_segment_empty(1) && is_segment_allocated(1))
                deallocate_segment(1);

            if (is_segment_empty(2) && is_segment_allocated(2))
                deallocate_segment(2);

            if (is_segment_empty(3) && is_segment_allocated(3))
                deallocate_segment(3);

            break;
        case 3: multi_merge_f<OutputIterator, 3>(begin, end);
            break;
        case 4: multi_merge_f<OutputIterator, 4>(begin, end);
            break;
        case 5: multi_merge_f<OutputIterator, 5>(begin, end);
            break;
        case 6: multi_merge_f<OutputIterator, 6>(begin, end);
            break;
        case 7: multi_merge_f<OutputIterator, 7>(begin, end);
            break;
        case 8: multi_merge_f<OutputIterator, 8>(begin, end);
            break;
        case 9: multi_merge_f<OutputIterator, 9>(begin, end);
            break;
        case 10: multi_merge_f<OutputIterator, 10>(begin, end);
            break;
        default: multi_merge_k(begin, end);
            break;
        }


        size_ -= length;

        // compact tree if it got considerably smaller
        {
            const unsigned_type num_segments_used = std::min<unsigned_type>(arity, k) - free_segments.size();
            const unsigned_type num_segments_trigger = k - (3 * k / 5);
            // using k/2 would be worst case inefficient (for large k)
            // for k \in {2, 4, 8} the trigger is k/2 which is good
            // because we have special mergers for k \in {1, 2, 4}
            // there is also a special 3-way-merger, that will be
            // triggered if k == 4 && is_segment_empty(3)
            STXXL_VERBOSE3("ext_merger compact? k=" << k << " #used=" << num_segments_used
                                                    << " <= #trigger=" << num_segments_trigger << " ==> "
                                                    << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
                                                    << " || "
                                                    << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
                                                    << " #free=" << free_segments.size());
            if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
                          (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
            {
                compactTree();
            }
        }
    }
942 
private:
    // multi-merge for arbitrary K: generic loser-tree walk, one
    // tree ascent per output element
    template <class OutputIterator>
    void multi_merge_k(OutputIterator begin, OutputIterator end)
    {
        Entry * currentPos;
        Element currentKey;
        unsigned_type currentIndex; // leaf pointed to by current entry
        unsigned_type kReg = k;
        OutputIterator done = end;
        OutputIterator to = begin;
        unsigned_type winnerIndex = entry[0].index;
        Element winnerKey = entry[0].key;

        while (to != done)
        {
            // write result
            *to = *(states[winnerIndex]);

            // advance winner segment
            ++(states[winnerIndex]);

            winnerKey = *(states[winnerIndex]);

            // remove winner segment if empty now
            if (is_sentinel(winnerKey)) //
                deallocate_segment(winnerIndex);


            // go up the entry-tree, swapping the winner with any
            // stored loser that beats it
            for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) {
                currentPos = entry + i;
                currentKey = currentPos->key;
                if (cmp(winnerKey, currentKey)) {
                    currentIndex = currentPos->index;
                    currentPos->key = winnerKey;
                    currentPos->index = winnerIndex;
                    winnerKey = currentKey;
                    winnerIndex = currentIndex;
                }
            }

            ++to;
        }
        // store the final winner back into the root
        entry[0].index = winnerIndex;
        entry[0].key = winnerKey;
    }

    // multi-merge for a fixed tree height LogK: the tree ascent is
    // fully unrolled via the TreeStep macro
    template <class OutputIterator, unsigned LogK>
    void multi_merge_f(OutputIterator begin, OutputIterator end)
    {
        OutputIterator done = end;
        OutputIterator to = begin;
        unsigned_type winnerIndex = entry[0].index;
        Entry * regEntry = entry;
        sequence_state * regStates = states;
        Element winnerKey = entry[0].key;

        assert(logK >= LogK);
        while (to != done)
        {
            // write result
            *to = *(regStates[winnerIndex]);

            // advance winner segment
            ++(regStates[winnerIndex]);

            winnerKey = *(regStates[winnerIndex]);


            // remove winner segment if empty now
            if (is_sentinel(winnerKey))
                deallocate_segment(winnerIndex);


            ++to;

            // update loser tree
#define TreeStep(L) \
    if (1 << LogK >= 1 << L) { \
        Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
        Element key ## L = pos ## L->key; \
        if (cmp(winnerKey, key ## L)) { \
            unsigned_type index ## L = pos ## L->index; \
            pos ## L->key = winnerKey; \
            pos ## L->index = winnerIndex; \
            winnerKey = key ## L; \
            winnerIndex = index ## L; \
        } \
    }
            TreeStep(10);
            TreeStep(9);
            TreeStep(8);
            TreeStep(7);
            TreeStep(6);
            TreeStep(5);
            TreeStep(4);
            TreeStep(3);
            TreeStep(2);
            TreeStep(1);
#undef TreeStep
        }
        // store the final winner back into the root
        regEntry[0].index = winnerIndex;
        regEntry[0].key = winnerKey;
    }
1048 
public:
    //! True iff another segment can be accepted (a free slot exists
    //! or the tree can still be widened).
    bool spaceIsAvailable() const // for new segment
    {
        return k < arity || !free_segments.empty();
    }


    // insert segment beginning at to
    // require: spaceIsAvailable() == 1
    template <class Merger>
    void insert_segment(Merger & another_merger, size_type segment_size)
    {
        STXXL_VERBOSE1("ext_merger::insert_segment(merger,...)" << this);

        if (segment_size > 0)
        {
            // get a free slot
            if (free_segments.empty()) { // tree is too small
                doubleK();
            }
            assert(!free_segments.empty());
            unsigned_type free_slot = free_segments.top();
            free_segments.pop();


            // link new segment
            assert(segment_size);
            unsigned_type nblocks = segment_size / block_type::size;
            //assert(nblocks); // at least one block
            STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks);
            if (nblocks == 0)
            {
                STXXL_VERBOSE1("ext_merger::insert_segment(merger,...) WARNING: inserting a segment with " <<
                               nblocks << " blocks");
                STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS");
            }
            // the first (partially filled) block stays in memory;
            // a multiple-of-block-size segment still keeps one full block
            unsigned_type first_size = segment_size % block_type::size;
            if (first_size == 0)
            {
                first_size = block_type::size;
                --nblocks;
            }
            block_manager * bm = block_manager::get_instance();
            std::list<bid_type> * bids = new std::list<bid_type>(nblocks);
            bm->new_blocks(alloc_strategy(), bids->begin(), bids->end());
            block_type * first_block = new block_type;

            // drain the source merger into the in-memory first block
            // (right-aligned so the block ends at a block border) ...
            another_merger.multi_merge(
                first_block->begin() + (block_type::size - first_size),
                first_block->end());

            STXXL_VERBOSE1("last element of first block " << *(first_block->end() - 1));
            assert(!cmp(*(first_block->begin() + (block_type::size - first_size)), *(first_block->end() - 1)));

            assert(w_pool->size() > 0);

            // ... and into the external blocks via the write pool
            for (typename std::list<bid_type>::iterator curbid = bids->begin(); curbid != bids->end(); ++curbid)
            {
                block_type * b = w_pool->steal();
                another_merger.multi_merge(b->begin(), b->end());
                STXXL_VERBOSE1("first element of following block " << *curbid << " " << *(b->begin()));
                STXXL_VERBOSE1("last element of following block " << *curbid << " " << *(b->end() - 1));
                assert(!cmp(*(b->begin()), *(b->end() - 1)));
                w_pool->write(b, *curbid); //->wait() does not help
                STXXL_VERBOSE1("written to block " << *curbid << " cached in " << b);
            }

            insert_segment(bids, first_block, first_size, free_slot);

            size_ += segment_size;

            // propagate new information up the tree
            Element dummyKey;
            unsigned_type dummyIndex;
            unsigned_type dummyMask;
            update_on_insert((free_slot + k) >> 1, *(states[free_slot]), free_slot,
                             &dummyKey, &dummyIndex, &dummyMask);
        } else {
            // deallocate memory ?
            STXXL_VERBOSE1("Merged segment with zero size.");
        }
    }
1131 
    //! Total number of elements currently stored across all segments.
    size_type size() const { return size_; }

protected:
    //! Attach an already-materialized segment (block-id list plus the
    //! partially filled first block) to slot \c slot, taking ownership
    //! of both arguments.
    void insert_segment(std::list<bid_type> * bidlist, block_type * first_block,
                        unsigned_type first_size, unsigned_type slot)
    {
        STXXL_VERBOSE1("ext_merger::insert_segment(bidlist,...) " << this << " " << bidlist->size() << " " << slot);
        assert(!is_segment_allocated(slot));
        assert(first_size > 0);

        sequence_state & new_sequence = states[slot];
        // first element sits at the end of the first block
        new_sequence.current = block_type::size - first_size;
        std::swap(new_sequence.block, first_block);
        delete first_block; // now holds the slot's previous block
        std::swap(new_sequence.bids, bidlist);
        if (bidlist) // the old list
        {
            assert(bidlist->empty());
            delete bidlist;
        }
        new_sequence.allocated = true;
        assert(is_segment_allocated(slot));
    }

    // free an empty segment .
    void deallocate_segment(unsigned_type slot)
    {
        STXXL_VERBOSE1("ext_merger::deallocate_segment() deleting segment " << slot << " allocated=" << int(is_segment_allocated(slot)));
        assert(is_segment_allocated(slot));
        states[slot].allocated = false;
        states[slot].make_inf();

        // push on the stack of free segment indices
        free_segments.push(slot);
    }

    // is this segment empty ?
    bool is_segment_empty(unsigned_type slot) const
    {
        return is_sentinel(*(states[slot]));
    }

    // Is this segment allocated? Otherwise it's empty,
    // already on the stack of free segment indices and can be reused.
    bool is_segment_allocated(unsigned_type slot) const
    {
        return states[slot].allocated;
    }
}; //ext_merger
1183 
1184 
1186 // The data structure from Knuth, "Sorting and Searching", Section 5.4.1
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    class loser_tree : private noncopyable
    {
    public:
        typedef ValTp_ value_type;
        typedef Cmp_ comparator_type;
        typedef value_type Element;

    private:
        // One node of the loser tree: inner nodes store the loser of the
        // comparison played at that node; entry[0] stores the overall winner.
        struct Entry
        {
            value_type key;      // Key of Loser element (winner for 0)
            unsigned_type index; // number of losing segment
        };

        comparator_type cmp;
        // stack of free segment indices
        // NOTE(review): the declaration of the 'free_segments' member is
        // elided in this extract; from its usage (push/pop/top/empty/size/
        // clear, swapped via std::swap in swap()) it appears to be a bounded
        // stack of slot indices -- confirm against the full header.

        unsigned_type size_; // total number of elements stored
        unsigned_type logK; // log of current tree size
        unsigned_type k; // invariant (k == 1 << logK), always a power of two

        Element sentinel; // target of free segment pointers

        // upper levels of loser trees
        // entry[0] contains the winner info
        Entry entry[KNKMAX];

        // leaf information
        // note that Knuth uses indices k..2k-1
        // while we use 0..k-1
        Element * current[KNKMAX]; // pointer to current element
        Element * segment[KNKMAX]; // start of Segments
        unsigned_type segment_size[KNKMAX]; // just to count the internal memory consumption

        unsigned_type mem_cons_;

        // private member functions
        unsigned_type initWinner(unsigned_type root);
        void update_on_insert(unsigned_type node, const Element & newKey, unsigned_type newIndex,
                              Element * winnerKey, unsigned_type * winnerIndex, unsigned_type * mask);
        void deallocate_segment(unsigned_type slot);
        void doubleK();
        void compactTree();
        void rebuildLoserTree();
        bool is_segment_empty(unsigned_type slot);
        void multi_merge_k(Element * to, unsigned_type length);

        // Merge 'length' elements to 'to' for a tree whose height is the
        // compile-time constant LogK. The walk from the winner's leaf to the
        // root is fully unrolled via the TreeStep macro below; levels deeper
        // than LogK are eliminated at compile time.
        template <unsigned LogK>
        void multi_merge_f(Element * to, unsigned_type length)
        {
            //Entry *currentPos;
            //Element currentKey;
            //int currentIndex; // leaf pointed to by current entry
            Element * done = to + length;
            Entry * regEntry = entry;
            Element ** regStates = current;
            unsigned_type winnerIndex = regEntry[0].index;
            Element winnerKey = regEntry[0].key;
            Element * winnerPos;
            //Element sup = sentinel; // supremum

            assert(logK >= LogK);
            while (to != done)
            {
                winnerPos = regStates[winnerIndex];

                // write result
                *to = winnerKey;

                // advance winner segment
                ++winnerPos;
                regStates[winnerIndex] = winnerPos;
                winnerKey = *winnerPos;

                // remove winner segment if empty now
                if (is_sentinel(winnerKey))
                {
                    deallocate_segment(winnerIndex);
                }
                ++to;

                // update loser tree: replay the comparisons on the path from
                // the winner's leaf to the root, one TreeStep per level
#define TreeStep(L) \
    if (1 << LogK >= 1 << L) { \
        Entry * pos ## L = regEntry + ((winnerIndex + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
        Element key ## L = pos ## L->key; \
        if (cmp(winnerKey, key ## L)) { \
            unsigned_type index ## L = pos ## L->index; \
            pos ## L->key = winnerKey; \
            pos ## L->index = winnerIndex; \
            winnerKey = key ## L; \
            winnerIndex = index ## L; \
        } \
    }
                TreeStep(10);
                TreeStep(9);
                TreeStep(8);
                TreeStep(7);
                TreeStep(6);
                TreeStep(5);
                TreeStep(4);
                TreeStep(3);
                TreeStep(2);
                TreeStep(1);
#undef TreeStep
            }
            regEntry[0].index = winnerIndex;
            regEntry[0].key = winnerKey;
        }

    public:
        // An element equal to cmp.min_value() serves as end-of-segment marker.
        bool is_sentinel(const Element & a)
        {
            return !(cmp(cmp.min_value(), a));
        }
        bool not_sentinel(const Element & a)
        {
            return cmp(cmp.min_value(), a);
        }

    public:
        loser_tree();
        ~loser_tree();
        void init();

        void swap(loser_tree & obj)
        {
            std::swap(cmp, obj.cmp);
            std::swap(free_segments, obj.free_segments);
            std::swap(size_, obj.size_);
            std::swap(logK, obj.logK);
            std::swap(k, obj.k);
            std::swap(sentinel, obj.sentinel);
            swap_1D_arrays(entry, obj.entry, KNKMAX);
            swap_1D_arrays(current, obj.current, KNKMAX);
            swap_1D_arrays(segment, obj.segment, KNKMAX);
            swap_1D_arrays(segment_size, obj.segment_size, KNKMAX);
            std::swap(mem_cons_, obj.mem_cons_);
        }

        // remove the (end - begin) smallest elements and write them to [begin, end)
        void multi_merge(Element * begin, Element * end)
        {
            multi_merge(begin, end - begin);
        }
        void multi_merge(Element *, unsigned_type length);

        unsigned_type mem_cons() const { return mem_cons_; }

        bool spaceIsAvailable() const // for new segment
        {
            return k < KNKMAX || !free_segments.empty();
        }

        void insert_segment(Element * to, unsigned_type sz); // insert segment beginning at to
        unsigned_type size() { return size_; }
    };
1349 
1351  template <class ValTp_, class Cmp_, unsigned KNKMAX>
1352  loser_tree<ValTp_, Cmp_, KNKMAX>::loser_tree() : size_(0), logK(0), k(1), mem_cons_(0)
1353  {
1354  free_segments.push(0);
1355  segment[0] = 0;
1356  current[0] = &sentinel;
1357  // entry and sentinel are initialized by init
1358  // since they need the value of supremum
1359  init();
1360  }
1361 
    // (Re)initialize the tree: set up the sentinel value and recompute the
    // loser entries from the current[] pointers.
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::init()
    {
        assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
        sentinel = cmp.min_value();
        rebuildLoserTree();
        // when no real segment is present every leaf points at the sentinel,
        // so the computed winner must be a sentinel leaf
        assert(current[entry[0].index] == &sentinel);
    }
1370 
1371 
// rebuild loser tree information from the values in current
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::rebuildLoserTree()
    {
        assert(LOG2<KNKMAX>::floor == LOG2<KNKMAX>::ceil); // KNKMAX needs to be a power of two
        // recompute all inner nodes bottom-up; the overall winner is placed
        // into the special slot entry[0]
        unsigned_type winner = initWinner(1);
        entry[0].index = winner;
        entry[0].key = *(current[winner]);
    }
1381 
1382 
1383 // given any values in the leaves this
// routine recomputes upper levels of the tree
1385 // from scratch in linear time
1386 // initialize entry[root].index and the subtree rooted there
1387 // return winner index
1388  template <class ValTp_, class Cmp_, unsigned KNKMAX>
1389  unsigned_type loser_tree<ValTp_, Cmp_, KNKMAX>::initWinner(unsigned_type root)
1390  {
1391  if (root >= k) { // leaf reached
1392  return root - k;
1393  } else {
1394  unsigned_type left = initWinner(2 * root);
1395  unsigned_type right = initWinner(2 * root + 1);
1396  Element lk = *(current[left]);
1397  Element rk = *(current[right]);
1398  if (!(cmp(lk, rk))) { // right subtree looses
1399  entry[root].index = right;
1400  entry[root].key = rk;
1401  return left;
1402  } else {
1403  entry[root].index = left;
1404  entry[root].key = lk;
1405  return right;
1406  }
1407  }
1408  }
1409 
1410 
1411 // first go up the tree all the way to the root
1412 // hand down old winner for the respective subtree
1413 // based on new value, and old winner and loser
1414 // update each node on the path to the root top down.
1415 // This is implemented recursively
    // Propagate the key of a freshly inserted segment (leaf newIndex) from
    // 'node' up to the root. The recursion first ascends to the root, then on
    // the way back down hands the old winner of each subtree along via
    // *winnerKey/*winnerIndex, while *mask carries the bit that distinguishes
    // the two subtrees at the current level.
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::update_on_insert(
        unsigned_type node,
        const Element & newKey,
        unsigned_type newIndex,
        Element * winnerKey,
        unsigned_type * winnerIndex, // old winner
        unsigned_type * mask) // 1 << (ceil(log KNK) - dist-from-root)
    {
        if (node == 0) { // winner part of root
            *mask = 1 << (logK - 1); // mask separating the root's two subtrees
            *winnerKey = entry[0].key;
            *winnerIndex = entry[0].index;
            if (cmp(entry[node].key, newKey))
            {
                // the new key beats the current overall winner
                entry[node].key = newKey;
                entry[node].index = newIndex;
            }
        } else {
            // ascend first; the old winner information arrives in
            // *winnerKey/*winnerIndex when the call returns
            update_on_insert(node >> 1, newKey, newIndex, winnerKey, winnerIndex, mask);
            Element loserKey = entry[node].key;
            unsigned_type loserIndex = entry[node].index;
            if ((*winnerIndex & *mask) != (newIndex & *mask)) { // different subtrees
                if (cmp(loserKey, newKey)) { // newKey will have influence here
                    if (cmp(*winnerKey, newKey)) { // old winner loses here
                        entry[node].key = *winnerKey;
                        entry[node].index = *winnerIndex;
                    } else { // new entry loses here
                        entry[node].key = newKey;
                        entry[node].index = newIndex;
                    }
                }
                *winnerKey = loserKey;
                *winnerIndex = loserIndex;
            }
            // note that nothing needs to be done if
            // the winner came from the same subtree
            // a) newKey <= winnerKey => even more reason for the other tree to lose
            // b) newKey > winnerKey => the old winner will beat the new
            // entry further down the tree
            // also the same old winner is handed down the tree

            *mask >>= 1; // next level
        }
    }
1461 
1462 
// make the tree two times as wide
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::doubleK()
    {
        STXXL_VERBOSE3("loser_tree::doubleK (before) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
        assert(k > 0);
        assert(k < KNKMAX);
        assert(free_segments.empty()); // stack was free (probably not needed)

        // make all new entries free
        // and push them on the free stack
        // (pushed backwards so that the smallest new index k ends up on top
        // of the stack and slots are handed out in increasing order)
        for (unsigned_type i = 2 * k - 1; i >= k; i--) // backwards
        {
            current[i] = &sentinel;
            segment[i] = NULL;
            free_segments.push(i);
        }

        // double the size
        k *= 2;
        logK++;

        STXXL_VERBOSE3("loser_tree::doubleK (after) k=" << k << " logK=" << logK << " KNKMAX=" << KNKMAX << " #free=" << free_segments.size());
        assert(!free_segments.empty());

        // recompute loser tree information
        rebuildLoserTree();
    }
1491 
1492 
// compact nonempty segments in the left half of the tree
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::compactTree()
    {
        STXXL_VERBOSE3("loser_tree::compactTree (before) k=" << k << " logK=" << logK << " #free=" << free_segments.size());
        assert(logK > 0);

        // compact all nonempty segments to the left
        unsigned_type from = 0;
        unsigned_type to = 0;
        for ( ; from < k; from++)
        {
            if (not_sentinel(*(current[from])))
            {
                // slot 'from' still holds data: move its bookkeeping to slot 'to'
                segment_size[to] = segment_size[from];
                current[to] = current[from];
                segment[to] = segment[from];
                to++;
            }/*
                else
                {
                if(segment[from])
                {
                STXXL_VERBOSE2("loser_tree::compactTree() deleting segment "<<from<<
                " address: "<<segment[from]<<" size: "<<segment_size[from]);
                delete [] segment[from];
                segment[from] = 0;
                mem_cons_ -= segment_size[from];
                }
                }*/
        }

        // half degree as often as possible
        while (k > 1 && to <= (k / 2)) {
            k /= 2;
            logK--;
        }

        // overwrite garbage and compact the stack of free segment indices
        free_segments.clear(); // none free
        for ( ; to < k; to++) {
            // the remaining right-hand slots are empty and reusable
            current[to] = &sentinel;
            free_segments.push(to);
        }

        STXXL_VERBOSE3("loser_tree::compactTree (after) k=" << k << " logK=" << logK << " #free=" << free_segments.size());

        // recompute loser tree information
        rebuildLoserTree();
    }
1543 
1544 
// insert segment beginning at to
// require: spaceIsAvailable() == 1
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::insert_segment(Element * to, unsigned_type sz)
    {
        // Takes ownership of the array 'to' (released with delete[] here or
        // in deallocate_segment); it must hold sz non-sentinel elements
        // followed by one sentinel element at to[sz].
        STXXL_VERBOSE2("loser_tree::insert_segment(" << to << "," << sz << ")");
        //std::copy(to,to + sz,std::ostream_iterator<ValTp_>(std::cout, "\n"));

        if (sz > 0)
        {
            assert(not_sentinel(to[0]));
            assert(not_sentinel(to[sz - 1]));
            assert(is_sentinel(to[sz]));

            // get a free slot
            if (free_segments.empty()) { // tree is too small
                doubleK();
            }
            assert(!free_segments.empty());
            unsigned_type index = free_segments.top();
            free_segments.pop();


            // link new segment
            current[index] = segment[index] = to;
            // sz elements plus the trailing sentinel are accounted for
            segment_size[index] = (sz + 1) * sizeof(value_type);
            mem_cons_ += (sz + 1) * sizeof(value_type);
            size_ += sz;

            // propagate new information up the tree
            Element dummyKey;
            unsigned_type dummyIndex;
            unsigned_type dummyMask;
            update_on_insert((index + k) >> 1, *to, index,
                             &dummyKey, &dummyIndex, &dummyMask);
        } else {
            // immediately deallocate
            // this is not only an optimization
            // but also needed to keep free segments from
            // clogging up the tree
            delete[] to;
        }
    }
1588 
1589 
1590  template <class ValTp_, class Cmp_, unsigned KNKMAX>
1591  loser_tree<ValTp_, Cmp_, KNKMAX>::~loser_tree()
1592  {
1593  STXXL_VERBOSE1("loser_tree::~loser_tree()");
1594  for (unsigned_type i = 0; i < k; ++i)
1595  {
1596  if (segment[i])
1597  {
1598  STXXL_VERBOSE2("loser_tree::~loser_tree() deleting segment " << i);
1599  delete[] segment[i];
1600  mem_cons_ -= segment_size[i];
1601  }
1602  }
1603  // check whether we did not loose memory
1604  assert(mem_cons_ == 0);
1605  }
1606 
1607 // free an empty segment .
1608  template <class ValTp_, class Cmp_, unsigned KNKMAX>
1609  void loser_tree<ValTp_, Cmp_, KNKMAX>::deallocate_segment(unsigned_type slot)
1610  {
1611  // reroute current pointer to some empty sentinel segment
1612  // with a sentinel key
1613  STXXL_VERBOSE2("loser_tree::deallocate_segment() deleting segment " <<
1614  slot << " address: " << segment[slot] << " size: " << segment_size[slot]);
1615  current[slot] = &sentinel;
1616 
1617  // free memory
1618  delete[] segment[slot];
1619  segment[slot] = NULL;
1620  mem_cons_ -= segment_size[slot];
1621 
1622  // push on the stack of free segment indices
1623  free_segments.push(slot);
1624  }
1625 
1626 
// delete the length smallest elements and write them to "to"
// empty segments are deallocated
// require:
// - there are at least length elements
// - segments are ended by sentinels
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::multi_merge(Element * to, unsigned_type length)
    {
        STXXL_VERBOSE3("loser_tree::multi_merge(to=" << to << ", len=" << length << ") k=" << k);

        if (length == 0)
            return;

        assert(k > 0);
        assert(length <= size_);

        //This is the place to make statistics about internal multi_merge calls.

        // dispatch on the current tree height: trivial copy for k == 1,
        // specialized 2/3/4-way mergers for small k, the unrolled
        // multi_merge_f for heights 3..10, and the generic loop otherwise
        switch (logK) {
        case 0:
            assert(k == 1);
            assert(entry[0].index == 0);
            assert(free_segments.empty());
            //memcpy(to, current[0], length * sizeof(Element));
            std::copy(current[0], current[0] + length, to);
            current[0] += length;
            entry[0].key = **current;
            if (is_segment_empty(0))
                deallocate_segment(0);

            break;
        case 1:
            assert(k == 2);
            merge_iterator(current[0], current[1], to, length, cmp);
            rebuildLoserTree();
            if (is_segment_empty(0))
                deallocate_segment(0);

            if (is_segment_empty(1))
                deallocate_segment(1);

            break;
        case 2:
            assert(k == 4);
            // use the cheaper 3-way merge when the 4th sequence is dry
            if (is_segment_empty(3))
                merge3_iterator(current[0], current[1], current[2], to, length, cmp);
            else
                merge4_iterator(current[0], current[1], current[2], current[3], to, length, cmp);

            rebuildLoserTree();
            if (is_segment_empty(0))
                deallocate_segment(0);

            if (is_segment_empty(1))
                deallocate_segment(1);

            if (is_segment_empty(2))
                deallocate_segment(2);

            if (is_segment_empty(3))
                deallocate_segment(3);

            break;
        case 3: multi_merge_f<3>(to, length);
            break;
        case 4: multi_merge_f<4>(to, length);
            break;
        case 5: multi_merge_f<5>(to, length);
            break;
        case 6: multi_merge_f<6>(to, length);
            break;
        case 7: multi_merge_f<7>(to, length);
            break;
        case 8: multi_merge_f<8>(to, length);
            break;
        case 9: multi_merge_f<9>(to, length);
            break;
        case 10: multi_merge_f<10>(to, length);
            break;
        default: multi_merge_k(to, length);
            break;
        }


        size_ -= length;

        // compact tree if it got considerably smaller
        {
            const unsigned_type num_segments_used = k - free_segments.size();
            const unsigned_type num_segments_trigger = k - (3 * k / 5);
            // using k/2 would be worst case inefficient (for large k)
            // for k \in {2, 4, 8} the trigger is k/2 which is good
            // because we have special mergers for k \in {1, 2, 4}
            // there is also a special 3-way-merger, that will be
            // triggered if k == 4 && is_segment_empty(3)
            STXXL_VERBOSE3("loser_tree compact? k=" << k << " #used=" << num_segments_used
                                                    << " <= #trigger=" << num_segments_trigger << " ==> "
                                                    << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
                                                    << " || "
                                                    << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
                                                    << " #free=" << free_segments.size());
            if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
                          (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
            {
                compactTree();
            }
        }
        //std::copy(to,to + length,std::ostream_iterator<ValTp_>(std::cout, "\n"));
    }
1736 
1737 
1738 // is this segment empty and does not point to sentinel yet?
1739  template <class ValTp_, class Cmp_, unsigned KNKMAX>
1740  inline bool loser_tree<ValTp_, Cmp_, KNKMAX>::is_segment_empty(unsigned_type slot)
1741  {
1742  return (is_sentinel(*(current[slot])) && (current[slot] != &sentinel));
1743  }
1744 
// multi-merge for arbitrary K
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void loser_tree<ValTp_, Cmp_, KNKMAX>::
    multi_merge_k(Element * to, unsigned_type length)
    {
        Entry * currentPos;
        Element currentKey;
        unsigned_type currentIndex; // leaf pointed to by current entry
        unsigned_type kReg = k;
        Element * done = to + length;
        unsigned_type winnerIndex = entry[0].index;
        Element winnerKey = entry[0].key;
        Element * winnerPos;

        while (to != done)
        {
            winnerPos = current[winnerIndex];

            // write result
            *to = winnerKey;

            // advance winner segment
            ++winnerPos;
            current[winnerIndex] = winnerPos;
            winnerKey = *winnerPos;

            // remove winner segment if empty now
            if (is_sentinel(winnerKey)) //
                deallocate_segment(winnerIndex);


            // go up the entry-tree: replay the comparisons on the path from
            // the winner's leaf to the root, swapping loser information into
            // the visited nodes
            for (unsigned_type i = (winnerIndex + kReg) >> 1; i > 0; i >>= 1) {
                currentPos = entry + i;
                currentKey = currentPos->key;
                if (cmp(winnerKey, currentKey)) {
                    currentIndex = currentPos->index;
                    currentPos->key = winnerKey;
                    currentPos->index = winnerIndex;
                    winnerKey = currentKey;
                    winnerIndex = currentIndex;
                }
            }

            ++to;
        }
        // store the final winner back into the root entry
        entry[0].index = winnerIndex;
        entry[0].key = winnerKey;
    }
1794 }
1795 
1796 /*
1797 
1798  KNBufferSize1 = 32;
1799  KNN = 512; // bandwidth
1800  KNKMAX = 64; // maximal arity
1801  KNLevels = 4; // overall capacity >= KNN*KNKMAX^KNLevels
1802  LogKNKMAX = 6; // ceil(log KNK)
1803  */
1804 
1805 // internal memory consumption >= N_*(KMAX_^Levels_) + ext
1806 
// Compile-time parameter bundle for stxxl::priority_queue; the enum below
// re-exports the template arguments as named constants.
template <
    class Tp_,
    class Cmp_,
    unsigned BufferSize1_ = 32, // equalize procedure call overheads etc.
    unsigned N_ = 512, // bandwidth
    unsigned IntKMAX_ = 64, // maximal arity for internal mergers
    unsigned IntLevels_ = 4,
    unsigned BlockSize_ = (2 * 1024 * 1024),
    unsigned ExtKMAX_ = 64, // maximal arity for external mergers
    unsigned ExtLevels_ = 2,
    class AllocStr_ = STXXL_DEFAULT_ALLOC_STRATEGY
    >
struct priority_queue_config
{
    typedef Tp_ value_type;
    typedef Cmp_ comparator_type;
    typedef AllocStr_ alloc_strategy_type;
    enum
    {
        BufferSize1 = BufferSize1_,
        N = N_,
        IntKMAX = IntKMAX_,
        IntLevels = IntLevels_,
        ExtLevels = ExtLevels_,
        BlockSize = BlockSize_,
        ExtKMAX = ExtKMAX_,
        E = sizeof(Tp_) // size of one element in bytes
    };
};
1836 
1837 __STXXL_END_NAMESPACE
1838 
namespace std
{
    // Forward std::swap to the member swap() of the noncopyable merger
    // classes so generic code can exchange them cheaply.
    // NOTE(review): the standard only permits *specializations* in namespace
    // std, not new overloads; kept as-is for compatibility with existing code.
    template <class BlockType_,
              class Cmp_,
              unsigned Arity_,
              class AllocStr_>
    void swap(stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & a,
              stxxl::priority_queue_local::ext_merger<BlockType_, Cmp_, Arity_, AllocStr_> & b)
    {
        a.swap(b);
    }
    template <class ValTp_, class Cmp_, unsigned KNKMAX>
    void swap(stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & a,
              stxxl::priority_queue_local::loser_tree<ValTp_, Cmp_, KNKMAX> & b)
    {
        a.swap(b);
    }
}
1857 
1858 __STXXL_BEGIN_NAMESPACE
1859 
// External-memory priority queue: an insert heap feeds a hierarchy of
// internal loser trees and external mergers; deletions are served from a
// small delete buffer (buffer1) refilled from per-level group buffers
// (buffer2).
template <class Config_>
class priority_queue : private noncopyable
{
public:
    typedef Config_ Config;
    // re-export the configuration constants
    enum
    {
        BufferSize1 = Config::BufferSize1,
        N = Config::N,
        IntKMAX = Config::IntKMAX,
        IntLevels = Config::IntLevels,
        ExtLevels = Config::ExtLevels,
        Levels = Config::IntLevels + Config::ExtLevels,
        BlockSize = Config::BlockSize,
        ExtKMAX = Config::ExtKMAX
    };

    typedef typename Config::value_type value_type;
    typedef typename Config::comparator_type comparator_type;
    typedef typename Config::alloc_strategy_type alloc_strategy_type;
    typedef stxxl::uint64 size_type;

protected:
    // NOTE(review): several typedef lines (block_type, insert_heap_type and
    // the heads of the merger typedefs) are elided in this extract; the
    // fragments below belong to typedefs of the internal loser_tree merger
    // and the external ext_merger -- confirm against the full header.

        value_type,
        IntKMAX> int_merger_type;

        block_type,
        ExtKMAX,
        alloc_strategy_type> ext_merger_type;


    // one internal loser-tree merger per internal level
    int_merger_type itree[IntLevels];
    prefetch_pool<block_type> & p_pool; // possibly owned, see deallocate_pools
    write_pool<block_type> & w_pool;    // possibly owned, see deallocate_pools
    ext_merger_type * etree;            // array of ExtLevels external mergers

    // one delete buffer for each tree => group buffer
    value_type buffer2[Levels][N + 1]; // tree->buffer2->buffer1 (extra space for sentinel)
    value_type * minBuffer2[Levels]; // minBuffer2[i] is current start of buffer2[i], end is buffer2[i] + N

    // overall delete buffer
    value_type buffer1[BufferSize1 + 1];
    value_type * minBuffer1; // is current start of buffer1, end is buffer1 + BufferSize1

    comparator_type cmp;

    // insert buffer
    insert_heap_type insertHeap;

    // how many levels are active
    unsigned_type activeLevels;

    // total size not counting insertBuffer and buffer1
    size_type size_;
    bool deallocate_pools; // true iff p_pool/w_pool were allocated by this object

    // private member functions
    void refillBuffer1();
    unsigned_type refillBuffer2(unsigned_type k);

    unsigned_type makeSpaceAvailable(unsigned_type level);
    void emptyInsertHeap();

    value_type getSupremum() const { return cmp.min_value(); } //{ return buffer2[0][KNN].key; }
    unsigned_type getSize1() const { return (buffer1 + BufferSize1) - minBuffer1; }
    unsigned_type getSize2(unsigned_type i) const { return &(buffer2[i][N]) - minBuffer2[i]; }

public:
    // NOTE(review): a second constructor (taking references to externally
    // owned prefetch/write pools) is declared here in the full header but its
    // line is elided in this extract.

    // construct with internally allocated pools of the given byte sizes
    priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem);

    void swap(priority_queue & obj)
    {
        //swap_1D_arrays(itree,obj.itree,IntLevels); // does not work in g++ 3.4.3 :( bug?
        for (unsigned_type i = 0; i < IntLevels; ++i)
            std::swap(itree[i], obj.itree[i]);

        // std::swap(p_pool,obj.p_pool);
        // std::swap(w_pool,obj.w_pool);
        std::swap(etree, obj.etree);
        for (unsigned_type i1 = 0; i1 < Levels; ++i1)
            for (unsigned_type i2 = 0; i2 < (N + 1); ++i2)
                std::swap(buffer2[i1][i2], obj.buffer2[i1][i2]);

        swap_1D_arrays(minBuffer2, obj.minBuffer2, Levels);
        swap_1D_arrays(buffer1, obj.buffer1, BufferSize1 + 1);
        std::swap(minBuffer1, obj.minBuffer1);
        std::swap(cmp, obj.cmp);
        std::swap(insertHeap, obj.insertHeap);
        std::swap(activeLevels, obj.activeLevels);
        std::swap(size_, obj.size_);
        //std::swap(deallocate_pools,obj.deallocate_pools);
    }

    virtual ~priority_queue();

    // number of elements contained
    size_type size() const;

    bool empty() const { return (size() == 0); }

    // reference to the next element to be delivered
    const value_type & top() const;

    // remove the top element
    void pop();

    // insert a new element (must not equal cmp.min_value())
    void push(const value_type & obj);

    // total memory consumption in bytes (pools not included, see comments)
    unsigned_type mem_cons() const
    {
        unsigned_type dynam_alloc_mem(0), i(0);
        //dynam_alloc_mem += w_pool.mem_cons();
        //dynam_alloc_mem += p_pool.mem_cons();
        for ( ; i < IntLevels; ++i)
            dynam_alloc_mem += itree[i].mem_cons();

        for (i = 0; i < ExtLevels; ++i)
            dynam_alloc_mem += etree[i].mem_cons();


        return (sizeof(*this) +
                sizeof(ext_merger_type) * ExtLevels +
                dynam_alloc_mem);
    }
};
2043 
2044 
template <class Config_>
// NOTE(review): the 'size_type priority_queue<Config_>::size() const'
// signature line is elided in this extract.
{
    // total = elements in the merger trees and group buffers (size_)
    //       + insert heap contents minus the permanently kept sentinel
    //       + what remains in the delete buffer
    return size_ +
           insertHeap.size() - 1 +
           ((buffer1 + BufferSize1) - minBuffer1);
}
2052 
2053 
template <class Config_>
// NOTE(review): the 'const value_type & priority_queue<Config_>::top() const'
// signature line is elided in this extract.
{
    assert(!insertHeap.empty());

    // the next element to deliver is whichever of the two candidates --
    // insert heap top vs. delete buffer head -- wins under cmp
    const typename priority_queue<Config_>::value_type & t = insertHeap.top();
    if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, t))
        return t;


    return *minBuffer1;
}
2066 
template <class Config_>
// NOTE(review): the 'void priority_queue<Config_>::pop()' signature line is
// elided in this extract.
{
    //STXXL_VERBOSE1("priority_queue::pop()");
    assert(!insertHeap.empty());

    // remove whichever of the two candidate elements (insert heap top vs.
    // delete buffer head) top() would have returned
    if (/*(!insertHeap.empty()) && */ cmp(*minBuffer1, insertHeap.top()))
    {
        insertHeap.pop();
    }
    else
    {
        assert(minBuffer1 < buffer1 + BufferSize1);
        ++minBuffer1;
        // refill the delete buffer from the group buffers once it runs dry
        if (minBuffer1 == buffer1 + BufferSize1)
            refillBuffer1();
    }
}
2085 
template <class Config_>
inline void priority_queue<Config_>::push(const value_type & obj)
{
    //STXXL_VERBOSE3("priority_queue::push("<< obj <<")");
    // sentinel values are reserved for internal use
    assert(itree->not_sentinel(obj));
    // the heap holds at most N elements plus the permanent sentinel; flush it
    // into the merger hierarchy when full
    if (insertHeap.size() == N + 1)
        emptyInsertHeap();


    assert(!insertHeap.empty());

    insertHeap.push(obj);
}
2099 
2100 
2102 
template <class Config_>
// NOTE(review): the signature line of this constructor (taking references to
// an externally owned prefetch_pool and write_pool) is elided in this extract.
    p_pool(p_pool_), w_pool(w_pool_),
    insertHeap(N + 2),
    activeLevels(0), size_(0),
    deallocate_pools(false)
{
    // uses caller-owned pools; deallocate_pools stays false so the destructor
    // leaves them alone
    STXXL_VERBOSE2("priority_queue::priority_queue()");
    assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
    //etree = new ext_merger_type[ExtLevels](p_pool,w_pool);
    etree = new ext_merger_type[ExtLevels];
    for (unsigned_type j = 0; j < ExtLevels; ++j)
        etree[j].set_pools(&p_pool, &w_pool);

    value_type sentinel = cmp.min_value();
    buffer1[BufferSize1] = sentinel; // sentinel
    insertHeap.push(sentinel); // always keep the sentinel
    minBuffer1 = buffer1 + BufferSize1; // empty
    for (unsigned_type i = 0; i < Levels; i++)
    {
        buffer2[i][N] = sentinel; // sentinel
        minBuffer2[i] = &(buffer2[i][N]); // empty
    }
}
2127 
template <class Config_>
priority_queue<Config_>::priority_queue(unsigned_type p_pool_mem, unsigned_type w_pool_mem) :
    p_pool(*(new prefetch_pool<block_type>(p_pool_mem / BlockSize))),
    w_pool(*(new write_pool<block_type>(w_pool_mem / BlockSize))),
    insertHeap(N + 2),
    activeLevels(0), size_(0),
    deallocate_pools(true)
{
    // owns its pools: they are heap-allocated above and released by the
    // destructor because deallocate_pools is set
    STXXL_VERBOSE2("priority_queue::priority_queue()");
    assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
    etree = new ext_merger_type[ExtLevels];
    for (unsigned_type j = 0; j < ExtLevels; ++j)
        etree[j].set_pools(&p_pool, &w_pool);

    value_type sentinel = cmp.min_value();
    buffer1[BufferSize1] = sentinel; // sentinel
    insertHeap.push(sentinel); // always keep the sentinel
    minBuffer1 = buffer1 + BufferSize1; // empty
    for (unsigned_type i = 0; i < Levels; i++)
    {
        buffer2[i][N] = sentinel; // sentinel
        minBuffer2[i] = &(buffer2[i][N]); // empty
    }
}
2152 
template <class Config_>
// NOTE(review): the 'priority_queue<Config_>::~priority_queue()' signature
// line is elided in this extract.
{
    STXXL_VERBOSE2("priority_queue::~priority_queue()");
    // destroy the pools only if this instance created them itself
    if (deallocate_pools)
    {
        delete &p_pool;
        delete &w_pool;
    }

    delete[] etree;
}
2165 
2166 //--------------------- Buffer refilling -------------------------------
2167 
// refill buffer2[j] and return number of elements found
template <class Config_>
unsigned_type priority_queue<Config_>::refillBuffer2(unsigned_type j)
{
    STXXL_VERBOSE2("priority_queue::refillBuffer2(" << j << ")");

    value_type * oldTarget;
    unsigned_type deleteSize;
    size_type treeSize = (j < IntLevels) ? itree[j].size() : etree[j - IntLevels].size(); //elements left in segments
    unsigned_type bufferSize = buffer2[j] + N - minBuffer2[j]; //elements left in target buffer
    if (treeSize + bufferSize >= size_type(N))
    { // buffer will be filled completely
        oldTarget = buffer2[j];
        deleteSize = N - bufferSize;
    }
    else
    {
        // not enough elements to fill the whole buffer: place the data at
        // the end so the buffer stays contiguous up to buffer2[j] + N
        oldTarget = buffer2[j] + N - treeSize - bufferSize;
        deleteSize = treeSize;
    }

    if (deleteSize > 0)
    {
        // shift rest to beginning
        // possible hack:
        // - use memcpy if no overlap
        memmove(oldTarget, minBuffer2[j], bufferSize * sizeof(value_type));
        minBuffer2[j] = oldTarget;

        // fill remaining space from tree
        if (j < IntLevels)
            itree[j].multi_merge(oldTarget + bufferSize, deleteSize);

        else
        {
            //external
            etree[j - IntLevels].multi_merge(oldTarget + bufferSize,
                                             oldTarget + bufferSize + deleteSize);
        }
    }


    //STXXL_MSG(deleteSize + bufferSize);
    //std::copy(oldTarget,oldTarget + deleteSize + bufferSize,std::ostream_iterator<value_type>(std::cout, "\n"));

    return deleteSize + bufferSize;
}
2215 
2216 
// move elements from the 2nd level buffers
// to the buffer
template <class Config_>
// NOTE(review): the 'void priority_queue<Config_>::refillBuffer1()' signature
// line is elided in this extract.
{
    STXXL_VERBOSE2("priority_queue::refillBuffer1()");

    size_type totalSize = 0;
    unsigned_type sz;
    //activeLevels is <= 4
    // first make sure each group buffer that might contribute is either
    // refilled to at least BufferSize1 elements or known to be drained
    for (int_type i = activeLevels - 1; i >= 0; i--)
    {
        if ((buffer2[i] + N) - minBuffer2[i] < BufferSize1)
        {
            sz = refillBuffer2(i);
            // max active level dry now?
            if (sz == 0 && unsigned_type(i) == activeLevels - 1)
                --activeLevels;

            else
                totalSize += sz;
        }
        else
        {
            totalSize += BufferSize1; // actually only a sufficient lower bound
        }
    }

    if (totalSize >= BufferSize1) // buffer can be filled completely
    {
        minBuffer1 = buffer1;
        sz = BufferSize1; // amount to be copied
        size_ -= size_type(BufferSize1); // amount left in buffer2
    }
    else
    {
        minBuffer1 = buffer1 + BufferSize1 - totalSize;
        sz = totalSize;
        assert(size_ == size_type(sz)); // trees and buffer2 get empty
        size_ = 0;
    }

    // now call simplified refill routines
    // which can make the assumption that
    // they find all they are asked to find in the buffers
    minBuffer1 = buffer1 + BufferSize1 - sz;
    STXXL_VERBOSE2("Active levels = " << activeLevels);
    // merge the active group buffers into the delete buffer using the
    // specialized k-way merger for the current number of levels
    switch (activeLevels)
    {
    case 0: break;
    case 1:
        std::copy(minBuffer2[0], minBuffer2[0] + sz, minBuffer1);
        minBuffer2[0] += sz;
        break;
    case 2:
        priority_queue_local::merge_iterator(
            minBuffer2[0],
            minBuffer2[1], minBuffer1, sz, cmp);
        break;
    case 3:
        priority_queue_local::merge3_iterator(
            minBuffer2[0],
            minBuffer2[1],
            minBuffer2[2], minBuffer1, sz, cmp);
        break;
    case 4:
        priority_queue_local::merge4_iterator(
            minBuffer2[0],
            minBuffer2[1],
            minBuffer2[2],
            minBuffer2[3], minBuffer1, sz, cmp); //side effect free
        break;
    default:
        STXXL_THROW(std::runtime_error, "priority_queue<...>::refillBuffer1()",
                    "Overflow! The number of buffers on 2nd level in stxxl::priority_queue is currently limited to 4");
    }

    //std::copy(minBuffer1,minBuffer1 + sz,std::ostream_iterator<value_type>(std::cout, "\n"));
}
2296 
2297 //--------------------------------------------------------------------
2298 
2299 // check if space is available on level k and
2300 // empty this level if necessary leading to a recursive call.
2301 // return the level where space was finally available
2302 template <class Config_>
2303 unsigned_type priority_queue<Config_>::makeSpaceAvailable(unsigned_type level)
2304 {
2305  STXXL_VERBOSE2("priority_queue::makeSpaceAvailable(" << level << ")");
2306  unsigned_type finalLevel;
2307  assert(level < Levels);
2308  assert(level <= activeLevels);
2309 
2310  if (level == activeLevels)
2311  activeLevels++;
2312 
2313 
2314  const bool spaceIsAvailable_ =
2315  (level < IntLevels) ? itree[level].spaceIsAvailable()
2316  : ((level == Levels - 1) ? true : (etree[level - IntLevels].spaceIsAvailable()));
2317 
2318  if (spaceIsAvailable_)
2319  {
2320  finalLevel = level;
2321  }
2322  else
2323  {
2324  finalLevel = makeSpaceAvailable(level + 1);
2325 
2326  if (level < IntLevels - 1) // from internal to internal tree
2327  {
2328  unsigned_type segmentSize = itree[level].size();
2329  value_type * newSegment = new value_type[segmentSize + 1];
2330  itree[level].multi_merge(newSegment, segmentSize); // empty this level
2331 
2332  newSegment[segmentSize] = buffer1[BufferSize1]; // sentinel
2333  // for queues where size << #inserts
2334  // it might make sense to stay in this level if
2335  // segmentSize < alpha * KNN * k^level for some alpha < 1
2336  itree[level + 1].insert_segment(newSegment, segmentSize);
2337  }
2338  else
2339  {
2340  if (level == IntLevels - 1) // from internal to external tree
2341  {
2342  const unsigned_type segmentSize = itree[IntLevels - 1].size();
2343  STXXL_VERBOSE1("Inserting segment into first level external: " << level << " " << segmentSize);
2344  etree[0].insert_segment(itree[IntLevels - 1], segmentSize);
2345  }
2346  else // from external to external tree
2347  {
2348  const size_type segmentSize = etree[level - IntLevels].size();
2349  STXXL_VERBOSE1("Inserting segment into second level external: " << level << " " << segmentSize);
2350  etree[level - IntLevels + 1].insert_segment(etree[level - IntLevels], segmentSize);
2351  }
2352  }
2353  }
2354  return finalLevel;
2355 }
2356 
2357 
2358 // empty the insert heap into the main data structure
//
// Flushes the N elements of the insert heap into a fresh segment of the
// first internal merger tree, re-merging them with the current contents
// of buffer1 and buffer2[0] so that the delete buffers keep holding the
// overall minima.
// NOTE(review): the function declarator (original line 2360) is missing
// from this extraction; judging by the STXXL_VERBOSE2 tag below it is
// presumably `void priority_queue<Config_>::emptyInsertHeap()` --
// confirm against the pristine source.
2359 template <class Config_>
2361 {
2362  STXXL_VERBOSE2("priority_queue::emptyInsertHeap()");
// heap holds N real elements plus one sentinel
2363  assert(insertHeap.size() == (N + 1));
2364 
2365  const value_type sup = getSupremum();
2366 
2367  // build new segment
2368  value_type * newSegment = new value_type[N + 1];
2369  value_type * newPos = newSegment;
2370 
2371  // put the new data there for now
2372  //insertHeap.sortTo(newSegment);
2373  value_type * SortTo = newSegment;
2374 
2375  insertHeap.sort_to(SortTo);
2376 
// Reset the heap to contain only the element sort_to left at index N.
// NOTE(review): this assumes sort_to emitted all N + 1 heap entries in
// ascending order so that *SortTo is the sentinel -- confirm against
// the sort_to implementation.
2377  SortTo = newSegment + N;
2378  insertHeap.clear();
2379  insertHeap.push(*SortTo);
2380 
2381  assert(insertHeap.size() == 1);
2382 
2383  newSegment[N] = sup; // sentinel
2384 
2385  // copy the buffer1 and buffer2[0] to temporary storage
2386  // (the temporary can be eliminated using some dirty tricks)
2387  const unsigned_type tempSize = N + BufferSize1;
// NOTE(review): legal only if tempSize is a compile-time constant (N
// and BufferSize1 appear to be configuration constants); otherwise this
// would be a non-standard variable-length array -- confirm.
2388  value_type temp[tempSize + 1];
2389  unsigned_type sz1 = getSize1();
2390  unsigned_type sz2 = getSize2(0);
// pack the surviving buffer contents flush against the end of temp
2391  value_type * pos = temp + tempSize - sz1 - sz2;
2392  std::copy(minBuffer1, minBuffer1 + sz1, pos);
2393  std::copy(minBuffer2[0], minBuffer2[0] + sz2, pos + sz1);
2394  temp[tempSize] = sup; // sentinel
2395 
// The three merges below consume `pos` (old buffer contents) and
// `newPos` (freshly sorted heap data) in sequence, advancing both.
2396  // refill buffer1
2397  // (using more complicated code it could be made somewhat fuller
2398  // in certain circumstances)
2399  priority_queue_local::merge_iterator(pos, newPos, minBuffer1, sz1, cmp);
2400 
2401  // refill buffer2[0]
2402  // (as above we might want to take the opportunity
2403  // to make buffer2[0] fuller)
2404  priority_queue_local::merge_iterator(pos, newPos, minBuffer2[0], sz2, cmp);
2405 
2406  // merge the rest to the new segment
2407  // note that merge exactly trips into the footsteps
2408  // of itself
2409  priority_queue_local::merge_iterator(pos, newPos, newSegment, N, cmp);
2410 
2411  // and insert it
2412  unsigned_type freeLevel = makeSpaceAvailable(0);
2413  assert(freeLevel == 0 || itree[0].size() == 0);
2414  itree[0].insert_segment(newSegment, N);
2415 
2416  // get rid of invalid level 2 buffers
2417  // by inserting them into tree 0 (which is almost empty in this case)
2418  if (freeLevel > 0)
2419  {
2420  for (int_type i = freeLevel; i >= 0; i--)
2421  {
2422  // reverse order not needed
2423  // but would allow immediate refill
// copy getSize2(i) elements plus the trailing sentinel
2424  newSegment = new value_type[getSize2(i) + 1]; // with sentinel
2425  std::copy(minBuffer2[i], minBuffer2[i] + getSize2(i) + 1, newSegment);
2426  itree[0].insert_segment(newSegment, getSize2(i));
2427  minBuffer2[i] = buffer2[i] + N; // empty
2428  }
2429  }
2430 
2431  // update size
2432  size_ += size_type(N);
2433 
2434  // special case if the tree was empty before
2435  if (minBuffer1 == buffer1 + BufferSize1)
2436  refillBuffer1();
2437 }
2438 
2439 namespace priority_queue_local
2440 {
2441  struct Parameters_for_priority_queue_not_found_Increase_IntM
2442  {
2443  enum { fits = false };
2444  typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
2445  };
2446 
2447  struct dummy
2448  {
2449  enum { fits = false };
2450  typedef dummy result;
2451  };
2452 
// Compile-time search for a feasible pair (block size B, buffer block
// count m) given element size E_, internal memory IntM_ and maximal
// queue size MaxS_. Probes increasing m for the current B (candidate1)
// and, failing that, restarts with half the block size (candidate2).
2453  template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_, bool stop = false>
2454  struct find_B_m
2455  {
2456  typedef find_B_m<E_, IntM_, MaxS_, B_, m_, stop> Self;
2457  enum {
2458  k = IntM_ / B_, // number of blocks that fit into M
2459  E = E_, // element size
2460  IntM = IntM_,
2461  B = B_, // block size
2462  m = m_, // number of blocks fitting into buffers
2463  c = k - m_,
2464  // memory occ. by block must be at least 10 times larger than size of ext sequence
2465  // && satisfy memory req && if we have two ext mergers their degree must be at least 64=m/2
2466  fits = c > 10 && ((k - m) * (m) * (m * B / (E * 4 * 1024))) >= MaxS_ && (MaxS_ < ((k - m) * m / (2 * E)) * 1024 || m >= 128),
2467  step = 1
2468  };
2469 
// candidate1: same block size, one more buffer block; candidate2:
// halved block size, restarting at m = 1. The `stop` argument
// short-circuits further instantiation once a fit is known.
2470  typedef typename find_B_m<E, IntM, MaxS_, B, m + step, fits || (m >= k - step)>::result candidate1;
2471  typedef typename find_B_m<E, IntM, MaxS_, B / 2, 1, fits || candidate1::fits>::result candidate2;
// NOTE(review): the member `typedef ... result;` (original line 2472),
// which presumably selects between Self, candidate1 and candidate2
// depending on `fits`, is missing from this extraction -- confirm
// against the pristine source.
2473  };
2474 
2475  // specialization for the case when no valid parameters are found
2476  template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, bool stop>
2477  struct find_B_m<E_, IntM_, MaxS_, 2048, 1, stop>
2478  {
2479  enum { fits = false };
2480  typedef Parameters_for_priority_queue_not_found_Increase_IntM result;
2481  };
2482 
2483  // to speedup search
2484  template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_, unsigned_type B_, unsigned_type m_>
2485  struct find_B_m<E_, IntM_, MaxS_, B_, m_, true>
2486  {
2487  enum { fits = false };
2488  typedef dummy result;
2489  };
2490 
2491  // E_ size of element in bytes
2492  template <unsigned_type E_, unsigned_type IntM_, unsigned_type MaxS_>
2493  struct find_settings
2494  {
2495  // start from block size (8*1024*1024) bytes
2496  typedef typename find_B_m<E_, IntM_, MaxS_, (8 * 1024 * 1024), 1>::result result;
2497  };
2498 
2499  struct Parameters_not_found_Try_to_change_the_Tune_parameter
2500  {
2501  typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
2502  };
2503 
2504 
// Compile-time search for the internal-merger arity AI: starting from
// AI_, keeps halving AI until N = X / (AI * AI) reaches CriticalSize_
// (the AI == 1 specialization below terminates the recursion with a
// telling error type).
2505  template <unsigned_type AI_, unsigned_type X_, unsigned_type CriticalSize_>
2506  struct compute_N
2507  {
2508  typedef compute_N<AI_, X_, CriticalSize_> Self;
2509  enum
2510  {
2511  X = X_,
2512  AI = AI_,
2513  N = X / (AI * AI) // two stage internal
2514  };
// accept this AI if N is large enough, otherwise recurse with AI / 2
2515  typedef typename IF<(N >= CriticalSize_), Self, typename compute_N<AI / 2, X, CriticalSize_>::result>::result result;
2516  };
2517 
2518  template <unsigned_type X_, unsigned_type CriticalSize_>
2519  struct compute_N<1, X_, CriticalSize_>
2520  {
2521  typedef Parameters_not_found_Try_to_change_the_Tune_parameter result;
2522  };
2523 }
2524 
2526 
2529 
2531 
// Type generator: derives the priority-queue configuration (B, m, X, N,
// AI, AE) from element type Tp_, comparator Cmp_, internal memory IntM_,
// maximal size MaxS_ and the Tune knob.
2595 template <class Tp_, class Cmp_, unsigned_type IntM_, unsigned MaxS_, unsigned Tune_ = 6>
// NOTE(review): the class-head line (original line 2596) is missing from
// this extraction; presumably `class PRIORITY_QUEUE_GENERATOR` -- confirm
// against the pristine source.
2597 {
2598 public:
2599  // actual calculation of B, m, k and E
2600  typedef typename priority_queue_local::find_settings<sizeof(Tp_), IntM_, MaxS_>::result settings;
2601  enum {
2602  B = settings::B,
2603  m = settings::m,
2604  X = B * (settings::k - m) / settings::E, // interpretation of result
2605  Buffer1Size = 32 // fixed
2606  };
2607  // derivation of N, AI, AE
2608  typedef typename priority_queue_local::compute_N<(1 << Tune_), X, 4 * Buffer1Size>::result ComputeN;
2609  enum
2610  {
2611  N = ComputeN::N,
2612  AI = ComputeN::AI,
2613  AE = (m / 2 < 2) ? 2 : (m / 2) // at least 2
2614  };
2615  enum {
2616  // Estimation of maximum internal memory consumption (in bytes)
2617  EConsumption = X * settings::E + settings::B * AE + ((MaxS_ / X) / AE) * settings::B * 1024
2618  };
2619  /*
2620  unsigned BufferSize1_ = 32, // equalize procedure call overheads etc.
2621  unsigned N_ = 512, // bandwidth
2622  unsigned IntKMAX_ = 64, // maximal arity for internal mergers
2623  unsigned IntLevels_ = 4,
2624  unsigned BlockSize_ = (2*1024*1024),
2625  unsigned ExtKMAX_ = 64, // maximal arity for external mergers
2626  unsigned ExtLevels_ = 2,
2627  */
// NOTE(review): the member `typedef ... result;` (original line 2628),
// which presumably exposes the generated stxxl::priority_queue type, is
// missing from this extraction -- confirm against the pristine source.
2629 };
2630 
2632 
2633 __STXXL_END_NAMESPACE
2634 
2635 
2636 namespace std
2637 {
2638  template <class Config_>
2639  void swap(stxxl::priority_queue<Config_> & a,
2640  stxxl::priority_queue<Config_> & b)
2641  {
2642  a.swap(b);
2643  }
2644 }
2645 
2646 #endif // !STXXL_PRIORITY_QUEUE_HEADER
2647 // vim: et:ts=4:sw=4