Stxxl  1.2.1
random_shuffle.h
1 /***************************************************************************
2  * include/stxxl/bits/algo/random_shuffle.h
3  *
4  * Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  * Copyright (C) 2007 Manuel Krings
7  * Copyright (C) 2007 Markus Westphal
8  *
9  * Distributed under the Boost Software License, Version 1.0.
10  * (See accompanying file LICENSE_1_0.txt or copy at
11  * http://www.boost.org/LICENSE_1_0.txt)
12  **************************************************************************/
13 
14 #ifndef STXXL_RANDOM_SHUFFLE_HEADER
15 #define STXXL_RANDOM_SHUFFLE_HEADER
16 
17 // TODO: improve main memory consumption in recursion
18 // (free stacks buffers)
19 
20 
21 #include <stxxl/bits/stream/stream.h>
22 #include <stxxl/scan>
23 #include <stxxl/stack>
24 
25 
26 __STXXL_BEGIN_NAMESPACE
27 
28 namespace random_shuffle_local
29 {
30  // efficiently writes data into an stxxl::vector with overlapping of I/O and
31  // computation
32  template <class ExtIterator>
33  class write_vector
34  {
35  typedef typename ExtIterator::size_type size_type;
36  typedef typename ExtIterator::value_type value_type;
37  typedef typename ExtIterator::block_type block_type;
38  typedef typename ExtIterator::const_iterator ConstExtIterator;
39  typedef stxxl::buf_ostream<block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type;
40 
41  ExtIterator it;
42  unsigned_type nbuffers;
43  buf_ostream_type * outstream;
44 
45  public:
46  write_vector(ExtIterator begin,
47  unsigned nbuffers_ = 2 // buffers to use for overlapping (>=2 recommended)
48  ) : it(begin), nbuffers(nbuffers_)
49  {
50  outstream = new buf_ostream_type(it.bid(), nbuffers);
51  }
52 
53  value_type & operator * ()
54  {
55  if (it.block_offset() == 0)
56  it.touch();
57  // tells the vector that the block was modified
58  return **outstream;
59  }
60 
61  write_vector & operator ++ ()
62  {
63  ++it;
64  ++(*outstream);
65  return *this;
66  }
67 
68  void flush()
69  {
70  ConstExtIterator const_out = it;
71 
72  while (const_out.block_offset())
73  {
74  **outstream = *const_out; // might cause I/Os for loading the page that
75  ++const_out; // contains data beyond out
76  ++(*outstream);
77  }
78 
79  it.flush();
80  delete outstream;
81  outstream = NULL;
82  }
83 
84  virtual ~write_vector()
85  {
86  if (outstream)
87  flush();
88  }
89  };
90 }
91 
92 
95 
96 
97 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
98  unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
99 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
100  stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
101  RandomNumberGenerator_ & rand,
102  unsigned_type M);
103 
104 
114 template <typename ExtIterator_,
115  typename RandomNumberGenerator_,
116  unsigned BlockSize_,
117  unsigned PageSize_,
118  typename AllocStrategy_>
119 void random_shuffle(ExtIterator_ first,
120  ExtIterator_ beyond,
121  RandomNumberGenerator_ & rand,
122  unsigned_type M,
123  AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY())
124 {
125  typedef typename ExtIterator_::value_type value_type;
126  typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
127  stxxl::grow_shrink2, PageSize_,
128  BlockSize_, void, 0, AllocStrategy_>::result stack_type;
129  typedef typename stack_type::block_type block_type;
130 
131  STXXL_VERBOSE1("random_shuffle: Plain Version");
132 
133  stxxl::int64 n = beyond - first; // the number of input elements
134 
135  // make sure we have at least 6 blocks + 1 page
136  if (M < 6 * BlockSize_ + PageSize_ * BlockSize_)
137  M = 6 * BlockSize_ + PageSize_ * BlockSize_;
138 
139 
140  int_type k = M / (3 * BlockSize_); // number of buckets
141 
142 
143  stxxl::int64 i, j, size = 0;
144 
145  value_type * temp_array;
146  typedef typename stxxl::VECTOR_GENERATOR<value_type,
147  PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
148  temp_vector_type * temp_vector;
149 
150  stxxl::prefetch_pool<block_type> p_pool(0); // no read buffers
151  STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets");
152  stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers
153 
154  stack_type ** buckets;
155 
156  // create and put buckets into container
157  buckets = new stack_type *[k];
158  for (j = 0; j < k; j++)
159  buckets[j] = new stack_type(p_pool, w_pool, 0);
160 
161 
163  typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
164  input_stream in = stxxl::stream::streamify(first, beyond);
165 
166  // distribute input into random buckets
167  int_type random_bucket = 0;
168  for (i = 0; i < n; ++i) {
169  random_bucket = rand(k);
170  buckets[random_bucket]->push(*in); // reading the current input element
171  ++in; // go to the next input element
172  }
173 
175  // resize buffers
176  w_pool.resize(0);
177  p_pool.resize(PageSize_);
178 
179  // Set prefetch aggr to PageSize_
180  for (int_type j = 0; j < k; j++) {
181  buckets[j]->set_prefetch_aggr(PageSize_);
182  }
183 
184  unsigned_type space_left = M - k * BlockSize_ -
185  PageSize_ * BlockSize_; // remaining int space
186  ExtIterator_ Writer = first;
187  ExtIterator_ it = first;
188 
189  for (i = 0; i < k; i++) {
190  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
191  }
192 
193  // shuffle each bucket
194  for (i = 0; i < k; i++) {
195  size = buckets[i]->size();
196 
197  // does the bucket fit into memory?
198  if (size * sizeof(value_type) < space_left) {
199  STXXL_VERBOSE1("random_shuffle: no recursion");
200 
201  // copy bucket into temp. array
202  temp_array = new value_type[size];
203  for (j = 0; j < size; j++) {
204  temp_array[j] = buckets[i]->top();
205  buckets[i]->pop();
206  }
207 
208  // shuffle
209  std::random_shuffle(temp_array, temp_array + size, rand);
210 
211  // write back
212  for (j = 0; j < size; j++) {
213  *Writer = temp_array[j];
214  ++Writer;
215  }
216 
217  // free memory
218  delete[] temp_array;
219  }
220  else {
221  STXXL_VERBOSE1("random_shuffle: recursion");
222 
223  // copy bucket into temp. stxxl::vector
224  temp_vector = new temp_vector_type(size);
225 
226  for (j = 0; j < size; j++) {
227  (*temp_vector)[j] = buckets[i]->top();
228  buckets[i]->pop();
229  }
230 
231  p_pool.resize(0);
232  space_left += PageSize_ * BlockSize_;
233  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
234 
235  // recursive shuffle
236  stxxl::random_shuffle(temp_vector->begin(),
237  temp_vector->end(), rand, space_left);
238 
239  p_pool.resize(PageSize_);
240 
241  // write back
242  for (j = 0; j < size; j++) {
243  *Writer = (*temp_vector)[j];
244  ++Writer;
245  }
246 
247  // free memory
248  delete temp_vector;
249  }
250  }
251 
252  // free buckets
253  for (int j = 0; j < k; j++)
254  delete buckets[j];
255 
256  delete[] buckets;
257 }
258 
264 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
265  unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_>
266 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
267  stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
268  RandomNumberGenerator_ & rand,
269  unsigned_type M)
270 {
271  typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_;
272  typedef typename ExtIterator_::value_type value_type;
273  typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external,
274  stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type;
275  typedef typename stack_type::block_type block_type;
276 
277  STXXL_VERBOSE1("random_shuffle: Vector Version");
278 
279  // make sure we have at least 6 blocks + 1 page
280  if (M < 6 * BlockSize_ + PageSize_ * BlockSize_)
281  M = 6 * BlockSize_ + PageSize_ * BlockSize_;
282 
283 
284  stxxl::int64 n = beyond - first; // the number of input elements
285  int_type k = M / (3 * BlockSize_); // number of buckets
286 
287  stxxl::int64 i, j, size = 0;
288 
289  value_type * temp_array;
290  typedef typename stxxl::VECTOR_GENERATOR<value_type,
291  PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type;
292  temp_vector_type * temp_vector;
293 
294  stxxl::prefetch_pool<block_type> p_pool(0); // no read buffers
295  stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers
296 
297  stack_type ** buckets;
298 
299  // create and put buckets into container
300  buckets = new stack_type *[k];
301  for (j = 0; j < k; j++)
302  buckets[j] = new stack_type(p_pool, w_pool, 0);
303 
304 
306  typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream;
307  input_stream in = stxxl::stream::streamify(first, beyond);
308 
309  // distribute input into random buckets
310  int_type random_bucket = 0;
311  for (i = 0; i < n; ++i) {
312  random_bucket = rand(k);
313  buckets[random_bucket]->push(*in); // reading the current input element
314  ++in; // go to the next input element
315  }
316 
318  // resize buffers
319  w_pool.resize(0);
320  p_pool.resize(PageSize_);
321 
322  // Set prefetch aggr to PageSize_
323  for (j = 0; j < k; j++) {
324  buckets[j]->set_prefetch_aggr(PageSize_);
325  }
326 
327  unsigned_type space_left = M - k * BlockSize_ -
328  PageSize_ * BlockSize_; // remaining int space
329  random_shuffle_local::write_vector<ExtIterator_>
330  Writer(first, 2 * PageSize_);
331  ExtIterator_ it = first;
332 
333  for (i = 0; i < k; i++) {
334  STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements");
335  }
336 
337  // shuffle each bucket
338  for (i = 0; i < k; i++) {
339  size = buckets[i]->size();
340 
341  // does the bucket fit into memory?
342  if (size * sizeof(value_type) < space_left) {
343  STXXL_VERBOSE1("random_shuffle: no recursion");
344 
345  // copy bucket into temp. array
346  temp_array = new value_type[size];
347  for (j = 0; j < size; j++) {
348  temp_array[j] = buckets[i]->top();
349  buckets[i]->pop();
350  }
351 
352  // shuffle
353  std::random_shuffle(temp_array, temp_array + size, rand);
354 
355  // write back
356  for (j = 0; j < size; j++) {
357  *Writer = temp_array[j];
358  ++Writer;
359  }
360 
361  // free memory
362  delete[] temp_array;
363  }
364  else {
365  STXXL_VERBOSE1("random_shuffle: recursion");
366  // copy bucket into temp. stxxl::vector
367  temp_vector = new temp_vector_type(size);
368 
369  for (j = 0; j < size; j++) {
370  (*temp_vector)[j] = buckets[i]->top();
371  buckets[i]->pop();
372  }
373 
374  p_pool.resize(0);
375  space_left += PageSize_ * BlockSize_;
376 
377  STXXL_VERBOSE1("random_shuffle: Space left: " << space_left);
378 
379  // recursive shuffle
380  stxxl::random_shuffle(temp_vector->begin(),
381  temp_vector->end(), rand, space_left);
382 
383  p_pool.resize(PageSize_);
384 
385  // write back
386  for (j = 0; j < size; j++) {
387  *Writer = (*temp_vector)[j];
388  ++Writer;
389  }
390 
391  // free memory
392  delete temp_vector;
393  }
394  }
395 
396  // free buckets
397  for (int j = 0; j < k; j++)
398  delete buckets[j];
399 
400  delete[] buckets;
401 
402  Writer.flush(); // flush buffers
403 }
404 
409 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_,
410  unsigned BlockSize_, typename PgTp_, unsigned PageSize_>
411 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first,
412  stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond,
413  unsigned_type M)
414 {
415  stxxl::random_number<> rand;
416  stxxl::random_shuffle(first, beyond, rand, M);
417 }
418 
420 
421 __STXXL_END_NAMESPACE
422 
423 #endif // !STXXL_RANDOM_SHUFFLE_HEADER