Stxxl
1.2.1
|
00001 /*************************************************************************** 00002 * include/stxxl/bits/algo/random_shuffle.h 00003 * 00004 * Part of the STXXL. See http://stxxl.sourceforge.net 00005 * 00006 * Copyright (C) 2007 Manuel Krings 00007 * Copyright (C) 2007 Markus Westphal 00008 * 00009 * Distributed under the Boost Software License, Version 1.0. 00010 * (See accompanying file LICENSE_1_0.txt or copy at 00011 * http://www.boost.org/LICENSE_1_0.txt) 00012 **************************************************************************/ 00013 00014 #ifndef STXXL_RANDOM_SHUFFLE_HEADER 00015 #define STXXL_RANDOM_SHUFFLE_HEADER 00016 00017 // TODO: improve main memory consumption in recursion 00018 // (free stacks buffers) 00019 00020 00021 #include <stxxl/bits/stream/stream.h> 00022 #include <stxxl/scan> 00023 #include <stxxl/stack> 00024 00025 00026 __STXXL_BEGIN_NAMESPACE 00027 00028 namespace random_shuffle_local 00029 { 00030 // efficiently writes data into an stxxl::vector with overlapping of I/O and 00031 // computation 00032 template <class ExtIterator> 00033 class write_vector 00034 { 00035 typedef typename ExtIterator::size_type size_type; 00036 typedef typename ExtIterator::value_type value_type; 00037 typedef typename ExtIterator::block_type block_type; 00038 typedef typename ExtIterator::const_iterator ConstExtIterator; 00039 typedef stxxl::buf_ostream<block_type, typename ExtIterator::bids_container_iterator> buf_ostream_type; 00040 00041 ExtIterator it; 00042 unsigned_type nbuffers; 00043 buf_ostream_type * outstream; 00044 00045 public: 00046 write_vector(ExtIterator begin, 00047 unsigned nbuffers_ = 2 // buffers to use for overlapping (>=2 recommended) 00048 ) : it(begin), nbuffers(nbuffers_) 00049 { 00050 outstream = new buf_ostream_type(it.bid(), nbuffers); 00051 } 00052 00053 value_type & operator * () 00054 { 00055 if (it.block_offset() == 0) 00056 it.touch(); 00057 // tells the vector that the block was modified 00058 return **outstream; 00059 } 00060 00061 write_vector & operator ++ () 00062 { 00063 ++it; 00064 ++(*outstream); 00065 return *this; 00066 } 00067 00068 void flush() 00069 { 00070 ConstExtIterator const_out = it; 00071 00072 while (const_out.block_offset()) 00073 { 00074 **outstream = *const_out; // might cause I/Os for loading the page that 00075 ++const_out; // contains data beyond out 00076 ++(*outstream); 00077 } 00078 00079 it.flush(); 00080 delete outstream; 00081 outstream = NULL; 00082 } 00083 00084 virtual ~write_vector() 00085 { 00086 if (outstream) 00087 flush(); 00088 } 00089 }; 00090 } 00091 00092 00095 00096 00097 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_, 00098 unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_> 00099 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first, 00100 stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond, 00101 RandomNumberGenerator_ & rand, 00102 unsigned_type M); 00103 00104 00114 template <typename ExtIterator_, 00115 typename RandomNumberGenerator_, 00116 unsigned BlockSize_, 00117 unsigned PageSize_, 00118 typename AllocStrategy_> 00119 void random_shuffle(ExtIterator_ first, 00120 ExtIterator_ beyond, 00121 RandomNumberGenerator_ & rand, 00122 unsigned_type M, 00123 AllocStrategy_ AS = STXXL_DEFAULT_ALLOC_STRATEGY()) 00124 { 00125 typedef typename ExtIterator_::value_type value_type; 00126 typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external, 00127 stxxl::grow_shrink2, PageSize_, 00128 BlockSize_, void, 0, AllocStrategy_>::result stack_type; 00129 typedef typename stack_type::block_type block_type; 00130 00131 STXXL_VERBOSE1("random_shuffle: Plain Version"); 00132 00133 stxxl::int64 n = beyond - first; // the number of input elements 00134 00135 // make sure we have at least 6 blocks + 1 page 00136 if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) 00137 M = 6 * BlockSize_ + PageSize_ * BlockSize_; 00138 00139 00140 int_type k = M / (3 * BlockSize_); // number of buckets 00141 00142 00143 stxxl::int64 i, j, size = 0; 00144 00145 value_type * temp_array; 00146 typedef typename stxxl::VECTOR_GENERATOR<value_type, 00147 PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type; 00148 temp_vector_type * temp_vector; 00149 00150 stxxl::prefetch_pool<block_type> p_pool(0); // no read buffers 00151 STXXL_VERBOSE1("random_shuffle: " << M / BlockSize_ - k << " write buffers for " << k << " buckets"); 00152 stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers 00153 00154 stack_type ** buckets; 00155 00156 // create and put buckets into container 00157 buckets = new stack_type *[k]; 00158 for (j = 0; j < k; j++) 00159 buckets[j] = new stack_type(p_pool, w_pool, 0); 00160 00161 00163 typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream; 00164 input_stream in = stxxl::stream::streamify(first, beyond); 00165 00166 // distribute input into random buckets 00167 int_type random_bucket = 0; 00168 for (i = 0; i < n; ++i) { 00169 random_bucket = rand(k); 00170 buckets[random_bucket]->push(*in); // reading the current input element 00171 ++in; // go to the next input element 00172 } 00173 00175 // resize buffers 00176 w_pool.resize(0); 00177 p_pool.resize(PageSize_); 00178 00179 // Set prefetch aggr to PageSize_ 00180 for (int_type j = 0; j < k; j++) { 00181 buckets[j]->set_prefetch_aggr(PageSize_); 00182 } 00183 00184 unsigned_type space_left = M - k * BlockSize_ - 00185 PageSize_ * BlockSize_; // remaining int space 00186 ExtIterator_ Writer = first; 00187 ExtIterator_ it = first; 00188 00189 for (i = 0; i < k; i++) { 00190 STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements"); 00191 } 00192 00193 // shuffle each bucket 00194 for (i = 0; i < k; i++) { 00195 size = buckets[i]->size(); 00196 00197 // does the bucket fit into memory? 00198 if (size * sizeof(value_type) < space_left) { 00199 STXXL_VERBOSE1("random_shuffle: no recursion"); 00200 00201 // copy bucket into temp. array 00202 temp_array = new value_type[size]; 00203 for (j = 0; j < size; j++) { 00204 temp_array[j] = buckets[i]->top(); 00205 buckets[i]->pop(); 00206 } 00207 00208 // shuffle 00209 std::random_shuffle(temp_array, temp_array + size, rand); 00210 00211 // write back 00212 for (j = 0; j < size; j++) { 00213 *Writer = temp_array[j]; 00214 ++Writer; 00215 } 00216 00217 // free memory 00218 delete[] temp_array; 00219 } 00220 else { 00221 STXXL_VERBOSE1("random_shuffle: recursion"); 00222 00223 // copy bucket into temp. stxxl::vector 00224 temp_vector = new temp_vector_type(size); 00225 00226 for (j = 0; j < size; j++) { 00227 (*temp_vector)[j] = buckets[i]->top(); 00228 buckets[i]->pop(); 00229 } 00230 00231 p_pool.resize(0); 00232 space_left += PageSize_ * BlockSize_; 00233 STXXL_VERBOSE1("random_shuffle: Space left: " << space_left); 00234 00235 // recursive shuffle 00236 stxxl::random_shuffle(temp_vector->begin(), 00237 temp_vector->end(), rand, space_left); 00238 00239 p_pool.resize(PageSize_); 00240 00241 // write back 00242 for (j = 0; j < size; j++) { 00243 *Writer = (*temp_vector)[j]; 00244 ++Writer; 00245 } 00246 00247 // free memory 00248 delete temp_vector; 00249 } 00250 } 00251 00252 // free buckets 00253 for (int j = 0; j < k; j++) 00254 delete buckets[j]; 00255 00256 delete[] buckets; 00257 } 00258 00264 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_, 00265 unsigned BlockSize_, typename PgTp_, unsigned PageSize_, typename RandomNumberGenerator_> 00266 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first, 00267 stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond, 00268 RandomNumberGenerator_ & rand, 00269 unsigned_type M) 00270 { 00271 typedef stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> ExtIterator_; 00272 typedef typename ExtIterator_::value_type value_type; 00273 typedef typename stxxl::STACK_GENERATOR<value_type, stxxl::external, 00274 stxxl::grow_shrink2, PageSize_, BlockSize_>::result stack_type; 00275 typedef typename stack_type::block_type block_type; 00276 00277 STXXL_VERBOSE1("random_shuffle: Vector Version"); 00278 00279 // make sure we have at least 6 blocks + 1 page 00280 if (M < 6 * BlockSize_ + PageSize_ * BlockSize_) 00281 M = 6 * BlockSize_ + PageSize_ * BlockSize_; 00282 00283 00284 stxxl::int64 n = beyond - first; // the number of input elements 00285 int_type k = M / (3 * BlockSize_); // number of buckets 00286 00287 stxxl::int64 i, j, size = 0; 00288 00289 value_type * temp_array; 00290 typedef typename stxxl::VECTOR_GENERATOR<value_type, 00291 PageSize_, 4, BlockSize_, AllocStrategy_>::result temp_vector_type; 00292 temp_vector_type * temp_vector; 00293 00294 stxxl::prefetch_pool<block_type> p_pool(0); // no read buffers 00295 stxxl::write_pool<block_type> w_pool(M / BlockSize_ - k); // M/B-k write buffers 00296 00297 stack_type ** buckets; 00298 00299 // create and put buckets into container 00300 buckets = new stack_type *[k]; 00301 for (j = 0; j < k; j++) 00302 buckets[j] = new stack_type(p_pool, w_pool, 0); 00303 00304 00306 typedef typename stream::streamify_traits<ExtIterator_>::stream_type input_stream; 00307 input_stream in = stxxl::stream::streamify(first, beyond); 00308 00309 // distribute input into random buckets 00310 int_type random_bucket = 0; 00311 for (i = 0; i < n; ++i) { 00312 random_bucket = rand(k); 00313 buckets[random_bucket]->push(*in); // reading the current input element 00314 ++in; // go to the next input element 00315 } 00316 00318 // resize buffers 00319 w_pool.resize(0); 00320 p_pool.resize(PageSize_); 00321 00322 // Set prefetch aggr to PageSize_ 00323 for (j = 0; j < k; j++) { 00324 buckets[j]->set_prefetch_aggr(PageSize_); 00325 } 00326 00327 unsigned_type space_left = M - k * BlockSize_ - 00328 PageSize_ * BlockSize_; // remaining int space 00329 random_shuffle_local::write_vector<ExtIterator_> 00330 Writer(first, 2 * PageSize_); 00331 ExtIterator_ it = first; 00332 00333 for (i = 0; i < k; i++) { 00334 STXXL_VERBOSE1("random_shuffle: bucket no " << i << " contains " << buckets[i]->size() << " elements"); 00335 } 00336 00337 // shuffle each bucket 00338 for (i = 0; i < k; i++) { 00339 size = buckets[i]->size(); 00340 00341 // does the bucket fit into memory? 00342 if (size * sizeof(value_type) < space_left) { 00343 STXXL_VERBOSE1("random_shuffle: no recursion"); 00344 00345 // copy bucket into temp. array 00346 temp_array = new value_type[size]; 00347 for (j = 0; j < size; j++) { 00348 temp_array[j] = buckets[i]->top(); 00349 buckets[i]->pop(); 00350 } 00351 00352 // shuffle 00353 std::random_shuffle(temp_array, temp_array + size, rand); 00354 00355 // write back 00356 for (j = 0; j < size; j++) { 00357 *Writer = temp_array[j]; 00358 ++Writer; 00359 } 00360 00361 // free memory 00362 delete[] temp_array; 00363 } 00364 else { 00365 STXXL_VERBOSE1("random_shuffle: recursion"); 00366 // copy bucket into temp. stxxl::vector 00367 temp_vector = new temp_vector_type(size); 00368 00369 for (j = 0; j < size; j++) { 00370 (*temp_vector)[j] = buckets[i]->top(); 00371 buckets[i]->pop(); 00372 } 00373 00374 p_pool.resize(0); 00375 space_left += PageSize_ * BlockSize_; 00376 00377 STXXL_VERBOSE1("random_shuffle: Space left: " << space_left); 00378 00379 // recursive shuffle 00380 stxxl::random_shuffle(temp_vector->begin(), 00381 temp_vector->end(), rand, space_left); 00382 00383 p_pool.resize(PageSize_); 00384 00385 // write back 00386 for (j = 0; j < size; j++) { 00387 *Writer = (*temp_vector)[j]; 00388 ++Writer; 00389 } 00390 00391 // free memory 00392 delete temp_vector; 00393 } 00394 } 00395 00396 // free buckets 00397 for (int j = 0; j < k; j++) 00398 delete buckets[j]; 00399 00400 delete[] buckets; 00401 00402 Writer.flush(); // flush buffers 00403 } 00404 00409 template <typename Tp_, typename AllocStrategy_, typename SzTp_, typename DiffTp_, 00410 unsigned BlockSize_, typename PgTp_, unsigned PageSize_> 00411 void random_shuffle(stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> first, 00412 stxxl::vector_iterator<Tp_, AllocStrategy_, SzTp_, DiffTp_, BlockSize_, PgTp_, PageSize_> beyond, 00413 unsigned_type M) 00414 { 00415 stxxl::random_number<> rand; 00416 stxxl::random_shuffle(first, beyond, rand, M); 00417 } 00418 00420 00421 __STXXL_END_NAMESPACE 00422 00423 #endif // !STXXL_RANDOM_SHUFFLE_HEADER