001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.stat.ranking;
019
020 import java.util.ArrayList;
021 import java.util.Arrays;
022 import java.util.Iterator;
023 import java.util.List;
024
025 import org.apache.commons.math.random.RandomData;
026 import org.apache.commons.math.random.RandomDataImpl;
027 import org.apache.commons.math.random.RandomGenerator;
028
029
030 /**
031 * <p> Ranking based on the natural ordering on doubles.</p>
032 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
033 * are handled using the selected {@link TiesStrategy}.
034 * Configuration settings are supplied in optional constructor arguments.
035 * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
036 * respectively. When using {@link TiesStrategy#RANDOM}, a
037 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
038 * <p>Examples:
039 * <table border="1" cellpadding="3">
040 * <tr><th colspan="3">
041 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
042 * </th></tr>
043 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
044 * <th><code>rank(data)</code></th>
045 * <tr>
046 * <td>default (NaNs maximal)</td>
047 * <td>default (ties averaged)</td>
048 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
049 * <tr>
050 * <td>default (NaNs maximal)</td>
051 * <td>MINIMUM</td>
052 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
053 * <tr>
054 * <td>MINIMAL</td>
055 * <td>default (ties averaged)</td>
056 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
057 * <tr>
058 * <td>REMOVED</td>
059 * <td>SEQUENTIAL</td>
060 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
061 * <tr>
062 * <td>MINIMAL</td>
063 * <td>MAXIMUM</td>
064 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
065 *
066 * @since 2.0
067 * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
068 */
069 public class NaturalRanking implements RankingAlgorithm {
070
071 /** NaN strategy - defaults to NaNs maximal */
072 private final NaNStrategy nanStrategy;
073
074 /** Ties strategy - defaults to ties averaged */
075 private final TiesStrategy tiesStrategy;
076
077 /** Source of random data - used only when ties strategy is RANDOM */
078 private final RandomData randomData;
079
080 /** default NaN strategy */
081 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
082
083 /** default ties strategy */
084 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
085
086 /**
087 * Create a NaturalRanking with default strategies for handling ties and NaNs.
088 */
089 public NaturalRanking() {
090 super();
091 tiesStrategy = DEFAULT_TIES_STRATEGY;
092 nanStrategy = DEFAULT_NAN_STRATEGY;
093 randomData = null;
094 }
095
096 /**
097 * Create a NaturalRanking with the given TiesStrategy.
098 *
099 * @param tiesStrategy the TiesStrategy to use
100 */
101 public NaturalRanking(TiesStrategy tiesStrategy) {
102 super();
103 this.tiesStrategy = tiesStrategy;
104 nanStrategy = DEFAULT_NAN_STRATEGY;
105 randomData = new RandomDataImpl();
106 }
107
108 /**
109 * Create a NaturalRanking with the given NaNStrategy.
110 *
111 * @param nanStrategy the NaNStrategy to use
112 */
113 public NaturalRanking(NaNStrategy nanStrategy) {
114 super();
115 this.nanStrategy = nanStrategy;
116 tiesStrategy = DEFAULT_TIES_STRATEGY;
117 randomData = null;
118 }
119
120 /**
121 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
122 *
123 * @param nanStrategy NaNStrategy to use
124 * @param tiesStrategy TiesStrategy to use
125 */
126 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
127 super();
128 this.nanStrategy = nanStrategy;
129 this.tiesStrategy = tiesStrategy;
130 randomData = new RandomDataImpl();
131 }
132
133 /**
134 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
135 * RandomGenerator as the source of random data.
136 *
137 * @param randomGenerator source of random data
138 */
139 public NaturalRanking(RandomGenerator randomGenerator) {
140 super();
141 this.tiesStrategy = TiesStrategy.RANDOM;
142 nanStrategy = DEFAULT_NAN_STRATEGY;
143 randomData = new RandomDataImpl(randomGenerator);
144 }
145
146
147 /**
148 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
149 * and the given source of random data.
150 *
151 * @param nanStrategy NaNStrategy to use
152 * @param randomGenerator source of random data
153 */
154 public NaturalRanking(NaNStrategy nanStrategy,
155 RandomGenerator randomGenerator) {
156 super();
157 this.nanStrategy = nanStrategy;
158 this.tiesStrategy = TiesStrategy.RANDOM;
159 randomData = new RandomDataImpl(randomGenerator);
160 }
161
162 /**
163 * Return the NaNStrategy
164 *
165 * @return returns the NaNStrategy
166 */
167 public NaNStrategy getNanStrategy() {
168 return nanStrategy;
169 }
170
171 /**
172 * Return the TiesStrategy
173 *
174 * @return the TiesStrategy
175 */
176 public TiesStrategy getTiesStrategy() {
177 return tiesStrategy;
178 }
179
180 /**
181 * Rank <code>data</code> using the natural ordering on Doubles, with
182 * NaN values handled according to <code>nanStrategy</code> and ties
183 * resolved using <code>tiesStrategy.</code>
184 *
185 * @param data array to be ranked
186 * @return array of ranks
187 */
188 public double[] rank(double[] data) {
189
190 // Array recording initial positions of data to be ranked
191 IntDoublePair[] ranks = new IntDoublePair[data.length];
192 for (int i = 0; i < data.length; i++) {
193 ranks[i] = new IntDoublePair(data[i], i);
194 }
195
196 // Recode, remove or record positions of NaNs
197 List<Integer> nanPositions = null;
198 switch (nanStrategy) {
199 case MAXIMAL: // Replace NaNs with +INFs
200 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
201 break;
202 case MINIMAL: // Replace NaNs with -INFs
203 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
204 break;
205 case REMOVED: // Drop NaNs from data
206 ranks = removeNaNs(ranks);
207 break;
208 case FIXED: // Record positions of NaNs
209 nanPositions = getNanPositions(ranks);
210 break;
211 }
212
213 // Sort the IntDoublePairs
214 Arrays.sort(ranks);
215
216 // Walk the sorted array, filling output array using sorted positions,
217 // resolving ties as we go
218 double[] out = new double[ranks.length];
219 int pos = 1; // position in sorted array
220 out[ranks[0].getPosition()] = pos;
221 List<Integer> tiesTrace = new ArrayList<Integer>();
222 tiesTrace.add(ranks[0].getPosition());
223 for (int i = 1; i < ranks.length; i++) {
224 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
225 // tie sequence has ended (or had length 1)
226 pos = i + 1;
227 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
228 resolveTie(out, tiesTrace);
229 }
230 tiesTrace = new ArrayList<Integer>();
231 tiesTrace.add(ranks[i].getPosition());
232 } else {
233 // tie sequence continues
234 tiesTrace.add(ranks[i].getPosition());
235 }
236 out[ranks[i].getPosition()] = pos;
237 }
238 if (tiesTrace.size() > 1) { // handle tie sequence at end
239 resolveTie(out, tiesTrace);
240 }
241 if (nanStrategy == NaNStrategy.FIXED) {
242 restoreNaNs(out, nanPositions);
243 }
244 return out;
245 }
246
247 /**
248 * Returns an array that is a copy of the input array with IntDoublePairs
249 * having NaN values removed.
250 *
251 * @param ranks input array
252 * @return array with NaN-valued entries removed
253 */
254 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
255 if (!containsNaNs(ranks)) {
256 return ranks;
257 }
258 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
259 int j = 0;
260 for (int i = 0; i < ranks.length; i++) {
261 if (Double.isNaN(ranks[i].getValue())) {
262 // drop, but adjust original ranks of later elements
263 for (int k = i + 1; k < ranks.length; k++) {
264 ranks[k] = new IntDoublePair(
265 ranks[k].getValue(), ranks[k].getPosition() - 1);
266 }
267 } else {
268 outRanks[j] = new IntDoublePair(
269 ranks[i].getValue(), ranks[i].getPosition());
270 j++;
271 }
272 }
273 IntDoublePair[] returnRanks = new IntDoublePair[j];
274 System.arraycopy(outRanks, 0, returnRanks, 0, j);
275 return returnRanks;
276 }
277
278 /**
279 * Recodes NaN values to the given value.
280 *
281 * @param ranks array to recode
282 * @param value the value to replace NaNs with
283 */
284 private void recodeNaNs(IntDoublePair[] ranks, double value) {
285 for (int i = 0; i < ranks.length; i++) {
286 if (Double.isNaN(ranks[i].getValue())) {
287 ranks[i] = new IntDoublePair(
288 value, ranks[i].getPosition());
289 }
290 }
291 }
292
293 /**
294 * Checks for presence of NaNs in <code>ranks.</code>
295 *
296 * @param ranks array to be searched for NaNs
297 * @return true iff ranks contains one or more NaNs
298 */
299 private boolean containsNaNs(IntDoublePair[] ranks) {
300 for (int i = 0; i < ranks.length; i++) {
301 if (Double.isNaN(ranks[i].getValue())) {
302 return true;
303 }
304 }
305 return false;
306 }
307
308 /**
309 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
310 * The input <code>ranks</code> array is expected to take the same value
311 * for all indices in <code>tiesTrace</code>. The common value is recoded
312 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
313 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
314 * The same array and trace with tiesStrategy AVERAGE will come out
315 * <5,8,3,6,3,7,1,3>.
316 *
317 * @param ranks array of ranks
318 * @param tiesTrace list of indices where <code>ranks</code> is constant
319 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
320 * </code>
321 */
322 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
323
324 // constant value of ranks over tiesTrace
325 final double c = ranks[tiesTrace.get(0)];
326
327 // length of sequence of tied ranks
328 final int length = tiesTrace.size();
329
330 switch (tiesStrategy) {
331 case AVERAGE: // Replace ranks with average
332 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
333 break;
334 case MAXIMUM: // Replace ranks with maximum values
335 fill(ranks, tiesTrace, c + length - 1);
336 break;
337 case MINIMUM: // Replace ties with minimum
338 fill(ranks, tiesTrace, c);
339 break;
340 case RANDOM: // Fill with random integral values in [c, c + length - 1]
341 Iterator<Integer> iterator = tiesTrace.iterator();
342 long f = Math.round(c);
343 while (iterator.hasNext()) {
344 ranks[iterator.next()] =
345 randomData.nextLong(f, f + length - 1);
346 }
347 break;
348 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
349 // walk and fill
350 iterator = tiesTrace.iterator();
351 f = Math.round(c);
352 int i = 0;
353 while (iterator.hasNext()) {
354 ranks[iterator.next()] = f + i++;
355 }
356 break;
357 }
358 }
359
360 /**
361 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
362 *
363 * @param data array to modify
364 * @param tiesTrace list of index values to set
365 * @param value value to set
366 */
367 private void fill(double[] data, List<Integer> tiesTrace, double value) {
368 Iterator<Integer> iterator = tiesTrace.iterator();
369 while (iterator.hasNext()) {
370 data[iterator.next()] = value;
371 }
372 }
373
374 /**
375 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
376 *
377 * @param ranks array to modify
378 * @param nanPositions list of index values to set to <code>Double.NaN</code>
379 */
380 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
381 if (nanPositions.size() == 0) {
382 return;
383 }
384 Iterator<Integer> iterator = nanPositions.iterator();
385 while (iterator.hasNext()) {
386 ranks[iterator.next().intValue()] = Double.NaN;
387 }
388
389 }
390
391 /**
392 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
393 *
394 * @param ranks array to search for <code>NaNs</code>
395 * @return list of indexes i such that <code>ranks[i] = NaN</code>
396 */
397 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
398 ArrayList<Integer> out = new ArrayList<Integer>();
399 for (int i = 0; i < ranks.length; i++) {
400 if (Double.isNaN(ranks[i].getValue())) {
401 out.add(Integer.valueOf(i));
402 }
403 }
404 return out;
405 }
406
407 /**
408 * Represents the position of a double value in an ordering.
409 * Comparable interface is implemented so Arrays.sort can be used
410 * to sort an array of IntDoublePairs by value. Note that the
411 * implicitly defined natural ordering is NOT consistent with equals.
412 */
413 private static class IntDoublePair implements Comparable<IntDoublePair> {
414
415 /** Value of the pair */
416 final private double value;
417
418 /** Original position of the pair */
419 final private int position;
420
421 /**
422 * Construct an IntDoublePair with the given value and position.
423 * @param value the value of the pair
424 * @param position the original position
425 */
426 public IntDoublePair(double value, int position) {
427 this.value = value;
428 this.position = position;
429 }
430
431 /**
432 * Compare this IntDoublePair to another pair.
433 * Only the <strong>values</strong> are compared.
434 *
435 * @param other the other pair to compare this to
436 * @return result of <code>Double.compare(value, other.value)</code>
437 */
438 public int compareTo(IntDoublePair other) {
439 return Double.compare(value, other.value);
440 }
441
442 /**
443 * Returns the value of the pair.
444 * @return value
445 */
446 public double getValue() {
447 return value;
448 }
449
450 /**
451 * Returns the original position of the pair.
452 * @return position
453 */
454 public int getPosition() {
455 return position;
456 }
457 }
458 }