001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.correlation;
019    
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.linear.BlockRealMatrix;
022    import org.apache.commons.math.linear.RealMatrix;
023    import org.apache.commons.math.stat.ranking.NaturalRanking;
024    import org.apache.commons.math.stat.ranking.RankingAlgorithm;
025    
026    /**
027     * <p>Spearman's rank correlation. This implementation performs a rank
028     * transformation on the input data and then computes {@link PearsonsCorrelation}
029     * on the ranked data.</p>
030     *
031     * <p>By default, ranks are computed using {@link NaturalRanking} with default
032     * strategies for handling NaNs and ties in the data (NaNs maximal, ties averaged).
033     * The ranking algorithm can be set using a constructor argument.</p>
034     *
035     * @since 2.0
036     * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
037     */
038    
039    public class SpearmansCorrelation {
040    
041        /** Input data */
042        private final RealMatrix data;
043    
044        /** Ranking algorithm  */
045        private final RankingAlgorithm rankingAlgorithm;
046    
047        /** Rank correlation */
048        private final PearsonsCorrelation rankCorrelation;
049    
050        /**
051         * Create a SpearmansCorrelation with the given input data matrix
052         * and ranking algorithm.
053         *
054         * @param dataMatrix matrix of data with columns representing
055         * variables to correlate
056         * @param rankingAlgorithm ranking algorithm
057         */
058        public SpearmansCorrelation(final RealMatrix dataMatrix, final RankingAlgorithm rankingAlgorithm) {
059            this.data = dataMatrix.copy();
060            this.rankingAlgorithm = rankingAlgorithm;
061            rankTransform(data);
062            rankCorrelation = new PearsonsCorrelation(data);
063        }
064    
065        /**
066         * Create a SpearmansCorrelation from the given data matrix.
067         *
068         * @param dataMatrix matrix of data with columns representing
069         * variables to correlate
070         */
071        public SpearmansCorrelation(final RealMatrix dataMatrix) {
072            this(dataMatrix, new NaturalRanking());
073        }
074    
075        /**
076         * Create a SpearmansCorrelation without data.
077         */
078        public SpearmansCorrelation() {
079            data = null;
080            this.rankingAlgorithm = new NaturalRanking();
081            rankCorrelation = null;
082        }
083    
084        /**
085         * Calculate the Spearman Rank Correlation Matrix.
086         *
087         * @return Spearman Rank Correlation Matrix
088         */
089        public RealMatrix getCorrelationMatrix() {
090            return rankCorrelation.getCorrelationMatrix();
091        }
092    
093        /**
094         * Returns a {@link PearsonsCorrelation} instance constructed from the
095         * ranked input data. That is,
096         * <code>new SpearmansCorrelation(matrix).getRankCorrelation()</code>
097         * is equivalent to
098         * <code>new PearsonsCorrelation(rankTransform(matrix))</code> where
099         * <code>rankTransform(matrix)</code> is the result of applying the
100         * configured <code>RankingAlgorithm</code> to each of the columns of
101         * <code>matrix.</code>
102         *
103         * @return PearsonsCorrelation among ranked column data
104         */
105        public PearsonsCorrelation getRankCorrelation() {
106            return rankCorrelation;
107        }
108    
109        /**
110         * Computes the Spearman's rank correlation matrix for the columns of the
111         * input matrix.
112         *
113         * @param matrix matrix with columns representing variables to correlate
114         * @return correlation matrix
115         */
116        public RealMatrix computeCorrelationMatrix(RealMatrix matrix) {
117            RealMatrix matrixCopy = matrix.copy();
118            rankTransform(matrixCopy);
119            return new PearsonsCorrelation().computeCorrelationMatrix(matrixCopy);
120        }
121    
122        /**
123         * Computes the Spearman's rank correlation matrix for the columns of the
124         * input rectangular array.  The columns of the array represent values
125         * of variables to be correlated.
126         *
127         * @param matrix matrix with columns representing variables to correlate
128         * @return correlation matrix
129         */
130        public RealMatrix computeCorrelationMatrix(double[][] matrix) {
131           return computeCorrelationMatrix(new BlockRealMatrix(matrix));
132        }
133    
134        /**
135         * Computes the Spearman's rank correlation coefficient between the two arrays.
136         *
137         * </p>Throws IllegalArgumentException if the arrays do not have the same length
138         * or their common length is less than 2</p>
139         *
140         * @param xArray first data array
141         * @param yArray second data array
142         * @return Returns Spearman's rank correlation coefficient for the two arrays
143         * @throws  IllegalArgumentException if the arrays lengths do not match or
144         * there is insufficient data
145         */
146        public double correlation(final double[] xArray, final double[] yArray)
147        throws IllegalArgumentException {
148            if (xArray.length == yArray.length && xArray.length > 1) {
149                return new PearsonsCorrelation().correlation(rankingAlgorithm.rank(xArray),
150                        rankingAlgorithm.rank(yArray));
151            }
152            else {
153                throw MathRuntimeException.createIllegalArgumentException(
154                        "invalid array dimensions. xArray has size {0}; yArray has {1} elements",
155                        xArray.length, yArray.length);
156            }
157        }
158    
159        /**
160         * Applies rank transform to each of the columns of <code>matrix</code>
161         * using the current <code>rankingAlgorithm</code>
162         *
163         * @param matrix matrix to transform
164         */
165        private void rankTransform(RealMatrix matrix) {
166            for (int i = 0; i < matrix.getColumnDimension(); i++) {
167                matrix.setColumn(i, rankingAlgorithm.rank(matrix.getColumn(i)));
168            }
169        }
170    }