001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    
024    /**
025     * Implementation for the {@link ZipfDistribution}.
026     *
027     * @version $Revision: 920852 $ $Date: 2010-03-09 07:53:44 -0500 (Tue, 09 Mar 2010) $
028     */
029    public class ZipfDistributionImpl extends AbstractIntegerDistribution
030        implements ZipfDistribution, Serializable {
031    
032        /** Serializable version identifier. */
033        private static final long serialVersionUID = -140627372283420404L;
034    
035        /** Number of elements. */
036        private int numberOfElements;
037    
038        /** Exponent parameter of the distribution. */
039        private double exponent;
040    
041        /**
042         * Create a new Zipf distribution with the given number of elements and
043         * exponent. Both values must be positive; otherwise an
044         * <code>IllegalArgumentException</code> is thrown.
045         *
046         * @param numberOfElements the number of elements
047         * @param exponent the exponent
048         * @exception IllegalArgumentException if n &le; 0 or s &le; 0.0
049         */
050        public ZipfDistributionImpl(final int numberOfElements, final double exponent)
051            throws IllegalArgumentException {
052            setNumberOfElementsInternal(numberOfElements);
053            setExponentInternal(exponent);
054        }
055    
056        /**
057         * Get the number of elements (e.g. corpus size) for the distribution.
058         *
059         * @return the number of elements
060         */
061        public int getNumberOfElements() {
062            return numberOfElements;
063        }
064    
065        /**
066         * Set the number of elements (e.g. corpus size) for the distribution.
067         * The parameter value must be positive; otherwise an
068         * <code>IllegalArgumentException</code> is thrown.
069         *
070         * @param n the number of elements
071         * @exception IllegalArgumentException if n &le; 0
072         * @deprecated as of 2.1 (class will become immutable in 3.0)
073         */
074        @Deprecated
075        public void setNumberOfElements(final int n) {
076            setNumberOfElementsInternal(n);
077        }
078        /**
079         * Set the number of elements (e.g. corpus size) for the distribution.
080         * The parameter value must be positive; otherwise an
081         * <code>IllegalArgumentException</code> is thrown.
082         *
083         * @param n the number of elements
084         * @exception IllegalArgumentException if n &le; 0
085         */
086        private void setNumberOfElementsInternal(final int n)
087            throws IllegalArgumentException {
088            if (n <= 0) {
089                throw MathRuntimeException.createIllegalArgumentException(
090                        "invalid number of elements {0} (must be positive)",
091                        n);
092            }
093            this.numberOfElements = n;
094        }
095    
096        /**
097         * Get the exponent characterising the distribution.
098         *
099         * @return the exponent
100         */
101        public double getExponent() {
102            return exponent;
103        }
104    
105        /**
106         * Set the exponent characterising the distribution.
107         * The parameter value must be positive; otherwise an
108         * <code>IllegalArgumentException</code> is thrown.
109         *
110         * @param s the exponent
111         * @exception IllegalArgumentException if s &le; 0.0
112         * @deprecated as of 2.1 (class will become immutable in 3.0)
113         */
114        @Deprecated
115        public void setExponent(final double s) {
116            setExponentInternal(s);
117        }
118        /**
119         * Set the exponent characterising the distribution.
120         * The parameter value must be positive; otherwise an
121         * <code>IllegalArgumentException</code> is thrown.
122         *
123         * @param s the exponent
124         * @exception IllegalArgumentException if s &le; 0.0
125         */
126        private void setExponentInternal(final double s)
127            throws IllegalArgumentException {
128            if (s <= 0.0) {
129                throw MathRuntimeException.createIllegalArgumentException(
130                        "invalid exponent {0} (must be positive)",
131                        s);
132            }
133            this.exponent = s;
134        }
135    
136        /**
137         * The probability mass function P(X = x) for a Zipf distribution.
138         *
139         * @param x the value at which the probability density function is evaluated.
140         * @return the value of the probability mass function at x
141         */
142        public double probability(final int x) {
143            if (x <= 0 || x > numberOfElements) {
144                return 0.0;
145            }
146    
147            return (1.0 / Math.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent);
148    
149        }
150    
151        /**
152         * The probability distribution function P(X <= x) for a Zipf distribution.
153         *
154         * @param x the value at which the PDF is evaluated.
155         * @return Zipf distribution function evaluated at x
156         */
157        @Override
158        public double cumulativeProbability(final int x) {
159            if (x <= 0) {
160                return 0.0;
161            } else if (x >= numberOfElements) {
162                return 1.0;
163            }
164    
165            return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent);
166    
167        }
168    
169        /**
170         * Access the domain value lower bound, based on <code>p</code>, used to
171         * bracket a PDF root.
172         *
173         * @param p the desired probability for the critical value
174         * @return domain value lower bound, i.e.
175         *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code>
176         */
177        @Override
178        protected int getDomainLowerBound(final double p) {
179            return 0;
180        }
181    
182        /**
183         * Access the domain value upper bound, based on <code>p</code>, used to
184         * bracket a PDF root.
185         *
186         * @param p the desired probability for the critical value
187         * @return domain value upper bound, i.e.
188         *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
189         */
190        @Override
191        protected int getDomainUpperBound(final double p) {
192            return numberOfElements;
193        }
194    
195    
196        /**
197         * Calculates the Nth generalized harmonic number. See
198         * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
199         * Series</a>.
200         *
201         * @param n the term in the series to calculate (must be &ge; 1)
202         * @param m the exponent; special case m == 1.0 is the harmonic series
203         * @return the nth generalized harmonic number
204         */
205        private double generalizedHarmonic(final int n, final double m) {
206            double value = 0;
207            for (int k = n; k > 0; --k) {
208                value += 1.0 / Math.pow(k, m);
209            }
210            return value;
211        }
212    
213    }