001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2016 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayDeque;
023import java.util.Deque;
024import java.util.List;
025import java.util.Locale;
026import java.util.Set;
027import java.util.regex.Pattern;
028
029import com.google.common.collect.ImmutableSortedSet;
030import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
031import com.puppycrawl.tools.checkstyle.api.DetailAST;
032import com.puppycrawl.tools.checkstyle.api.FileContents;
033import com.puppycrawl.tools.checkstyle.api.Scope;
034import com.puppycrawl.tools.checkstyle.api.TextBlock;
035import com.puppycrawl.tools.checkstyle.api.TokenTypes;
036import com.puppycrawl.tools.checkstyle.utils.CheckUtils;
037import com.puppycrawl.tools.checkstyle.utils.CommonUtils;
038import com.puppycrawl.tools.checkstyle.utils.ScopeUtils;
039
040/**
041 * Custom Checkstyle Check to validate Javadoc.
042 *
043 * @author Chris Stillwell
044 * @author Daniel Grenner
045 * @author Travis Schneeberger
046 */
047public class JavadocStyleCheck
048    extends AbstractCheck {
049
050    /** Message property key for the Unclosed HTML message. */
051    public static final String MSG_JAVADOC_MISSING = "javadoc.missing";
052
053    /** Message property key for the Unclosed HTML message. */
054    public static final String MSG_EMPTY = "javadoc.empty";
055
056    /** Message property key for the Unclosed HTML message. */
057    public static final String MSG_NO_PERIOD = "javadoc.noPeriod";
058
059    /** Message property key for the Unclosed HTML message. */
060    public static final String MSG_INCOMPLETE_TAG = "javadoc.incompleteTag";
061
062    /** Message property key for the Unclosed HTML message. */
063    public static final String MSG_UNCLOSED_HTML = "javadoc.unclosedHtml";
064
065    /** Message property key for the Extra HTML message. */
066    public static final String MSG_EXTRA_HTML = "javadoc.extraHtml";
067
068    /** HTML tags that do not require a close tag. */
069    private static final Set<String> SINGLE_TAGS = ImmutableSortedSet.of(
070            "br", "li", "dt", "dd", "hr", "img", "p", "td", "tr", "th");
071
072    /** HTML tags that are allowed in java docs.
073     * From http://www.w3schools.com/tags/default.asp
074     * The forms and structure tags are not allowed
075     */
076    private static final Set<String> ALLOWED_TAGS = ImmutableSortedSet.of(
077            "a", "abbr", "acronym", "address", "area", "b", "bdo", "big",
078            "blockquote", "br", "caption", "cite", "code", "colgroup", "dd",
079            "del", "div", "dfn", "dl", "dt", "em", "fieldset", "font", "h1",
080            "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd",
081            "li", "ol", "p", "pre", "q", "samp", "small", "span", "strong",
082            "style", "sub", "sup", "table", "tbody", "td", "tfoot", "th",
083            "thead", "tr", "tt", "u", "ul", "var");
084
085    /** The scope to check. */
086    private Scope scope = Scope.PRIVATE;
087
088    /** The visibility scope where Javadoc comments shouldn't be checked. **/
089    private Scope excludeScope;
090
091    /** Format for matching the end of a sentence. */
092    private String endOfSentenceFormat = "([.?!][ \t\n\r\f<])|([.?!]$)";
093
094    /** Regular expression for matching the end of a sentence. */
095    private Pattern endOfSentencePattern;
096
097    /**
098     * Indicates if the first sentence should be checked for proper end of
099     * sentence punctuation.
100     */
101    private boolean checkFirstSentence = true;
102
103    /**
104     * Indicates if the HTML within the comment should be checked.
105     */
106    private boolean checkHtml = true;
107
108    /**
109     * Indicates if empty javadoc statements should be checked.
110     */
111    private boolean checkEmptyJavadoc;
112
113    @Override
114    public int[] getDefaultTokens() {
115        return getAcceptableTokens();
116    }
117
118    @Override
119    public int[] getAcceptableTokens() {
120        return new int[] {
121            TokenTypes.ANNOTATION_DEF,
122            TokenTypes.ANNOTATION_FIELD_DEF,
123            TokenTypes.CLASS_DEF,
124            TokenTypes.CTOR_DEF,
125            TokenTypes.ENUM_CONSTANT_DEF,
126            TokenTypes.ENUM_DEF,
127            TokenTypes.INTERFACE_DEF,
128            TokenTypes.METHOD_DEF,
129            TokenTypes.PACKAGE_DEF,
130            TokenTypes.VARIABLE_DEF,
131        };
132    }
133
134    @Override
135    public int[] getRequiredTokens() {
136        return CommonUtils.EMPTY_INT_ARRAY;
137    }
138
139    @Override
140    public void visitToken(DetailAST ast) {
141        if (shouldCheck(ast)) {
142            final FileContents contents = getFileContents();
143            // Need to start searching for the comment before the annotations
144            // that may exist. Even if annotations are not defined on the
145            // package, the ANNOTATIONS AST is defined.
146            final TextBlock textBlock =
147                contents.getJavadocBefore(ast.getFirstChild().getLineNo());
148
149            checkComment(ast, textBlock);
150        }
151    }
152
153    /**
154     * Whether we should check this node.
155     * @param ast a given node.
156     * @return whether we should check a given node.
157     */
158    private boolean shouldCheck(final DetailAST ast) {
159        boolean check = false;
160
161        if (ast.getType() == TokenTypes.PACKAGE_DEF) {
162            check = getFileContents().inPackageInfo();
163        }
164        else if (!ScopeUtils.isInCodeBlock(ast)) {
165            final Scope customScope;
166
167            if (ScopeUtils.isInInterfaceOrAnnotationBlock(ast)
168                    || ast.getType() == TokenTypes.ENUM_CONSTANT_DEF) {
169                customScope = Scope.PUBLIC;
170            }
171            else {
172                customScope = ScopeUtils.getScopeFromMods(ast.findFirstToken(TokenTypes.MODIFIERS));
173            }
174            final Scope surroundingScope = ScopeUtils.getSurroundingScope(ast);
175
176            check = customScope.isIn(scope)
177                    && (surroundingScope == null || surroundingScope.isIn(scope))
178                    && (excludeScope == null
179                        || !customScope.isIn(excludeScope)
180                        || surroundingScope != null
181                            && !surroundingScope.isIn(excludeScope));
182        }
183        return check;
184    }
185
186    /**
187     * Performs the various checks against the Javadoc comment.
188     *
189     * @param ast the AST of the element being documented
190     * @param comment the source lines that make up the Javadoc comment.
191     *
192     * @see #checkFirstSentenceEnding(DetailAST, TextBlock)
193     * @see #checkHtmlTags(DetailAST, TextBlock)
194     */
195    private void checkComment(final DetailAST ast, final TextBlock comment) {
196        if (comment == null) {
197            // checking for missing docs in JavadocStyleCheck is not consistent
198            // with the rest of CheckStyle...  Even though, I didn't think it
199            // made sense to make another check just to ensure that the
200            // package-info.java file actually contains package Javadocs.
201            if (getFileContents().inPackageInfo()) {
202                log(ast.getLineNo(), MSG_JAVADOC_MISSING);
203            }
204        }
205        else {
206            if (checkFirstSentence) {
207                checkFirstSentenceEnding(ast, comment);
208            }
209
210            if (checkHtml) {
211                checkHtmlTags(ast, comment);
212            }
213
214            if (checkEmptyJavadoc) {
215                checkJavadocIsNotEmpty(comment);
216            }
217        }
218    }
219
220    /**
221     * Checks that the first sentence ends with proper punctuation.  This method
222     * uses a regular expression that checks for the presence of a period,
223     * question mark, or exclamation mark followed either by whitespace, an
224     * HTML element, or the end of string. This method ignores {_AT_inheritDoc}
225     * comments for TokenTypes that are valid for {_AT_inheritDoc}.
226     *
227     * @param ast the current node
228     * @param comment the source lines that make up the Javadoc comment.
229     */
230    private void checkFirstSentenceEnding(final DetailAST ast, TextBlock comment) {
231        final String commentText = getCommentText(comment.getText());
232
233        if (!commentText.isEmpty()
234            && !getEndOfSentencePattern().matcher(commentText).find()
235            && !(commentText.startsWith("{@inheritDoc}")
236            && JavadocTagInfo.INHERIT_DOC.isValidOn(ast))) {
237            log(comment.getStartLineNo(), MSG_NO_PERIOD);
238        }
239    }
240
241    /**
242     * Checks that the Javadoc is not empty.
243     *
244     * @param comment the source lines that make up the Javadoc comment.
245     */
246    private void checkJavadocIsNotEmpty(TextBlock comment) {
247        final String commentText = getCommentText(comment.getText());
248
249        if (commentText.isEmpty()) {
250            log(comment.getStartLineNo(), MSG_EMPTY);
251        }
252    }
253
254    /**
255     * Returns the comment text from the Javadoc.
256     * @param comments the lines of Javadoc.
257     * @return a comment text String.
258     */
259    private static String getCommentText(String... comments) {
260        final StringBuilder builder = new StringBuilder();
261        for (final String line : comments) {
262            final int textStart = findTextStart(line);
263
264            if (textStart != -1) {
265                if (line.charAt(textStart) == '@') {
266                    //we have found the tag section
267                    break;
268                }
269                builder.append(line.substring(textStart));
270                trimTail(builder);
271                builder.append('\n');
272            }
273        }
274
275        return builder.toString().trim();
276    }
277
278    /**
279     * Finds the index of the first non-whitespace character ignoring the
280     * Javadoc comment start and end strings (&#47** and *&#47) as well as any
281     * leading asterisk.
282     * @param line the Javadoc comment line of text to scan.
283     * @return the int index relative to 0 for the start of text
284     *         or -1 if not found.
285     */
286    private static int findTextStart(String line) {
287        int textStart = -1;
288        for (int i = 0; i < line.length();) {
289            if (!Character.isWhitespace(line.charAt(i))) {
290                if (line.regionMatches(i, "/**", 0, "/**".length())) {
291                    i += 2;
292                }
293                else if (line.regionMatches(i, "*/", 0, 2)) {
294                    i++;
295                }
296                else if (line.charAt(i) != '*') {
297                    textStart = i;
298                    break;
299                }
300            }
301            i++;
302        }
303        return textStart;
304    }
305
306    /**
307     * Trims any trailing whitespace or the end of Javadoc comment string.
308     * @param builder the StringBuilder to trim.
309     */
310    private static void trimTail(StringBuilder builder) {
311        int index = builder.length() - 1;
312        while (true) {
313            if (Character.isWhitespace(builder.charAt(index))) {
314                builder.deleteCharAt(index);
315            }
316            else if (index > 0 && builder.charAt(index) == '/'
317                    && builder.charAt(index - 1) == '*') {
318                builder.deleteCharAt(index);
319                builder.deleteCharAt(index - 1);
320                index--;
321                while (builder.charAt(index - 1) == '*') {
322                    builder.deleteCharAt(index - 1);
323                    index--;
324                }
325            }
326            else {
327                break;
328            }
329            index--;
330        }
331    }
332
333    /**
334     * Checks the comment for HTML tags that do not have a corresponding close
335     * tag or a close tag that has no previous open tag.  This code was
336     * primarily copied from the DocCheck checkHtml method.
337     *
338     * @param ast the node with the Javadoc
339     * @param comment the {@code TextBlock} which represents
340     *                 the Javadoc comment.
341     */
342    private void checkHtmlTags(final DetailAST ast, final TextBlock comment) {
343        final int lineNo = comment.getStartLineNo();
344        final Deque<HtmlTag> htmlStack = new ArrayDeque<>();
345        final String[] text = comment.getText();
346
347        final TagParser parser = new TagParser(text, lineNo);
348
349        while (parser.hasNextTag()) {
350            final HtmlTag tag = parser.nextTag();
351
352            if (tag.isIncompleteTag()) {
353                log(tag.getLineNo(), MSG_INCOMPLETE_TAG,
354                    text[tag.getLineNo() - lineNo]);
355                return;
356            }
357            if (tag.isClosedTag()) {
358                //do nothing
359                continue;
360            }
361            if (tag.isCloseTag()) {
362                // We have found a close tag.
363                if (isExtraHtml(tag.getId(), htmlStack)) {
364                    // No corresponding open tag was found on the stack.
365                    log(tag.getLineNo(),
366                        tag.getPosition(),
367                        MSG_EXTRA_HTML,
368                        tag);
369                }
370                else {
371                    // See if there are any unclosed tags that were opened
372                    // after this one.
373                    checkUnclosedTags(htmlStack, tag.getId());
374                }
375            }
376            else {
377                //We only push html tags that are allowed
378                if (isAllowedTag(tag)) {
379                    htmlStack.push(tag);
380                }
381            }
382        }
383
384        // Identify any tags left on the stack.
385        // Skip multiples, like <b>...<b>
386        String lastFound = "";
387        final List<String> typeParameters = CheckUtils.getTypeParameterNames(ast);
388        for (final HtmlTag htmlTag : htmlStack) {
389            if (!isSingleTag(htmlTag)
390                && !htmlTag.getId().equals(lastFound)
391                && !typeParameters.contains(htmlTag.getId())) {
392                log(htmlTag.getLineNo(), htmlTag.getPosition(), MSG_UNCLOSED_HTML, htmlTag);
393                lastFound = htmlTag.getId();
394            }
395        }
396    }
397
398    /**
399     * Checks to see if there are any unclosed tags on the stack.  The token
400     * represents a html tag that has been closed and has a corresponding open
401     * tag on the stack.  Any tags, except single tags, that were opened
402     * (pushed on the stack) after the token are missing a close.
403     *
404     * @param htmlStack the stack of opened HTML tags.
405     * @param token the current HTML tag name that has been closed.
406     */
407    private void checkUnclosedTags(Deque<HtmlTag> htmlStack, String token) {
408        final Deque<HtmlTag> unclosedTags = new ArrayDeque<>();
409        HtmlTag lastOpenTag = htmlStack.pop();
410        while (!token.equalsIgnoreCase(lastOpenTag.getId())) {
411            // Find unclosed elements. Put them on a stack so the
412            // output order won't be back-to-front.
413            if (isSingleTag(lastOpenTag)) {
414                lastOpenTag = htmlStack.pop();
415            }
416            else {
417                unclosedTags.push(lastOpenTag);
418                lastOpenTag = htmlStack.pop();
419            }
420        }
421
422        // Output the unterminated tags, if any
423        // Skip multiples, like <b>..<b>
424        String lastFound = "";
425        for (final HtmlTag htag : unclosedTags) {
426            lastOpenTag = htag;
427            if (lastOpenTag.getId().equals(lastFound)) {
428                continue;
429            }
430            lastFound = lastOpenTag.getId();
431            log(lastOpenTag.getLineNo(),
432                lastOpenTag.getPosition(),
433                MSG_UNCLOSED_HTML,
434                lastOpenTag);
435        }
436    }
437
438    /**
439     * Determines if the HtmlTag is one which does not require a close tag.
440     *
441     * @param tag the HtmlTag to check.
442     * @return {@code true} if the HtmlTag is a single tag.
443     */
444    private static boolean isSingleTag(HtmlTag tag) {
445        // If its a singleton tag (<p>, <br>, etc.), ignore it
446        // Can't simply not put them on the stack, since singletons
447        // like <dt> and <dd> (unhappily) may either be terminated
448        // or not terminated. Both options are legal.
449        return SINGLE_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
450    }
451
452    /**
453     * Determines if the HtmlTag is one which is allowed in a javadoc.
454     *
455     * @param tag the HtmlTag to check.
456     * @return {@code true} if the HtmlTag is an allowed html tag.
457     */
458    private static boolean isAllowedTag(HtmlTag tag) {
459        return ALLOWED_TAGS.contains(tag.getId().toLowerCase(Locale.ENGLISH));
460    }
461
462    /**
463     * Determines if the given token is an extra HTML tag. This indicates that
464     * a close tag was found that does not have a corresponding open tag.
465     *
466     * @param token an HTML tag id for which a close was found.
467     * @param htmlStack a Stack of previous open HTML tags.
468     * @return {@code false} if a previous open tag was found
469     *         for the token.
470     */
471    private static boolean isExtraHtml(String token, Deque<HtmlTag> htmlStack) {
472        boolean isExtra = true;
473        for (final HtmlTag tag : htmlStack) {
474            // Loop, looking for tags that are closed.
475            // The loop is needed in case there are unclosed
476            // tags on the stack. In that case, the stack would
477            // not be empty, but this tag would still be extra.
478            if (token.equalsIgnoreCase(tag.getId())) {
479                isExtra = false;
480                break;
481            }
482        }
483
484        return isExtra;
485    }
486
487    /**
488     * Sets the scope to check.
489     * @param from string to get the scope from
490     */
491    public void setScope(String from) {
492        scope = Scope.getInstance(from);
493    }
494
495    /**
496     * Set the excludeScope.
497     * @param excludeScope a {@code String} value
498     */
499    public void setExcludeScope(String excludeScope) {
500        this.excludeScope = Scope.getInstance(excludeScope);
501    }
502
503    /**
504     * Set the format for matching the end of a sentence.
505     * @param format format for matching the end of a sentence.
506     */
507    public void setEndOfSentenceFormat(String format) {
508        endOfSentenceFormat = format;
509    }
510
511    /**
512     * Returns a regular expression for matching the end of a sentence.
513     *
514     * @return a regular expression for matching the end of a sentence.
515     */
516    private Pattern getEndOfSentencePattern() {
517        if (endOfSentencePattern == null) {
518            endOfSentencePattern = Pattern.compile(endOfSentenceFormat);
519        }
520        return endOfSentencePattern;
521    }
522
523    /**
524     * Sets the flag that determines if the first sentence is checked for
525     * proper end of sentence punctuation.
526     * @param flag {@code true} if the first sentence is to be checked
527     */
528    public void setCheckFirstSentence(boolean flag) {
529        checkFirstSentence = flag;
530    }
531
532    /**
533     * Sets the flag that determines if HTML checking is to be performed.
534     * @param flag {@code true} if HTML checking is to be performed.
535     */
536    public void setCheckHtml(boolean flag) {
537        checkHtml = flag;
538    }
539
540    /**
541     * Sets the flag that determines if empty Javadoc checking should be done.
542     * @param flag {@code true} if empty Javadoc checking should be done.
543     */
544    public void setCheckEmptyJavadoc(boolean flag) {
545        checkEmptyJavadoc = flag;
546    }
547}