Subversion
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
svn_path.h
Go to the documentation of this file.
1 /**
2  * @copyright
3  * ====================================================================
4  * Licensed to the Apache Software Foundation (ASF) under one
5  * or more contributor license agreements. See the NOTICE file
6  * distributed with this work for additional information
7  * regarding copyright ownership. The ASF licenses this file
8  * to you under the Apache License, Version 2.0 (the
9  * "License"); you may not use this file except in compliance
10  * with the License. You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing,
15  * software distributed under the License is distributed on an
16  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17  * KIND, either express or implied. See the License for the
18  * specific language governing permissions and limitations
19  * under the License.
20  * ====================================================================
21  * @endcopyright
22  *
23  * @file svn_path.h
24  * @brief A path manipulation library
25  *
26  * All incoming and outgoing paths are non-NULL and in UTF-8, unless
27  * otherwise documented.
28  *
29  * No result path ever ends with a separator, no matter whether the
30  * path is a file or directory, because we always canonicalize() it.
31  *
32  * Nearly all the @c svn_path_xxx functions expect paths passed into
33  * them to be in canonical form as defined by the Subversion path
34  * library itself. The only functions which do *not* have such
35  * expectations are:
36  *
37  * - @c svn_path_canonicalize()
38  * - @c svn_path_is_canonical()
39  * - @c svn_path_internal_style()
40  * - @c svn_path_uri_encode()
41  *
42  * For the most part, we mean what most anyone would mean when talking
43  * about canonical paths, but to be on the safe side, you must run
44  * your paths through @c svn_path_canonicalize() before passing them to
45  * other functions in this API.
46  */
47 
48 #ifndef SVN_PATH_H
49 #define SVN_PATH_H
50 
51 #include <apr.h>
52 #include <apr_pools.h>
53 #include <apr_tables.h>
54 
55 #include "svn_types.h"
56 #include "svn_string.h"
57 #include "svn_dirent_uri.h"
58 
59 
60 #ifdef __cplusplus
61 extern "C" {
62 #endif /* __cplusplus */
63 
64 
65 
66 /** Convert @a path from the local style to the canonical internal style.
67  *
68  * @deprecated Provided for backward compatibility with the 1.6 API.
69  * New code should use svn_dirent_internal_style().
70  */
72 const char *
73 svn_path_internal_style(const char *path, apr_pool_t *pool);
74 
75 /** Convert @a path from the canonical internal style to the local style.
76  *
77  * @deprecated Provided for backward compatibility with the 1.6 API.
78  * New code should use svn_dirent_local_style().
79  */
81 const char *
82 svn_path_local_style(const char *path, apr_pool_t *pool);
83 
84 
85 /** Join a base path (@a base) with a component (@a component), allocating
86  * the result in @a pool. @a component need not be a single component: it
87  * can be any path, absolute or relative to @a base.
88  *
89  * If either @a base or @a component is the empty path, then the other
90  * argument will be copied and returned. If both are the empty path the
91  * empty path is returned.
92  *
93  * If the @a component is an absolute path, then it is copied and returned.
94  * Exactly one slash character ('/') is used to join the components,
95  * accounting for any trailing slash in @a base.
96  *
97  * Note that the contents of @a base are not examined, so it is possible to
98  * use this function for constructing URLs, or for relative URLs or
99  * repository paths.
100  *
101  * This function is NOT appropriate for native (local) file
102  * paths. Only for "internal" canonicalized paths, since it uses '/'
103  * for the separator. Further, an absolute path (for @a component) is
104  * based on a leading '/' character. Thus, an "absolute URI" for the
105  * @a component won't be detected. An absolute URI can only be used
106  * for the base.
107  *
108  * @deprecated Provided for backward compatibility with the 1.6 API.
109  * New code should use svn_dirent_join(), svn_uri_join(),
110  * svn_relpath_join() or svn_fspath__join().
111  */
113 char *
114 svn_path_join(const char *base, const char *component, apr_pool_t *pool);
115 
116 /** Join multiple components onto a @a base path, allocated in @a pool. The
117  * components are terminated by a @c NULL.
118  *
119  * If any component is the empty string, it will be ignored.
120  *
121  * If any component is an absolute path, then it resets the base and
122  * further components will be appended to it.
123  *
124  * This function does not support URLs.
125  *
126  * See svn_path_join() for further notes about joining paths.
127  *
128  * @deprecated Provided for backward compatibility with the 1.6 API.
129  * For new code, consider using svn_dirent_join_many() or a sequence of
130  * calls to one of the *_join() functions.
131  */
133 char *
134 svn_path_join_many(apr_pool_t *pool, const char *base, ...);
135 
136 
137 /** Get the basename of the specified canonicalized @a path. The
138  * basename is defined as the last component of the path (ignoring any
139  * trailing slashes). If the @a path is root ("/"), then that is
140  * returned. Otherwise, the returned value will have no slashes in
141  * it.
142  *
143  * Example: svn_path_basename("/foo/bar") -> "bar"
144  *
145  * The returned basename will be allocated in @a pool.
146  *
147  * @note If an empty string is passed, then an empty string will be returned.
148  *
149  * @deprecated Provided for backward compatibility with the 1.6 API.
150  * New code should use svn_dirent_basename(), svn_uri_basename(),
151  * svn_relpath_basename() or svn_fspath__basename().
152  */
154 char *
155 svn_path_basename(const char *path, apr_pool_t *pool);
156 
157 /** Get the dirname of the specified canonicalized @a path, defined as
158  * the path with its basename removed. If @a path is root ("/"), it is
159  * returned unchanged.
160  *
161  * The returned dirname will be allocated in @a pool.
162  *
163  * @deprecated Provided for backward compatibility with the 1.6 API.
164  * New code should use svn_dirent_dirname(), svn_uri_dirname(),
165  * svn_relpath_dirname() or svn_fspath__dirname().
166  */
168 char *
169 svn_path_dirname(const char *path, apr_pool_t *pool);
170 
171 /** Split @a path into a root portion and an extension such that
172  * the root + the extension = the original path, and where the
173  * extension contains no period (.) characters. If not @c NULL, set
174  * @a *path_root to the root portion. If not @c NULL, set
175  * @a *path_ext to the extension (or "" if there is no extension
176  * found). Allocate both @a *path_root and @a *path_ext in @a pool.
177  *
178  * @since New in 1.5.
179  */
180 void
181 svn_path_splitext(const char **path_root, const char **path_ext,
182  const char *path, apr_pool_t *pool);
183 
184 /** Return the number of components in the canonicalized @a path.
185  *
186  * @since New in 1.1.
187 */
188 apr_size_t
189 svn_path_component_count(const char *path);
190 
191 /** Add a @a component (a NULL-terminated C-string) to the
192  * canonicalized @a path. @a component is allowed to contain
193  * directory separators.
194  *
195  * If @a path is non-empty, append the appropriate directory separator
196  * character, and then @a component. If @a path is empty, simply set it to
197  * @a component; don't add any separator character.
198  *
199  * If the result ends in a separator character, then remove the separator.
200  */
201 void
202 svn_path_add_component(svn_stringbuf_t *path, const char *component);
203 
204 /** Remove one component off the end of the canonicalized @a path. */
205 void
206 svn_path_remove_component(svn_stringbuf_t *path);
207 
208 /** Remove @a n components off the end of the canonicalized @a path.
209  * Equivalent to calling svn_path_remove_component() @a n times.
210  *
211  * @since New in 1.1.
212  */
213 void
214 svn_path_remove_components(svn_stringbuf_t *path, apr_size_t n);
215 
216 /** Divide the canonicalized @a path into @a *dirpath and @a
217  * *base_name, allocated in @a pool.
218  *
219  * If @a dirpath or @a base_name is NULL, then don't set that one.
220  *
221  * Either @a dirpath or @a base_name may be @a path's own address, but they
222  * may not both be the same address, or the results are undefined.
223  *
224  * If @a path has two or more components, the separator between @a dirpath
225  * and @a base_name is not included in either of the new names.
226  *
227  * examples:
228  * - <pre>"/foo/bar/baz" ==> "/foo/bar" and "baz"</pre>
229  * - <pre>"/bar" ==> "/" and "bar"</pre>
230  * - <pre>"/" ==> "/" and "/"</pre>
231  * - <pre>"X:/" ==> "X:/" and "X:/"</pre>
232  * - <pre>"bar" ==> "" and "bar"</pre>
233  * - <pre>"" ==> "" and ""</pre>
234  *
235  * @deprecated Provided for backward compatibility with the 1.6 API.
236  * New code should use svn_dirent_split(), svn_uri_split(),
237  * svn_relpath_split() or svn_fspath__split().
238  */
240 void
241 svn_path_split(const char *path,
242  const char **dirpath,
243  const char **base_name,
244  apr_pool_t *pool);
245 
246 
247 /** Return non-zero iff @a path is empty ("") or represents the current
248  * directory -- that is, if prepending it as a component to an existing
249  * path would result in no meaningful change.
250  */
251 int
252 svn_path_is_empty(const char *path);
253 
254 
255 #ifndef SVN_DIRENT_URI_H
256 /* This declaration has been moved to svn_dirent_uri.h, and remains
257  here only for compatibility reasons. */
258 svn_boolean_t
259 svn_dirent_is_root(const char *dirent, apr_size_t len);
260 #endif /* SVN_DIRENT_URI_H */
261 
262 
263 /** Return a new path (or URL) like @a path, but transformed such that
264  * some types of path specification redundancies are removed.
265  *
266  * This involves collapsing redundant "/./" elements, removing
267  * multiple adjacent separator characters, removing trailing
268  * separator characters, and possibly other semantically inoperative
269  * transformations.
270  *
271  * Convert the scheme and hostname to lowercase (see issue #2475)
272  *
273  * The returned path may be statically allocated, equal to @a path, or
274  * allocated from @a pool.
275  *
276  * @deprecated Provided for backward compatibility with the 1.6 API.
277  * New code should use svn_dirent_canonicalize(), svn_uri_canonicalize(),
278  * svn_relpath_canonicalize() or svn_fspath__canonicalize().
279  */
281 const char *
282 svn_path_canonicalize(const char *path, apr_pool_t *pool);
283 
284 /** Return @c TRUE iff path is canonical. Use @a pool for temporary
285  * allocations.
286  *
287  * @since New in 1.5.
288  * @deprecated Provided for backward compatibility with the 1.6 API.
289  * New code should use svn_dirent_is_canonical(), svn_uri_is_canonical(),
290  * svn_relpath_is_canonical() or svn_fspath__is_canonical().
291  */
293 svn_boolean_t
294 svn_path_is_canonical(const char *path, apr_pool_t *pool);
295 
296 
297 /** Return an integer greater than, equal to, or less than 0, according
298  * as @a path1 is greater than, equal to, or less than @a path2.
299  */
300 int
301 svn_path_compare_paths(const char *path1, const char *path2);
302 
303 
304 /** Return the longest common path shared by two canonicalized paths,
305  * @a path1 and @a path2. If there's no common ancestor, return the
306  * empty path.
307  *
308  * @a path1 and @a path2 may be URLs. In order for two URLs to have
309  * a common ancestor, they must (a) have the same protocol (since two URLs
310  * with the same path but different protocols may point at completely
311  * different resources), and (b) share a common ancestor in their path
312  * component, i.e. 'protocol://' is not a sufficient ancestor.
313  *
314  * @deprecated Provided for backward compatibility with the 1.6 API.
315  * New code should use svn_dirent_get_longest_ancestor(),
316  * svn_uri_get_longest_ancestor(), svn_relpath_get_longest_ancestor() or
317  * svn_fspath__get_longest_ancestor().
318  */
320 char *
321 svn_path_get_longest_ancestor(const char *path1,
322  const char *path2,
323  apr_pool_t *pool);
324 
325 /** Convert @a relative canonicalized path to an absolute path and
326  * return the results in @a *pabsolute, allocated in @a pool.
327  *
328  * @a relative may be a URL, in which case no attempt is made to convert it,
329  * and a copy of the URL is returned.
330  *
331  * @deprecated Provided for backward compatibility with the 1.6 API.
332  * New code should use svn_dirent_get_absolute() on a non-URL input.
333  */
335 svn_error_t *
336 svn_path_get_absolute(const char **pabsolute,
337  const char *relative,
338  apr_pool_t *pool);
339 
340 /** Return the path part of the canonicalized @a path in @a
341  * *pdirectory, and the file part in @a *pfile. If @a path is a
342  * directory, set @a *pdirectory to @a path, and @a *pfile to the
343  * empty string. If @a path does not exist it is treated as if it is
344  * a file, since directories do not normally vanish.
345  *
346  * @deprecated Provided for backward compatibility with the 1.6 API.
347  * New code should implement the required logic directly; no direct
348  * replacement is provided.
349  */
351 svn_error_t *
352 svn_path_split_if_file(const char *path,
353  const char **pdirectory,
354  const char **pfile,
355  apr_pool_t *pool);
356 
357 /** Find the common prefix of the canonicalized paths in @a targets
358  * (an array of <tt>const char *</tt>'s), and remove redundant paths if @a
359  * remove_redundancies is TRUE.
360  *
361  * - Set @a *pcommon to the absolute path of the path or URL common to
362  * all of the targets. If the targets have no common prefix, or
363  * are a mix of URLs and local paths, set @a *pcommon to the
364  * empty string.
365  *
366  * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
367  * to an array of targets relative to @a *pcommon, and if
368  * @a remove_redundancies is TRUE, omit any paths/URLs that are
369  * descendants of another path/URL in @a targets. If *pcommon
370  * is empty, @a *pcondensed_targets will contain full URLs and/or
371  * absolute paths; redundancies can still be removed (from both URLs
372  * and paths). If @a pcondensed_targets is NULL, leave it alone.
373  *
374  * Else if there is exactly one target, then
375  *
376  * - Set @a *pcommon to that target, and
377  *
378  * - If @a pcondensed_targets is non-NULL, set @a *pcondensed_targets
379  * to an array containing zero elements. Else if
380  * @a pcondensed_targets is NULL, leave it alone.
381  *
382  * If there are no items in @a targets, set @a *pcommon and (if
383  * applicable) @a *pcondensed_targets to @c NULL.
384  *
385  * @note There is no guarantee that @a *pcommon is within a working
386  * copy.
387  *
388  * @deprecated Provided for backward compatibility with the 1.6 API.
389  * New code should use svn_dirent_condense_targets() or
390  * svn_uri_condense_targets().
391  */
393 svn_error_t *
394 svn_path_condense_targets(const char **pcommon,
395  apr_array_header_t **pcondensed_targets,
396  const apr_array_header_t *targets,
397  svn_boolean_t remove_redundancies,
398  apr_pool_t *pool);
399 
400 
401 /** Copy a list of canonicalized @a targets, one at a time, into @a
402  * pcondensed_targets, omitting any targets that are found earlier in
403  * the list, or whose ancestor is found earlier in the list. Ordering
404  * of targets in the original list is preserved in the condensed list
405  * of targets. Use @a pool for any allocations.
406  *
407  * How does this differ in functionality from svn_path_condense_targets()?
408  *
409  * Here's the short version:
410  *
411  * 1. Disclaimer: if you wish to debate the following, talk to Karl. :-)
412  * Order matters for updates because a multi-arg update is not
413  * atomic, and CVS users are used to, when doing 'cvs up targetA
414  * targetB' seeing targetA get updated, then targetB. I think the
415  * idea is that if you're in a time-sensitive or flaky-network
416  * situation, a user can say, "I really *need* to update
417  * wc/A/D/G/tau, but I might as well update my whole working copy if
418  * I can." So that user will do 'svn up wc/A/D/G/tau wc', and if
419  * something dies in the middle of the 'wc' update, at least the
420  * user has 'tau' up-to-date.
421  *
422  * 2. Also, we have this notion of an anchor and a target for updates
423  * (the anchor is where the update editor is rooted, the target is
424  * the actual thing we want to update). I needed a function that
425  * would NOT screw with my input paths so that I could tell the
426  * difference between someone being in A/D and saying 'svn up G' and
427  * being in A/D/G and saying 'svn up .' -- believe it or not, these
428  * two things don't mean the same thing. svn_path_condense_targets()
429  * plays with absolute paths (which is fine, so does
430  * svn_path_remove_redundancies()), but the difference is that it
431  * actually tweaks those targets to be relative to the "grandfather
432  * path" common to all the targets. Updates don't require a
433  * "grandfather path" at all, and even if it did, the whole
434  * conversion to an absolute path drops the crucial difference
435  * between saying "i'm in foo, update bar" and "i'm in foo/bar,
436  * update '.'"
437  */
438 svn_error_t *
439 svn_path_remove_redundancies(apr_array_header_t **pcondensed_targets,
440  const apr_array_header_t *targets,
441  apr_pool_t *pool);
442 
443 
444 /** Decompose the canonicalized @a path into an array of <tt>const
445  * char *</tt> components, allocated in @a pool. If @a path is
446  * absolute, the first component will be a lone dir separator (the
447  * root directory).
448  */
449 apr_array_header_t *
450 svn_path_decompose(const char *path, apr_pool_t *pool);
451 
452 /** Join an array of <tt>const char *</tt> components into a '/'
453  * separated path, allocated in @a pool. The joined path is absolute if
454  * the first component is a lone dir separator.
455  *
456  * Calling svn_path_compose() on the output of svn_path_decompose()
457  * will return the exact same path.
458  *
459  * @since New in 1.5.
460  */
461 const char *
462 svn_path_compose(const apr_array_header_t *components, apr_pool_t *pool);
463 
464 /** Test that @a name is a single path component, that is:
465  * - not @c NULL or empty.
466  * - not a `/'-separated directory path
467  * - not `..'
468  */
469 svn_boolean_t
470 svn_path_is_single_path_component(const char *name);
471 
472 
473 /**
474  * Test to see if a backpath, i.e. '..', is present in @a path.
475  * If not, return @c FALSE.
476  * If so, return @c TRUE.
477  *
478  * @since New in 1.1.
479  */
480 svn_boolean_t
481 svn_path_is_backpath_present(const char *path);
482 
483 
484 /**
485  * Test to see if a dotpath, i.e. '.', is present in @a path.
486  * If not, return @c FALSE.
487  * If so, return @c TRUE.
488  *
489  * @since New in 1.6.
490  */
491 svn_boolean_t
492 svn_path_is_dotpath_present(const char *path);
493 
494 
495 /** Test if @a path2 is a child of @a path1.
496  * If not, return @c NULL.
497  * If so, return a copy of the remainder path, allocated in @a pool.
498  * (The remainder is the component which, added to @a path1, yields
499  * @a path2. The remainder does not begin with a dir separator.)
500  *
501  * Both paths must be in canonical form, and must either be absolute,
502  * or contain no ".." components.
503  *
504  * If @a path2 is the same as @a path1, it is not considered a child, so the
505  * result is @c NULL; an empty string is never returned.
506  *
507  * @note In 1.5 this function has been extended to allow a @c NULL @a pool
508  * in which case a pointer into @a path2 will be returned to
509  * identify the remainder path.
510  *
511  * @deprecated Provided for backward compatibility with the 1.6 API.
512  * New code should use svn_dirent_is_child(), svn_uri_is_child(),
513  * svn_relpath_is_child() or svn_fspath__is_child().
514  */
516 const char *
517 svn_path_is_child(const char *path1, const char *path2, apr_pool_t *pool);
518 
519 /** Return TRUE if @a path1 is an ancestor of @a path2 or the paths are equal
520  * and FALSE otherwise.
521  *
522  * @since New in 1.3.
523  *
524  * @deprecated Provided for backward compatibility with the 1.6 API.
525  * New code should use svn_dirent_is_ancestor(), svn_uri_is_ancestor(),
526  * svn_relpath_is_ancestor() or svn_fspath__is_ancestor().
527  */
529 svn_boolean_t
530 svn_path_is_ancestor(const char *path1, const char *path2);
531 
532 /**
533  * Check whether @a path is a valid Subversion path.
534  *
535  * A valid Subversion pathname is a UTF-8 string without control
536  * characters. "Valid" means Subversion can store the pathname in
537  * a repository. There may be other, OS-specific, limitations on
538  * what paths can be represented in a working copy.
539  *
540  * ASSUMPTION: @a path is a valid UTF-8 string. This function does
541  * not check UTF-8 validity.
542  *
543  * Return @c SVN_NO_ERROR if valid and @c SVN_ERR_FS_PATH_SYNTAX if
544  * invalid.
545  *
546  * @note Despite returning an @c SVN_ERR_FS_* error, this function has
547  * nothing to do with the versioned filesystem's concept of validity.
548  *
549  * @since New in 1.2.
550  */
551 svn_error_t *
552 svn_path_check_valid(const char *path, apr_pool_t *pool);
553 
554 
555 /** URI/URL stuff
556  *
557  * @defgroup svn_path_uri_stuff URI/URL conversion
558  * @{
559  */
560 
561 /** Return TRUE iff @a path looks like a valid absolute URL. */
562 svn_boolean_t
563 svn_path_is_url(const char *path);
564 
565 /** Return @c TRUE iff @a path is URI-safe, @c FALSE otherwise. */
566 svn_boolean_t
567 svn_path_is_uri_safe(const char *path);
568 
569 /** Return a URI-encoded copy of @a path, allocated in @a pool. (@a
570  path can be an arbitrary UTF-8 string and does not have to be a
571  canonical path.) */
572 const char *
573 svn_path_uri_encode(const char *path, apr_pool_t *pool);
574 
575 /** Return a URI-decoded copy of @a path, allocated in @a pool. */
576 const char *
577 svn_path_uri_decode(const char *path, apr_pool_t *pool);
578 
579 /** Extend @a url by @a component, URI-encoding that @a component
580  * before adding it to the @a url; return the new @a url, allocated in
581  * @a pool. If @a component is @c NULL, just return a copy of @a url,
582  * allocated in @a pool.
583  *
584  * @a component need not be a single path segment, but if it contains
585  * multiple segments, they must be separated by '/'. @a component
586  * should not begin with '/', however; if it does, the behavior is
587  * undefined.
588  *
589  * @a url must be in canonical format; it may not have a trailing '/'.
590  *
591  * @note To add a component that is already URI-encoded, use
592  * <tt>svn_path_join(url, component, pool)</tt> instead.
593  *
594  * @note gstein suggests this for when @a component begins with '/':
595  *
596  * "replace the path entirely
597  * https://example.com:4444/base/path joined with /leading/slash,
598  * should return: https://example.com:4444/leading/slash
599  * per the RFCs on combining URIs"
600  *
601  * We may implement that someday, which is why leading '/' is
602  * merely undefined right now.
603  *
604  * @since New in 1.6.
605  */
606 const char *
607 svn_path_url_add_component2(const char *url,
608  const char *component,
609  apr_pool_t *pool);
610 
611 /** Like svn_path_url_add_component2(), but allows path components that
612  * end with a trailing '/'
613  *
614  * @deprecated Provided for backward compatibility with the 1.5 API.
615  */
617 const char *
618 svn_path_url_add_component(const char *url,
619  const char *component,
620  apr_pool_t *pool);
621 
622 /**
623  * Convert @a iri (Internationalized URI) to a URI.
624  * The return value may be the same as @a iri if it was already
625  * a URI. Else, allocate the return value in @a pool.
626  *
627  * @since New in 1.1.
628  */
629 const char *
630 svn_path_uri_from_iri(const char *iri, apr_pool_t *pool);
631 
632 /**
633  * URI-encode certain characters in @a uri that are not valid in a URI, but
634  * don't have any special meaning in @a uri at their positions. If no
635  * characters need escaping, just return @a uri.
636  *
637  * @note Currently, this function escapes <, >, ", space, {, }, |, \, ^, and `.
638  * This may be extended in the future to do context-dependent escaping.
639  *
640  * @since New in 1.1.
641  */
642 const char *
643 svn_path_uri_autoescape(const char *uri, apr_pool_t *pool);
644 
645 /** @} */
646 
647 /** Charset conversion stuff
648  *
649  * @defgroup svn_path_charset_stuff Charset conversion
650  * @{
651  */
652 
653 /** Convert @a path_utf8 from UTF-8 to the internal encoding used by APR. */
654 svn_error_t *
655 svn_path_cstring_from_utf8(const char **path_apr,
656  const char *path_utf8,
657  apr_pool_t *pool);
658 
659 /** Convert @a path_apr from the internal encoding used by APR to UTF-8. */
660 svn_error_t *
661 svn_path_cstring_to_utf8(const char **path_utf8,
662  const char *path_apr,
663  apr_pool_t *pool);
664 
665 
666 /** @} */
667 
668 #ifdef __cplusplus
669 }
670 #endif /* __cplusplus */
671 
672 
673 #endif /* SVN_PATH_H */