BASIS  version 1.2.3 (revision 2104)
path.h
Go to the documentation of this file.
00001 /**
00002  * @file  path.h
00003  * @brief Basic file path manipulation and related system functions.
00004  *
00005  * The implementations provided by this module are related to the manipulation
00006  * of file paths. These implementations are meant to be simple and therefore
00007  * focus on non-multibyte path strings only.
00008  *
00009  * Note that in order to improve portability and because of the main focus
00010  * on Unix-based systems, slashes (/) are used as path separators. A function
00011  * to convert a given path to its native representation is provided, however.
00012  * Further, only relative paths and absolute paths which start with a slash (/)
00013  * on Unix-based systems or a slash (/), backslash (\), or drive specification
00014  * on Windows are supported. Hence, UNC paths, the inclusion of a hostname
00015  * in the path,... are not supported. Also, masking of slashes or backslashes
00016  * is not supported either. This is just a simple implementation that should
00017  * work for most of the use cases in software written at SBIA. If not, please
00018  * contact the maintainer of the BASIS package.
00019  *
00020  * Copyright (c) 2011 University of Pennsylvania. All rights reserved.<br />
00021  * See https://www.cbica.upenn.edu/sbia/software/license.html or COPYING file.
00022  *
00023  * Contact: SBIA Group <sbia-software at uphs.upenn.edu>
00024  */
00025 
00026 #pragma once
00027 #ifndef _SBIA_BASIS_PATH_H
00028 #define _SBIA_BASIS_PATH_H
00029 
00030 
00031 #include <string>
00032 #include <set>
00033 
00034 
00035 namespace sbia
00036 {
00037 
00038 namespace basis
00039 {
00040 
00041 
00042 // ===========================================================================
00043 // constants
00044 // ===========================================================================
00045 
00046 /**
00047  * @brief Native path separator, i.e., either slash (/) or backslash (\).
00048  */
00049 extern const char cPathSeparator;
00050 
00051 /**
00052  * @brief Native path separator as string, i.e., either slash (/) or backslash (\).
00053  */
00054 extern const std::string cPathSeparatorStr;
00055 
00056 // ===========================================================================
00057 // path representations
00058 // ===========================================================================
00059 
00060 /**
00061  * @brief Whether a given string is a valid path.
00062  *
00063  * @attention This function only checks whether the string is a valid path
00064  *            identifier. It does <b>not</b> check whether the file exists.
00065  *
00066  * @sa exists(), is_file()
00067  *
00068  * @param [in] path   The path string.
00069  * @param [in] strict Whether to be strict, i.e., whether a drive specification
00070  *                    other than "C:" is considered invalid on UNIX-based
00071  *                    systems.
00072  *
00073  * @return Whether the given string is a valid path.
00074  */
00075 bool is_valid_path(const std::string& path, bool strict = true);
00076 
00077 /**
00078  * @brief Clean path, i.e., remove occurrences of "./", duplicate slashes,...
00079  *
00080  * This function removes single periods (.) enclosed by slashes or backslashes,
00081  * duplicate slashes (/) or backslashes (\), and further tries to reduce the
00082  * number of parent directory references.
00083  *
00084  * For example, "../bla//.//.\bla\\\\\bla/../.." is converted to "../bla".
00085  *
00086  * @param [in] path Path.
00087  *
00088  * @return Cleaned path.
00089  */
00090 std::string clean_path(const std::string& path);
00091 
00092 /**
00093  * @brief Convert path to Unix representation.
00094  *
00095  * @sa to_windows_path()
00096  * @sa to_native_path()
00097  * @sa is_valid_path()
00098  *
00099  * @param [in] path  Path.
00100  * @param [in] drive Whether the drive specification should be preserved.
00101  *
00102  * @return Path in Unix representation, i.e., with slashes (/) as path
00103  *         separators and without drive specification if the drive parameter
00104  *         is set to false.
00105  *
00106  * @throw std::invalid_argument if the given path is not valid.
00107  */
00108 std::string to_unix_path(const std::string& path, bool drive = false);
00109 
00110 /**
00111  * @brief Convert path to Windows representation.
00112  *
00113  * @sa to_unix_path()
00114  * @sa to_native_path()
00115  * @sa is_valid_path()
00116  *
00117  * @param [in] path Path.
00118  *
00119  * @return Path in Windows representation, i.e., with backslashes (\) as path
00120  *         separators and drive specification. If the input path does not
00121  *         specify the drive, "C:/" is used as drive specification.
00122  *
00123  * @throw std::invalid_argument if the given path is not valid.
00124  */
00125 std::string to_windows_path(const std::string& path);
00126 
00127 /**
00128  * @brief Convert path to native representation.
00129  *
00130  * In general, Unix-style paths should be preferred as these in most cases also
00131  * work on Windows.
00132  *
00133  * @sa to_unix_path()
00134  * @sa to_windows_path()
00135  * @sa is_valid_path()
00136  *
00137  * @param [in] path Path.
00138  *
00139  * @return Path in native representation, i.e., the representation used by
00140  *         the underlying operating system.
00141  *
00142  * @throw std::invalid_argument if the given path is not valid.
00143  */
00144 std::string to_native_path(const std::string& path);
00145 
00146 // ===========================================================================
00147 // working directory
00148 // ===========================================================================
00149 
00150 /**
00151  * @brief Get absolute path of the (current) working directory.
00152  *
00153  * @return Absolute path of working directory or empty string on error.
00154  */
00155 std::string get_working_directory();
00156 
00157 // ===========================================================================
00158 // path components
00159 // ===========================================================================
00160 
00161 /**
00162  * @brief Split path into its components.
00163  *
00164  * This function splits a path into the path root, the path directory, the
00165  * path base name without extension, and the file path extension including
00166  * the leading period (.). Note that if a directory path is given as input
00167  * where the name of the last directory in this path includes a period (.),
00168  * this part is falsely interpreted as file name extension. This is because
00169  * this function does not query the file system whether a given path exists
00170  * or is a directory path.
00171  *
00172  * Examples:
00173 @verbatim
00174    path                     | root   | dir           | fname       | ext
00175    -------------------------+--------+---------------+-------------+--------
00176    "/usr/bin"               | "/"    | "usr/"        | "bin"       | ""
00177    "/home/user/info.txt"    | "/"    | "home/user/"  | "info"      | ".txt"
00178    "word.doc"               | "./"   | ""            | "word"      | ".doc"
00179    "../word.doc"            | "./"   | "../"         | "word"      | ".doc"
00180    "C:/WINDOWS/regedit.exe" | "C:/"  | "WINDOWS/"    | "regedit"   | ".exe"
00181    "d:\data"                | "D:/"  | ""            | "data"      | ""
00182    "/usr/local/"            | "/"    | "usr/local/"  | ""          | ""
00183 @endverbatim
00184  *
00185  * On Windows, if the path starts with a slash (/) without leading drive letter
00186  * followed by a colon (:), the returned root component is set to "C:/".
00187  * On Unix, if the path starts with a drive specification, a slash (/) is
00188  * returned as root.
00189  *
00190  * The original path (note that the resulting string will, however, not
00191  * necessarily equal the input path string) can be reassembled as follows:
00192  *
00193  * @code
00194  * std::string path = root + dir + fname + ext;
00195  * @endcode
00196  *
00197  * @sa is_valid_path()
00198  *
00199  * @param [in]  path  Path.
00200  * @param [out] root  Root component of the given path. If the given path is
00201  *                    absolute, the root component is a slash (/) on
00202  *                    Unix-based systems or the drive letter followed by a
00203  *                    colon (:) and a trailing slash (/) on Windows. Otherwise,
00204  *                    the root component is the current working directory and
00205  *                    hence a period (.) followed by a slash (/) is returned.
00206  *                    If NULL is given, the root is not returned.
00207  * @param [out] dir   The directory part of the path including trailing slash
00208  *                    (/). Note that the directory component of the path
00209  *                    '/bla/bla/' is 'bla/bla/', while the directory component
00210  *                    of '/bla/bla' is 'bla/'. If NULL is given, the
00211  *                    directory component is not returned.
00212  * @param [out] fname The name of the directory or the name of the file
00213  *                    without extension. If NULL is given, the name is not
00214  *                    returned.
00215  * @param [out] ext   The file name extension. If NULL is given, the extension
00216  *                    is not returned.
00217  * @param [in]  exts  Set of additionally recognized extensions. Note that the
00218  *                    given set can contain extensions with dots (.) as part of
00219  *                    the extension, e.g., ".nii.gz". If given, the longest
00220  *                    extension from the given set which is equal to the end of
00221  *                    the file path is returned as extension. Otherwise, the part
00222  *                    after the last dot (including the dot) is considered to be
00223  *                    the file name extension.
00224  *
00225  * @throw std::invalid_argument if the given path is not valid.
00226  */
00227 void split_path(const std::string&           path,
00228                 std::string*                 root,
00229                 std::string*                 dir,
00230                 std::string*                 fname,
00231                 std::string*                 ext,
00232                 const std::set<std::string>* exts = NULL);
00233 
00234 /**
00235  * @brief Get file root.
00236  *
00237  * @sa split_path()
00238  * @sa is_valid_path()
00239  *
00240  * @param [in] path Path.
00241  *
00242  * @return Root component of file path.
00243  *
00244  * @throw std::invalid_argument if the given path is not valid.
00245  */
00246 std::string get_file_root(const std::string& path);
00247 
00248 /**
00249  * @brief Get file directory.
00250  *
00251  * @sa split_path()
00252  * @sa is_valid_path()
00253  *
00254  * @param [in] path Path.
00255  *
00256  * @return Root component plus directory component of file path without
00257  *         leading slash.
00258  *
00259  * @throw std::invalid_argument if the given path is not valid.
00260  */
00261 std::string get_file_directory(const std::string& path);
00262 
00263 /**
00264  * @brief Get file name with extension.
00265  *
00266  * @sa split_path()
00267  * @sa is_valid_path()
00268  *
00269  * @param [in] path Path.
00270  *
00271  * @return File name without directory.
00272  *
00273  * @throw std::invalid_argument if the given path is not valid.
00274  */
00275 std::string get_file_name(const std::string& path);
00276 
00277 /**
00278  * @brief Get file name without extension.
00279  *
00280  * @sa split_path()
00281  * @sa is_valid_path()
00282  *
00283  * @param [in] path Path.
00284  * @param [in] exts Set of additionally recognized extensions. Note that the
00285  *                  given set can contain extensions with dots (.) as part of
00286  *                  the extension, e.g., ".nii.gz". If given, the longest
00287  *                  extension from the given set which is equal to the end of
00288  *                  the file path is removed. Otherwise, the part after the
00289  *                  last dot (including the dot) is removed.
00290  *
00291  * @return File name without directory and extension.
00292  *
00293  * @throw std::invalid_argument if the given path is not valid.
00294  */
00295 std::string get_file_name_without_extension(const std::string&           path,
00296                                             const std::set<std::string>* exts = NULL);
00297 
00298 /**
00299  * @brief Get file name extension.
00300  *
00301  * @sa split_path()
00302  * @sa is_valid_path()
00303  *
00304  * @param [in] path Path.
00305  * @param [in] exts Set of recognized extensions. Note that the given set
00306  *                  can contain extensions with dots (.) as part of the
00307  *                  extension, e.g., ".nii.gz". If NULL or an empty set is
00308  *                  given, the part after the last dot (including the dot)
00309  *                  is considered to be the file name extension. Otherwise,
00310  *                  the longest extension from the given set which is equal
00311  *                  to the end of the file path is returned.
00312  *
00313  * @return File name extension including leading period (.).
00314  *
00315  * @throw std::invalid_argument if the given path is not valid.
00316  */
00317 std::string get_file_name_extension(const std::string&           path,
00318                                     const std::set<std::string>* exts = NULL);
00319 
00320 /**
00321  * @brief Test whether a given path has an extension.
00322  *
00323  * @param [in] path Path.
00324  * @param [in] exts Set of recognized extensions or NULL.
00325  *
00326  * @return Whether the given path has a file name extension. If @p exts is not
00327  *         NULL, this function returns true only if the file name ends in one
00328  *         of the specified extensions (including dot if required). Otherwise,
00329  *         it only checks if the path has a dot (.) in the file name.
00330  */
00331 bool has_extension(const std::string& path, const std::set<std::string>* exts = NULL);
00332 
00333 // ===========================================================================
00334 // absolute / relative paths
00335 // ===========================================================================
00336 
00337 /**
00338  * @brief Test whether a given path is absolute.
00339  *
00340  * @sa is_valid_path()
00341  *
00342  * @param [in] path Absolute or relative path.
00343  *
00344  * @return Whether the given path is absolute.
00345  *
00346  * @throw std::invalid_argument if the given path is not valid.
00347  */
00348 bool is_absolute(const std::string& path);
00349 
00350 /**
00351  * @brief Test whether a given path is relative.
00352  *
00353  * @sa is_valid_path()
00354  *
00355  * @param [in] path Absolute or relative path.
00356  *
00357  * @return Whether the given path is relative.
00358  *
00359  * @throw std::invalid_argument if the given path is not valid.
00360  */
00361 bool is_relative(const std::string& path);
00362 
00363 /**
00364  * @brief Get absolute path given a relative path.
00365  *
00366  * This function converts a relative path to an absolute path. If the given
00367  * path is already absolute, this path is passed through unchanged.
00368  *
00369  * @sa is_valid_path()
00370  *
00371  * @param [in] path Absolute or relative path.
00372  *
00373  * @return Absolute path.
00374  *
00375  * @throw std::invalid_argument if the given path is not valid.
00376  */
00377 std::string to_absolute_path(const std::string& path);
00378 
00379 /**
00380  * @brief Get absolute path given a relative path and a base path.
00381  *
00382  * This function converts a relative path to an absolute path. If the given
00383  * path is already absolute, this path is passed through unchanged.
00384  *
00385  * @sa is_valid_path()
00386  *
00387  * @param [in] base Base path used for relative path.
00388  * @param [in] path Absolute or relative path.
00389  *
00390  * @return Absolute path.
00391  *
00392  * @throw std::invalid_argument if the given path is not valid.
00393  */
00394 std::string to_absolute_path(const std::string& base, const std::string& path);
00395 
00396 /**
00397  * @brief Get path relative to current working directory.
00398  *
00399  * This function converts a path to a path relative to the current working
00400  * directory. If the input path is relative, it is first made absolute using
00401  * the current working directory and then made relative to this directory.
00402  *
00403  * @sa is_valid_path()
00404  *
00405  * @param [in] path Absolute or relative path.
00406  *
00407  * @return Path relative to current working directory.
00408  *
00409  * @throw std::invalid_argument if the given path is not valid.
00410  */
00411 std::string to_relative_path(const std::string& path);
00412 
00413 /**
00414  * @brief Get path relative to given absolute path.
00415  *
00416  * This function converts a path to a path relative to a given base path.
00417  * If the input path is relative, it is first made absolute using the
00418  * current working directory and then made relative to the given base path.
00419  *
00420  * @sa is_valid_path()
00421  *
00422  * @param [in] base Base path used for relative path.
00423  * @param [in] path Absolute or relative path.
00424  *
00425  * @return Path relative to base path.
00426  *
00427  * @throw std::invalid_argument if the given path is not valid.
00428  */
00429 std::string to_relative_path(const std::string& base, const std::string& path);
00430 
00431 /**
00432  * @brief Join two paths, e.g., base path and relative path.
00433  *
00434  * This function joins two paths. If the second path is an absolute path,
00435  * this cleaned absolute path is returned. Otherwise, the base path is
00436  * prepended to the relative path and the resulting relative or absolute
00437  * path returned.
00438  *
00439  * @param [in] base Base path.
00440  * @param [in] path Relative or absolute path.
00441  *
00442  * @return Joined path.
00443  */
00444 std::string join_paths(const std::string& base, const std::string& path);
00445 
00446 // ===========================================================================
00447 // file / directory checks
00448 // ===========================================================================
00449 
00450 /**
00451  * @brief Test the existence of a file or directory.
00452  *
00453  * @param [in] path File or directory path.
00454  *
00455  * @return Whether the given file or directory exists.
00456  */
00457 bool exists(const std::string path);
00458 
00459 /**
00460  * @brief Test whether a given path is the path of an existent file.
00461  *
00462  * @note This function follows symbolic links.
00463  *
00464  * @param [in] path File path.
00465  *
00466  * @return Whether the given path is an existent file.
00467  */
00468 bool is_file(const std::string path);
00469 
00470 /**
00471  * @brief Test whether a given path is the path of an existent directory.
00472  *
00473  * @note This function follows symbolic links.
00474  *
00475  * @param [in] path Directory path.
00476  *
00477  * @return Whether the given path is an existent directory.
00478  */
00479 bool is_dir(const std::string path);
00480 
00481 /**
00482  * @brief Whether a given path is a symbolic link.
00483  *
00484  * @sa is_valid_path()
00485  *
00486  * @param [in] path Path.
00487  *
00488  * @return Whether the given path denotes a symbolic link.
00489  *
00490  * @throw std::invalid_argument if the given path is not valid.
00491  */
00492 bool is_symlink(const std::string& path);
00493 
00494 // ===========================================================================
00495 // make / remove directory
00496 // ===========================================================================
00497 
00498 /**
00499  * @brief Make directory optionally including parent directories.
00500  *
00501  * @param [in] path   Path of the directory.
00502  * @param [in] parent Whether to also create parent directories if non-existent.
00503  *
00504  * @return Whether the directory was created successfully.
00505  *         Note that on Unix, the directories are created using
00506  *         mode 0755. On Windows, the default security descriptor is passed
00507  *         on to the CreateDirectory() function.
00508  */
00509 bool make_directory(const std::string& path, bool parent = true);
00510 
00511 /**
00512  * @brief Remove directory.
00513  *
00514  * @param [in] path      Path of the directory.
00515  * @param [in] recursive Remove subdirectories and files inside the directory.
00516  *
00517  * @return Whether the directory was removed successfully.
00518  */
00519 bool remove_directory(const std::string& path, bool recursive = false);
00520 
00521 /**
00522  * @brief Remove files and directories from directory.
00523  *
00524  * @param [in] path Path of the directory.
00525  *
00526  * @return Whether the directory was cleared successfully, i.e., leaving
00527  *         the directory @p path empty.
00528  */
00529 bool clear_directory(const std::string& path);
00530 
00531 // ===========================================================================
00532 // symbolic links
00533 // ===========================================================================
00534 
00535 /**
00536  * @brief Read value of symbolic link.
00537  *
00538  * @param [in]  link  Path of symbolic link.
00539  * @param [out] value Value of symbolic link.
00540  *
00541  * @return Whether the given path is a symbolic link and its value could be
00542  *         read and returned successfully.
00543  */
00544 bool read_symlink(const std::string& link, std::string& value);
00545 
00546 /**
00547  * @brief Get canonical file path.
00548  *
00549  * This function resolves symbolic links and returns a cleaned path.
00550  *
00551  * @sa read_symlink()
00552  * @sa clean_path()
00553  *
00554  * @param [in] path Path.
00555  *
00556  * @return Canonical file path without duplicate slashes, ".", "..", and
00557  *         symbolic links.
00558  */
00559 std::string get_real_path(const std::string& path);
00560 
00561 // ===========================================================================
00562 // executable file
00563 // ===========================================================================
00564 
00565 /**
00566  * @brief Get canonical path of executable file.
00567  *
00568  * @sa get_executable_directory()
00569  * @sa get_executable_name()
00570  *
00571  * @return Canonical path of executable file or empty string on error.
00572  */
00573 std::string get_executable_path();
00574 
00575 /**
00576  * @brief Get canonical path of directory containing executable file.
00577  *
00578  * @sa get_executable_path()
00579  * @sa get_executable_name()
00580  *
00581  * @return Canonical path of directory containing executable file.
00582  */
00583 std::string get_executable_directory();
00584 
00585 /**
00586  * @brief Get name of executable.
00587  *
00588  * @note The name of the executable may or may not include the file name
00589  *       extension depending on the executable type and operating system.
00590  *       Hence, this function is neither an equivalent to
00591  *       get_file_name(get_executable_path()) nor
00592  *       get_file_name_without_extension(get_executable_path()).
00593  *
00594  * @sa get_executable_path()
00595  * @sa get_executable_directory()
00596  *
00597  * @return Name of the executable derived from the executable's file path
00598  *         or empty string on error.
00599  */
00600 std::string get_executable_name();
00601 
00602 
00603 } // namespace basis
00604 
00605 } // namespace sbia
00606 
00607 
00608 #endif // _SBIA_BASIS_PATH_H