BASIS  r3148
path.cxx
Go to the documentation of this file.
00001 /**
00002  * @file  path.cxx
00003  * @brief File/directory path related functions.
00004  *
00005  * Copyright (c) 2011, 2012 University of Pennsylvania. All rights reserved.<br />
00006  * See https://www.cbica.upenn.edu/sbia/software/license.html or COPYING file.
00007  *
00008  * Contact: SBIA Group <sbia-software at uphs.upenn.edu>
00009  */
00010 
00011 
00012 #include <vector>
00013 
00014 #include <basis/config.h> // platform macros - must be first
00015 
00016 #include <stdlib.h>       // malloc(), free(), _splitpath_s() (WINDOWS)
00017 #include <string.h>       // strncmp()
00018 #include <cctype>         // toupper()
00019 #include <algorithm>      // transform()
00020 
00021 #if WINDOWS
00022 #  include <windows.h>    // GetFileAttributes()
00023 #else
00024 #  include <sys/stat.h>   // stat(), lstat()
00025 #endif
00026 
00027 #include <basis/except.h> // to throw exceptions
00028 
00029 #include <basis/os.h>
00030 #include <basis/os/path.h>
00031 
00032 
00033 // acceptable in .cxx file
00034 using namespace std;
00035 
00036 
00037 namespace basis { namespace os { namespace path {
00038 
00039 
00040 // ===========================================================================
00041 // representation
00042 // ===========================================================================
00043 
// cSeparator is the separator appended when paths are built; cSeparators
// lists every character that is recognized as a separator when parsing.
#if !WINDOWS
    static const char  cSeparator  = '/';
    static const char* cSeparators = "/";
#else
    static const char  cSeparator  = '\\';
    static const char* cSeparators = "\\/";
#endif
00051 
00052 // ---------------------------------------------------------------------------
/**
 * @brief Whether a character is a path separator.
 *
 * A slash is always a separator; a backslash only if WINDOWS is set.
 */
inline bool issep(char c)
{
    if (c == '/') return true;
    #if WINDOWS
        if (c == '\\') return true;
    #endif
    return false;
}
00061 
00062 // ---------------------------------------------------------------------------
/**
 * @brief Get copy of string with every occurrence of a character replaced.
 *
 * @param [in] str  Input string (taken by value and modified in place).
 * @param [in] from Character to look for.
 * @param [in] to   Character written in place of @p from.
 *
 * @returns String of same length as @p str.
 */
static inline string replace(string str, char from, char to)
{
    std::replace(str.begin(), str.end(), from, to);
    return str;
}
00075 
00076 // ---------------------------------------------------------------------------
00077 string normpath(const string& path)
00078 {
00079     if (path.empty()) return "";
00080     char drive[3] = {'\0', ':', '\0'};
00081     size_t i = 0;
00082     #if WINDOWS
00083         if (path.size() > 1 && path[1] == ':') {
00084             drive[0] = path[0];
00085             i = 2;
00086         }
00087     #endif
00088     string norm_path = drive;
00089     bool abs = issep(path[i]);
00090     if (abs) {
00091         #if WINDOWS
00092             while (i <= path.size() && issep(path[i])) {
00093                 norm_path += cSeparator;
00094                 i++;
00095             }
00096         #else
00097             norm_path += cSeparator;
00098         #endif
00099     }
00100     string         current;
00101     vector<string> parts;
00102     while (i <= path.size()) {
00103         if (issep(path[i]) || path[i] == '\0') {
00104             if (current == "..") {
00105                 if (!abs && (parts.empty() || parts.back() == "..")) {
00106                     parts.push_back(current);
00107                 } else if (!parts.empty()) {
00108                     parts.pop_back();
00109                 }
00110             } else if (current != "" && current != ".") {
00111                 parts.push_back(current);
00112             }
00113             current.clear();
00114         } else {
00115             current += path[i];
00116         }
00117         i++;
00118     }
00119     for (i = 0; i < parts.size(); i++) {
00120         norm_path = join(norm_path, parts[i]);
00121     }
00122     return norm_path.empty() ? "." : norm_path;
00123 }
00124 
00125 // ---------------------------------------------------------------------------
00126 string posixpath(const string& path)
00127 {
00128     #if WINDOWS
00129         string norm_path = path;
00130     #else
00131         string norm_path = replace(path, '\\', '/');
00132     #endif
00133     norm_path = normpath(norm_path);
00134     #if WINDOWS
00135         norm_path = replace(norm_path, '\\', '/');
00136     #endif
00137     return norm_path;
00138 }
00139 
00140 // ---------------------------------------------------------------------------
00141 string ntpath(const string& path)
00142 {
00143     #if WINDOWS
00144         string norm_path = path;
00145     #else
00146         string norm_path = replace(path, '\\', '/');
00147     #endif
00148     norm_path = normpath(norm_path);
00149     #if UNIX
00150         norm_path = replace(norm_path, '/', '\\');
00151     #endif
00152     return norm_path;
00153 }
00154 
00155 // ===========================================================================
00156 // components
00157 // ===========================================================================
00158 
00159 // ---------------------------------------------------------------------------
00160 void split(const string& path, string& head, string& tail)
00161 {
00162     size_t last = path.find_last_of(cSeparators);
00163     if (last == string::npos) {
00164         head = "";
00165         tail = path;
00166     } else {
00167         size_t pos = last;
00168         if (last > 0) pos  = path.find_last_not_of(cSeparators, last - 1);
00169         if (pos == string::npos) head = path.substr(0, last + 1);
00170         else                     head = path.substr(0, pos  + 1);
00171         tail = path.substr(last + 1);
00172     }
00173 }
00174 
00175 // ---------------------------------------------------------------------------
00176 vector<string> split(const string& path)
00177 {
00178     vector<string> parts(2, "");
00179     split(path, parts[0], parts[1]);
00180     return parts;
00181 }
00182 
00183 // ---------------------------------------------------------------------------
/**
 * @brief Split path into drive specification and remainder.
 *
 * A drive is only recognized if WINDOWS is set and the second character
 * of the path is a colon; otherwise the drive is empty and the tail is
 * the entire path.
 *
 * @param [in]  path  Path to split.
 * @param [out] drive Drive specification including the colon, or "".
 * @param [out] tail  Path without the drive specification.
 */
void splitdrive(const string& path, string& drive, string& tail)
{
#if WINDOWS
    const bool has_drive = path.size() > 1 && path[1] == ':';
    if (has_drive) {
        tail  = path.substr(2);
        drive = path[0];
        drive += ':';
        return;
    }
#endif
    tail = path;
    drive.clear();
}
00198 
00199 // ---------------------------------------------------------------------------
00200 vector<string> splitdrive(const string& path)
00201 {
00202     vector<string> parts(2, "");
00203     splitdrive(path, parts[0], parts[1]);
00204     return parts;
00205 }
00206 
00207 // ---------------------------------------------------------------------------
00208 void splitext(const string& path, string& head, string& ext, const set<string>* exts, bool icase)
00209 {
00210     size_t pos = string::npos;
00211     // test user supplied extensions only
00212     if (exts) {
00213         for (set<string>::const_iterator i = exts->begin(); i != exts->end(); ++i) {
00214             if (path.size() < i->size()) continue;
00215             size_t start = path.size() - i->size();
00216             if (start < pos) { // longest match
00217                 if (icase) {
00218                     string str = path.substr(start);
00219                     string ext = *i;
00220                     std::transform(str.begin(), str.end(), str.begin(), ::toupper);
00221                     std::transform(ext.begin(), ext.end(), ext.begin(), ::toupper);
00222                     if (str == ext) pos = start;
00223                 } else if (path.compare(start, i->size(), *i) == 0) {
00224                     pos = start;
00225                 }
00226             }
00227         }
00228     // otherwise, get position of last dot
00229     } else {
00230         pos = path.find_last_of('.');
00231         // leading dot of file name in Posix indicates hidden file,
00232         // not start of file extension
00233         #if UNIX
00234             if (pos != string::npos && (pos == 0 || issep(path[pos - 1]))) {
00235                 pos = string::npos;
00236             }
00237         #endif
00238     }
00239     // split extension
00240     if (pos == string::npos) {
00241         head = path;
00242         ext  = "";
00243     } else {
00244         // tmp variable used for the case that head references the same input
00245         // string as path
00246         string tmp = path.substr(0, pos);
00247         ext        = path.substr(pos);
00248         head       = tmp;
00249     }
00250 }
00251 
00252 // ---------------------------------------------------------------------------
00253 vector<string> splitext(const string& path, const set<string>* exts)
00254 {
00255     vector<string> parts(2, "");
00256     splitext(path, parts[0], parts[1], exts);
00257     return parts;
00258 }
00259 
00260 // ---------------------------------------------------------------------------
00261 string dirname(const string& path)
00262 {
00263     vector<string> parts(2, "");
00264     split(path, parts[0], parts[1]);
00265     return parts[0];
00266 }
00267 
00268 // ---------------------------------------------------------------------------
00269 string basename(const string& path)
00270 {
00271     vector<string> parts(2, "");
00272     split(path, parts[0], parts[1]);
00273     return parts[1];
00274 }
00275 
00276 // ---------------------------------------------------------------------------
00277 bool hasext(const string& path, const set<string>* exts)
00278 {
00279     string ext = splitext(path, exts)[1];
00280     return exts ? exts->find(ext) != exts->end() : !ext.empty();
00281 }
00282 
00283 // ===========================================================================
00284 // conversion
00285 // ===========================================================================
00286 
00287 // ---------------------------------------------------------------------------
00288 bool isabs(const string& path)
00289 {
00290     size_t i = 0;
00291     #if WINDOWS
00292         if (path.size() > 1 && path[1] == ':') i = 2;
00293     #endif
00294     return i < path.size() && issep(path[i]);
00295 }
00296 
00297 // ---------------------------------------------------------------------------
00298 string abspath(const string& path)
00299 {
00300     return normpath(join(getcwd(), path));
00301 }
00302 
00303 // ---------------------------------------------------------------------------
00304 string relpath(const string& path, const string& base)
00305 {
00306     // if relative path is given just return it
00307     if (!isabs(path)) return path;
00308     // normalize paths
00309     string norm_path = normpath(path);
00310     string norm_base = normpath(join(getcwd(), base));
00311     // check if paths are on same drive
00312     #if WINDOWS
00313         string drive      = splitdrive(norm_path)[0];
00314         string base_drive = splitdrive(norm_base)[0];
00315         if (drive != base_drive) {
00316             BASIS_THROW(invalid_argument,
00317                         "Path is on drive " << drive << ", start is on drive " << base_drive);
00318         }
00319     #endif
00320     // find start of first path component in which paths differ
00321     string::const_iterator b = norm_base.begin();
00322     string::const_iterator p = norm_path.begin();
00323     size_t pos = 0;
00324     size_t i   = 0;
00325     while (b != norm_base.end() && p != norm_path.end()) {
00326         if (issep(*p)) {
00327             if (!issep(*b)) break;
00328             pos = i;
00329         } else if (*b != *p) {
00330             break;
00331         }
00332         b++; p++; i++;
00333     }
00334     // set pos to i (in this case, the size of one of the paths) if the end
00335     // of one path was reached, but the other path has a path separator
00336     // at this position, this is required below
00337     if ((b != norm_base.end() && issep(*b)) ||
00338         (p != norm_path.end() && issep(*p))) pos = i;
00339     // skip trailing separator of other path if end of one path reached
00340     if (b == norm_base.end() && p != norm_path.end() && issep(*p)) p++;
00341     if (p == norm_path.end() && b != norm_base.end() && issep(*b)) b++;
00342     // if paths are the same, just return a period (.)
00343     //
00344     // Thanks to the previous skipping of trailing separators, this condition
00345     // handles all of the following cases:
00346     //
00347     //    base := "/usr/bin"  path := "/usr/bin"
00348     //    base := "/usr/bin/" path := "/usr/bin/"
00349     //    base := "/usr/bin"  path := "/usr/bin/"
00350     //    base := "/usr/bin/" path := "/usr/bin"
00351     if (b == norm_base.end() && p == norm_path.end()) return ".";
00352     // otherwise, pos is the index of the last slash for which both paths
00353     // were identical; hence, everything that comes after in the original
00354     // path is preserved and for each following component in the base path
00355     // a "../" is prepended to the relative path
00356     string rel_path;
00357     // truncate base path with a separator as for each "*/" path component,
00358     // a "../" will be prepended to the relative path
00359     if (b != norm_base.end() && !issep(norm_base[norm_base.size() - 1])) {
00360         // attention: This operation may invalidate the iterator b!
00361         //            Therefore, remember position of iterator and get a new one.
00362         size_t pos = b - norm_base.begin();
00363         norm_base += cSeparator;
00364         b = norm_base.begin() + pos;
00365     }
00366     while (b != norm_base.end()) {
00367         if (issep(*b)) {
00368             rel_path += "..";
00369             rel_path += cSeparator;
00370         }
00371         b++;
00372     }
00373     if (pos + 1 < norm_path.size()) rel_path += norm_path.substr(pos + 1);
00374     // remove trailing path separator
00375     if (issep(rel_path[rel_path.size() - 1])) {
00376         rel_path.erase(rel_path.size() - 1);
00377     }
00378     return rel_path;
00379 }
00380 
00381 // ---------------------------------------------------------------------------
00382 string realpath(const string& path)
00383 {
00384     string curr_path = join(getcwd(), path);
00385     #if UNIX
00386         // use stringstream and std::getline() to split absolute path at slashes (/)
00387         stringstream ss(curr_path);
00388         curr_path.clear();
00389         string fname;
00390         string prev_path;
00391         string next_path;
00392         char slash;
00393         ss >> slash; // root slash
00394         while (getline(ss, fname, '/')) {
00395             // current absolute path
00396             curr_path += '/';
00397             curr_path += fname;
00398             // if current path is a symbolic link, follow it
00399             if (islink(curr_path)) {
00400                 // for safety reasons, restrict the depth of symbolic links followed
00401                 for (unsigned int i = 0; i < 100; i++) {
00402                     next_path = os::readlink(curr_path);
00403                     if (next_path.empty()) {
00404                         // if real path could not be determined because of permissions
00405                         // or invalid path, return the original path
00406                         break;
00407                     } else {
00408                         curr_path = join(prev_path, next_path);
00409                         if (!islink(next_path)) break;
00410                     }
00411                 }
00412                 // if real path could not be determined with the given maximum number
00413                 // of loop iterations (endless cycle?) or one of the symbolic links
00414                 // could not be read, just return original path as absolute path
00415                 if (islink(next_path)) {
00416                     return abspath(path);
00417                 }
00418             }
00419             // memorize previous path used as base for abspath()
00420             prev_path = curr_path;
00421         }
00422     #endif
00423     // normalize path after all symbolic links were resolved
00424     return normpath(curr_path);
00425 }
00426 
00427 // ---------------------------------------------------------------------------
00428 string join(const string& base, const string& path)
00429 {
00430     if (base.empty() || isabs(path))  return path;
00431     if (issep(base[base.size() - 1])) return base + path;
00432     #if WINDOWS
00433         return base + '\\' + path;
00434     #else
00435         return base + '/' + path;
00436     #endif
00437 }
00438 
00439 // ===========================================================================
00440 // file status
00441 // ===========================================================================
00442 
00443 // ---------------------------------------------------------------------------
/**
 * @brief Test whether a path refers to a file.
 *
 * On Windows, the directory flag of the attributes returned by
 * GetFileAttributes() must not be set. Otherwise, stat() has to succeed
 * and report a regular file.
 */
bool isfile(const std::string path)
{
#if WINDOWS
    const DWORD attrs = ::GetFileAttributes(path.c_str());
    return !(attrs & FILE_ATTRIBUTE_DIRECTORY);
#else
    struct stat st;
    return stat(path.c_str(), &st) == 0 && S_ISREG(st.st_mode);
#endif
}
00456 
00457 // ---------------------------------------------------------------------------
/**
 * @brief Test whether a path refers to an existing directory.
 *
 * On Windows, GetFileAttributes() returns INVALID_FILE_ATTRIBUTES on
 * failure. This value has all bits set and therefore has to be excluded
 * before the directory flag is tested; otherwise, a non-existent path
 * would be reported as a directory.
 *
 * @returns Whether the attributes of the path could be obtained and
 *          identify a directory.
 */
bool isdir(const std::string path)
{
    #if WINDOWS
        const DWORD info = ::GetFileAttributes(path.c_str());
        if (info == INVALID_FILE_ATTRIBUTES) return false;
        return (FILE_ATTRIBUTE_DIRECTORY & info) != 0;
    #else
        struct stat info;
        if (stat(path.c_str(), &info) != 0) return false;
        return S_ISDIR(info.st_mode);
    #endif
}
00470 
00471 // ---------------------------------------------------------------------------
/**
 * @brief Test whether a path exists.
 *
 * The path exists if its attributes can be obtained, i.e., using
 * GetFileAttributes() on Windows and stat() otherwise.
 */
bool exists(const std::string path)
{
#if WINDOWS
    return ::GetFileAttributes(path.c_str()) != INVALID_FILE_ATTRIBUTES;
#else
    struct stat st;
    return stat(path.c_str(), &st) == 0;
#endif
}
00483 
00484 // ---------------------------------------------------------------------------
/**
 * @brief Test whether a path is a symbolic link.
 *
 * Always false on Windows. Otherwise, lstat() has to succeed and report
 * a symbolic link.
 */
bool islink(const string& path)
{
    #if !WINDOWS
        struct stat st;
        return lstat(path.c_str(), &st) == 0 && S_ISLNK(st.st_mode);
    #else
        return false;
    #endif
}
00495 
00496 
00497 } // namespace path
00498 
00499 } // namespace os
00500 
00501 } // namespace basis