// path.cxx
// Go to the documentation of this file.
00001 /** 00002 * @file path.cxx 00003 * @brief File/directory path related functions. 00004 * 00005 * Copyright (c) 2011, 2012 University of Pennsylvania. All rights reserved.<br /> 00006 * See https://www.cbica.upenn.edu/sbia/software/license.html or COPYING file. 00007 * 00008 * Contact: SBIA Group <sbia-software at uphs.upenn.edu> 00009 */ 00010 00011 00012 #include <vector> 00013 00014 #include <basis/config.h> // platform macros - must be first 00015 00016 #include <stdlib.h> // malloc(), free(), _splitpath_s() (WINDOWS) 00017 #include <string.h> // strncmp() 00018 #include <cctype> // toupper() 00019 #include <algorithm> // transform() 00020 00021 #if WINDOWS 00022 # include <windows.h> // GetFileAttributes() 00023 #else 00024 # include <sys/stat.h> // stat(), lstat() 00025 #endif 00026 00027 #include <basis/except.h> // to throw exceptions 00028 00029 #include <basis/os.h> 00030 #include <basis/os/path.h> 00031 00032 00033 // acceptable in .cxx file 00034 using namespace std; 00035 00036 00037 namespace basis { namespace os { namespace path { 00038 00039 00040 // =========================================================================== 00041 // representation 00042 // =========================================================================== 00043 00044 #if WINDOWS 00045 static const char cSeparator = '\\'; 00046 static const char* cSeparators = "\\/"; 00047 #else 00048 static const char cSeparator = '/'; 00049 static const char* cSeparators = "/"; 00050 #endif 00051 00052 // --------------------------------------------------------------------------- 00053 inline bool issep(char c) 00054 { 00055 #if WINDOWS 00056 return c == '/' || c == '\\'; 00057 #else 00058 return c == '/'; 00059 #endif 00060 } 00061 00062 // --------------------------------------------------------------------------- 00063 static inline string replace(string str, char from, char to) 00064 { 00065 string res(str.size(), '\0'); 00066 string::const_iterator in = str.begin(); 00067 
string::iterator out = res.begin(); 00068 while (in != str.end()) { 00069 if (*in == from) *out = to; 00070 else *out = *in; 00071 in++; out++; 00072 } 00073 return res; 00074 } 00075 00076 // --------------------------------------------------------------------------- 00077 string normpath(const string& path) 00078 { 00079 if (path.empty()) return ""; 00080 char drive[3] = {'\0', ':', '\0'}; 00081 size_t i = 0; 00082 #if WINDOWS 00083 if (path.size() > 1 && path[1] == ':') { 00084 drive[0] = path[0]; 00085 i = 2; 00086 } 00087 #endif 00088 string norm_path = drive; 00089 bool abs = issep(path[i]); 00090 if (abs) { 00091 #if WINDOWS 00092 while (i <= path.size() && issep(path[i])) { 00093 norm_path += cSeparator; 00094 i++; 00095 } 00096 #else 00097 norm_path += cSeparator; 00098 #endif 00099 } 00100 string current; 00101 vector<string> parts; 00102 while (i <= path.size()) { 00103 if (issep(path[i]) || path[i] == '\0') { 00104 if (current == "..") { 00105 if (!abs && (parts.empty() || parts.back() == "..")) { 00106 parts.push_back(current); 00107 } else if (!parts.empty()) { 00108 parts.pop_back(); 00109 } 00110 } else if (current != "" && current != ".") { 00111 parts.push_back(current); 00112 } 00113 current.clear(); 00114 } else { 00115 current += path[i]; 00116 } 00117 i++; 00118 } 00119 for (i = 0; i < parts.size(); i++) { 00120 norm_path = join(norm_path, parts[i]); 00121 } 00122 return norm_path.empty() ? "." 
: norm_path; 00123 } 00124 00125 // --------------------------------------------------------------------------- 00126 string posixpath(const string& path) 00127 { 00128 #if WINDOWS 00129 string norm_path = path; 00130 #else 00131 string norm_path = replace(path, '\\', '/'); 00132 #endif 00133 norm_path = normpath(norm_path); 00134 #if WINDOWS 00135 norm_path = replace(norm_path, '\\', '/'); 00136 #endif 00137 return norm_path; 00138 } 00139 00140 // --------------------------------------------------------------------------- 00141 string ntpath(const string& path) 00142 { 00143 #if WINDOWS 00144 string norm_path = path; 00145 #else 00146 string norm_path = replace(path, '\\', '/'); 00147 #endif 00148 norm_path = normpath(norm_path); 00149 #if UNIX 00150 norm_path = replace(norm_path, '/', '\\'); 00151 #endif 00152 return norm_path; 00153 } 00154 00155 // =========================================================================== 00156 // components 00157 // =========================================================================== 00158 00159 // --------------------------------------------------------------------------- 00160 void split(const string& path, string& head, string& tail) 00161 { 00162 size_t last = path.find_last_of(cSeparators); 00163 if (last == string::npos) { 00164 head = ""; 00165 tail = path; 00166 } else { 00167 size_t pos = last; 00168 if (last > 0) pos = path.find_last_not_of(cSeparators, last - 1); 00169 if (pos == string::npos) head = path.substr(0, last + 1); 00170 else head = path.substr(0, pos + 1); 00171 tail = path.substr(last + 1); 00172 } 00173 } 00174 00175 // --------------------------------------------------------------------------- 00176 vector<string> split(const string& path) 00177 { 00178 vector<string> parts(2, ""); 00179 split(path, parts[0], parts[1]); 00180 return parts; 00181 } 00182 00183 // --------------------------------------------------------------------------- 00184 void splitdrive(const string& path, string& drive, 
string& tail) 00185 { 00186 #if WINDOWS 00187 if (path.size() > 1 && path[1] == ':') { 00188 tail = path.substr(2); 00189 drive = path[0]; drive += ':'; 00190 } 00191 else 00192 #endif 00193 { 00194 tail = path; 00195 drive = ""; 00196 } 00197 } 00198 00199 // --------------------------------------------------------------------------- 00200 vector<string> splitdrive(const string& path) 00201 { 00202 vector<string> parts(2, ""); 00203 splitdrive(path, parts[0], parts[1]); 00204 return parts; 00205 } 00206 00207 // --------------------------------------------------------------------------- 00208 void splitext(const string& path, string& head, string& ext, const set<string>* exts, bool icase) 00209 { 00210 size_t pos = string::npos; 00211 // test user supplied extensions only 00212 if (exts) { 00213 for (set<string>::const_iterator i = exts->begin(); i != exts->end(); ++i) { 00214 if (path.size() < i->size()) continue; 00215 size_t start = path.size() - i->size(); 00216 if (start < pos) { // longest match 00217 if (icase) { 00218 string str = path.substr(start); 00219 string ext = *i; 00220 std::transform(str.begin(), str.end(), str.begin(), ::toupper); 00221 std::transform(ext.begin(), ext.end(), ext.begin(), ::toupper); 00222 if (str == ext) pos = start; 00223 } else if (path.compare(start, i->size(), *i) == 0) { 00224 pos = start; 00225 } 00226 } 00227 } 00228 // otherwise, get position of last dot 00229 } else { 00230 pos = path.find_last_of('.'); 00231 // leading dot of file name in Posix indicates hidden file, 00232 // not start of file extension 00233 #if UNIX 00234 if (pos != string::npos && (pos == 0 || issep(path[pos - 1]))) { 00235 pos = string::npos; 00236 } 00237 #endif 00238 } 00239 // split extension 00240 if (pos == string::npos) { 00241 head = path; 00242 ext = ""; 00243 } else { 00244 // tmp variable used for the case that head references the same input 00245 // string as path 00246 string tmp = path.substr(0, pos); 00247 ext = path.substr(pos); 
00248 head = tmp; 00249 } 00250 } 00251 00252 // --------------------------------------------------------------------------- 00253 vector<string> splitext(const string& path, const set<string>* exts) 00254 { 00255 vector<string> parts(2, ""); 00256 splitext(path, parts[0], parts[1], exts); 00257 return parts; 00258 } 00259 00260 // --------------------------------------------------------------------------- 00261 string dirname(const string& path) 00262 { 00263 vector<string> parts(2, ""); 00264 split(path, parts[0], parts[1]); 00265 return parts[0]; 00266 } 00267 00268 // --------------------------------------------------------------------------- 00269 string basename(const string& path) 00270 { 00271 vector<string> parts(2, ""); 00272 split(path, parts[0], parts[1]); 00273 return parts[1]; 00274 } 00275 00276 // --------------------------------------------------------------------------- 00277 bool hasext(const string& path, const set<string>* exts) 00278 { 00279 string ext = splitext(path, exts)[1]; 00280 return exts ? 
exts->find(ext) != exts->end() : !ext.empty(); 00281 } 00282 00283 // =========================================================================== 00284 // conversion 00285 // =========================================================================== 00286 00287 // --------------------------------------------------------------------------- 00288 bool isabs(const string& path) 00289 { 00290 size_t i = 0; 00291 #if WINDOWS 00292 if (path.size() > 1 && path[1] == ':') i = 2; 00293 #endif 00294 return i < path.size() && issep(path[i]); 00295 } 00296 00297 // --------------------------------------------------------------------------- 00298 string abspath(const string& path) 00299 { 00300 return normpath(join(getcwd(), path)); 00301 } 00302 00303 // --------------------------------------------------------------------------- 00304 string relpath(const string& path, const string& base) 00305 { 00306 // if relative path is given just return it 00307 if (!isabs(path)) return path; 00308 // normalize paths 00309 string norm_path = normpath(path); 00310 string norm_base = normpath(join(getcwd(), base)); 00311 // check if paths are on same drive 00312 #if WINDOWS 00313 string drive = splitdrive(norm_path)[0]; 00314 string base_drive = splitdrive(norm_base)[0]; 00315 if (drive != base_drive) { 00316 BASIS_THROW(invalid_argument, 00317 "Path is on drive " << drive << ", start is on drive " << base_drive); 00318 } 00319 #endif 00320 // find start of first path component in which paths differ 00321 string::const_iterator b = norm_base.begin(); 00322 string::const_iterator p = norm_path.begin(); 00323 size_t pos = 0; 00324 size_t i = 0; 00325 while (b != norm_base.end() && p != norm_path.end()) { 00326 if (issep(*p)) { 00327 if (!issep(*b)) break; 00328 pos = i; 00329 } else if (*b != *p) { 00330 break; 00331 } 00332 b++; p++; i++; 00333 } 00334 // set pos to i (in this case, the size of one of the paths) if the end 00335 // of one path was reached, but the other path has a path 
separator 00336 // at this position, this is required below 00337 if ((b != norm_base.end() && issep(*b)) || 00338 (p != norm_path.end() && issep(*p))) pos = i; 00339 // skip trailing separator of other path if end of one path reached 00340 if (b == norm_base.end() && p != norm_path.end() && issep(*p)) p++; 00341 if (p == norm_path.end() && b != norm_base.end() && issep(*b)) b++; 00342 // if paths are the same, just return a period (.) 00343 // 00344 // Thanks to the previous skipping of trailing separators, this condition 00345 // handles all of the following cases: 00346 // 00347 // base := "/usr/bin" path := "/usr/bin" 00348 // base := "/usr/bin/" path := "/usr/bin/" 00349 // base := "/usr/bin" path := "/usr/bin/" 00350 // base := "/usr/bin/" path := "/usr/bin" 00351 if (b == norm_base.end() && p == norm_path.end()) return "."; 00352 // otherwise, pos is the index of the last slash for which both paths 00353 // were identical; hence, everything that comes after in the original 00354 // path is preserved and for each following component in the base path 00355 // a "../" is prepended to the relative path 00356 string rel_path; 00357 // truncate base path with a separator as for each "*/" path component, 00358 // a "../" will be prepended to the relative path 00359 if (b != norm_base.end() && !issep(norm_base[norm_base.size() - 1])) { 00360 // attention: This operation may invalidate the iterator b! 00361 // Therefore, remember position of iterator and get a new one. 
00362 size_t pos = b - norm_base.begin(); 00363 norm_base += cSeparator; 00364 b = norm_base.begin() + pos; 00365 } 00366 while (b != norm_base.end()) { 00367 if (issep(*b)) { 00368 rel_path += ".."; 00369 rel_path += cSeparator; 00370 } 00371 b++; 00372 } 00373 if (pos + 1 < norm_path.size()) rel_path += norm_path.substr(pos + 1); 00374 // remove trailing path separator 00375 if (issep(rel_path[rel_path.size() - 1])) { 00376 rel_path.erase(rel_path.size() - 1); 00377 } 00378 return rel_path; 00379 } 00380 00381 // --------------------------------------------------------------------------- 00382 string realpath(const string& path) 00383 { 00384 string curr_path = join(getcwd(), path); 00385 #if UNIX 00386 // use stringstream and std::getline() to split absolute path at slashes (/) 00387 stringstream ss(curr_path); 00388 curr_path.clear(); 00389 string fname; 00390 string prev_path; 00391 string next_path; 00392 char slash; 00393 ss >> slash; // root slash 00394 while (getline(ss, fname, '/')) { 00395 // current absolute path 00396 curr_path += '/'; 00397 curr_path += fname; 00398 // if current path is a symbolic link, follow it 00399 if (islink(curr_path)) { 00400 // for safety reasons, restrict the depth of symbolic links followed 00401 for (unsigned int i = 0; i < 100; i++) { 00402 next_path = os::readlink(curr_path); 00403 if (next_path.empty()) { 00404 // if real path could not be determined because of permissions 00405 // or invalid path, return the original path 00406 break; 00407 } else { 00408 curr_path = join(prev_path, next_path); 00409 if (!islink(next_path)) break; 00410 } 00411 } 00412 // if real path could not be determined with the given maximum number 00413 // of loop iterations (endless cycle?) 
or one of the symbolic links 00414 // could not be read, just return original path as absolute path 00415 if (islink(next_path)) { 00416 return abspath(path); 00417 } 00418 } 00419 // memorize previous path used as base for abspath() 00420 prev_path = curr_path; 00421 } 00422 #endif 00423 // normalize path after all symbolic links were resolved 00424 return normpath(curr_path); 00425 } 00426 00427 // --------------------------------------------------------------------------- 00428 string join(const string& base, const string& path) 00429 { 00430 if (base.empty() || isabs(path)) return path; 00431 if (issep(base[base.size() - 1])) return base + path; 00432 #if WINDOWS 00433 return base + '\\' + path; 00434 #else 00435 return base + '/' + path; 00436 #endif 00437 } 00438 00439 // =========================================================================== 00440 // file status 00441 // =========================================================================== 00442 00443 // --------------------------------------------------------------------------- 00444 bool isfile(const std::string path) 00445 { 00446 #if WINDOWS 00447 const DWORD info = ::GetFileAttributes(path.c_str()); 00448 return (FILE_ATTRIBUTE_DIRECTORY & info) == 0; 00449 #else 00450 struct stat info; 00451 if (stat(path.c_str(), &info) != 0) return false; 00452 return S_ISREG(info.st_mode); 00453 #endif 00454 return false; 00455 } 00456 00457 // --------------------------------------------------------------------------- 00458 bool isdir(const std::string path) 00459 { 00460 #if WINDOWS 00461 const DWORD info = ::GetFileAttributes(path.c_str()); 00462 return (FILE_ATTRIBUTE_DIRECTORY & info) != 0; 00463 #else 00464 struct stat info; 00465 if (stat(path.c_str(), &info) != 0) return false; 00466 return S_ISDIR(info.st_mode); 00467 #endif 00468 return false; 00469 } 00470 00471 // --------------------------------------------------------------------------- 00472 bool exists(const std::string path) 00473 { 00474 
#if WINDOWS 00475 const DWORD info = ::GetFileAttributes(path.c_str()); 00476 return info != INVALID_FILE_ATTRIBUTES; 00477 #else 00478 struct stat info; 00479 if (stat(path.c_str(), &info) == 0) return true; 00480 #endif 00481 return false; 00482 } 00483 00484 // --------------------------------------------------------------------------- 00485 bool islink(const string& path) 00486 { 00487 #if WINDOWS 00488 return false; 00489 #else 00490 struct stat info; 00491 if (lstat(path.c_str(), &info) != 0) return false; 00492 return S_ISLNK(info.st_mode); 00493 #endif 00494 } 00495 00496 00497 } // namespace path 00498 00499 } // namespace os 00500 00501 } // namespace basis