BESCache.cc

Go to the documentation of this file.
00001 // BESCache.cc
00002 
00003 // This file is part of bes, A C++ back-end server implementation framework
00004 // for the OPeNDAP Data Access Protocol.
00005 
00006 // Copyright (c) 2004-2009 University Corporation for Atmospheric Research
00007 // Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
00008 //
00009 // This library is free software; you can redistribute it and/or
00010 // modify it under the terms of the GNU Lesser General Public
00011 // License as published by the Free Software Foundation; either
00012 // version 2.1 of the License, or (at your option) any later version.
00013 // 
00014 // This library is distributed in the hope that it will be useful,
00015 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00016 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017 // Lesser General Public License for more details.
00018 // 
00019 // You should have received a copy of the GNU Lesser General Public
00020 // License along with this library; if not, write to the Free Software
00021 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022 //
00023 // You can contact University Corporation for Atmospheric Research at
00024 // 3080 Center Green Drive, Boulder, CO 80301
00025  
00026 // (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
00027 // Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 //
00029 // Authors:
00030 //      pwest       Patrick West <pwest@ucar.edu>
00031 //      jgarcia     Jose Garcia <jgarcia@ucar.edu>
00032 
00033 #include "config.h"
00034 
00035 #include <unistd.h>  // for unlink
00036 #include <sys/types.h>
00037 #include <sys/stat.h>
00038 #include <dirent.h>
00039 #include <fcntl.h>
00040 
00041 #include <cstring>
00042 #include <cerrno>
00043 #include <map>
00044 #include <iostream>
00045 #include <sstream>
00046 
00047 using std::multimap ;
00048 using std::pair ;
00049 using std::greater ;
00050 using std::endl ;
00051 
00052 #include "BESCache.h"
00053 #include "TheBESKeys.h"
00054 #include "BESInternalError.h"
00055 #include "BESDebug.h"
00056 
// Character that replaces every '/' of a source path when the flat cache
// file name is built, and that separates the prefix from that name.
#define BES_CACHE_CHAR '#'

// One file found in the cache directory, as collected by BESCache::purge().
typedef struct _cache_entry
{
    string name ;   // full path of the cached file
    off_t size ;    // size in bytes as reported by stat(); off_t rather
                    // than int so files of 2 GiB and more are not truncated
} cache_entry ;
00064 
00065 void 
00066 BESCache::check_ctor_params()
00067 {
00068     if( _cache_dir.empty() )
00069     {
00070         string err = "The cache dir was not specified, must be non-empty" ;
00071         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00072     }
00073 
00074     struct stat buf;
00075     int statret = stat( _cache_dir.c_str(), &buf ) ;
00076     if( statret != 0 || ! S_ISDIR(buf.st_mode) )
00077     {
00078         string err = "The cache dir " + _cache_dir + " does not exist" ;
00079         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00080     }
00081 
00082     if( _prefix.empty() )
00083     {
00084         string err = "The prefix was not specified, must be non-empty" ;
00085         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00086     }
00087 
00088     if( _cache_size == 0 )
00089     {
00090         string err = "The cache size was not specified, must be non-zero" ;
00091         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00092     }
00093     // the cache size is specified in megabytes. When calculating
00094     // the size of the cache we convert to bytes, which is 1048576
00095     // bytes per meg. The max unsigned int allows for only 4095
00096     // megabytes.
00097     if( _cache_size > 4095 ) _cache_size = 4095 ;
00098 
00099     BESDEBUG( "bes", "BES Cache: directory " << _cache_dir
00100                      << ", prefix " << _prefix
00101                      << ", max size " << _cache_size << endl )
00102 }
00103 
00113 BESCache::BESCache( const string &cache_dir,
00114                     const string &prefix,
00115                     unsigned int size )
00116     : _cache_dir( cache_dir ),
00117       _prefix( prefix ),
00118       _cache_size( size ),
00119       _lock_fd( -1 )
00120 {
00121     check_ctor_params(); // Throws BESInternalError on error.
00122 }
00123 
00138 BESCache::BESCache( BESKeys &keys,
00139                     const string &cache_dir_key,
00140                     const string &prefix_key,
00141                     const string &size_key )
00142     : _cache_size( 0 ),
00143       _lock_fd( -1 )
00144 {
00145     bool found = false ;
00146     _cache_dir = keys.get_key( cache_dir_key, found ) ;
00147     if( !found )
00148     {
00149         string err = "The cache dir key " + cache_dir_key
00150                      + " was not found" ;
00151         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00152     }
00153 
00154     found = false ;
00155     _prefix = keys.get_key( prefix_key, found ) ;
00156     if( !found )
00157     {
00158         string err = "The prefix key " + prefix_key
00159                      + " was not found" ;
00160         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00161     }
00162 
00163     found = false ;
00164     string _cache_size_str = keys.get_key( size_key, found ) ;
00165     if( !found )
00166     {
00167         string err = "The size key " + size_key
00168                      + " was not found" ;
00169         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00170     }
00171 
00172 
00173     std::istringstream is( _cache_size_str ) ;
00174     is >> _cache_size ;
00175 
00176     check_ctor_params(); // Throws BESInternalError on error.
00177 }
00178 
00185 bool
00186 BESCache::lock( unsigned int retry, unsigned int num_tries )
00187 {
00188     // make sure we aren't retrying too many times
00189     if( num_tries > MAX_LOCK_TRIES )
00190         num_tries = MAX_LOCK_TRIES ;
00191     if( retry > MAX_LOCK_RETRY_MS )
00192         retry = MAX_LOCK_RETRY_MS ;
00193 
00194     bool got_lock = true ;
00195     if( _lock_fd == -1 )
00196     {
00197         string lock_file = _cache_dir + "/lock" ;
00198         unsigned int tries = 0 ;
00199         _lock_fd = open( lock_file.c_str(),
00200                          O_CREAT | O_EXCL,
00201                          S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00202         while( _lock_fd < 0 && got_lock )
00203         {
00204             tries ++ ;
00205             if( tries > num_tries )
00206             {
00207                 _lock_fd = -1 ;
00208                 got_lock = false ;
00209             }
00210             else
00211             {
00212                 usleep( retry ) ;
00213                 _lock_fd = open( lock_file.c_str(),
00214                                  O_CREAT | O_EXCL,
00215                                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ;
00216             }
00217         }
00218     }
00219     else
00220     {
00221         // This would be a programming error, or we've gotten into a
00222         // situation where the lock is lost. Lock has been called on the
00223         // same cache object twice in a row without an unlock being called.
00224         string err = "The cache dir " + _cache_dir + " is already locked" ;
00225         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00226     }
00227 
00228     return got_lock ;
00229 }
00230 
00237 bool
00238 BESCache::unlock()
00239 {
00240     // if we call unlock twice in a row, does it matter? I say no, just say
00241     // that it is unlocked.
00242     bool unlocked = true ;
00243     if( _lock_fd != -1 )
00244     {
00245         string lock_file = _cache_dir + "/lock" ;
00246         close( _lock_fd ) ;
00247         (void)unlink( lock_file.c_str() ) ;
00248     }
00249 
00250     _lock_fd = -1 ;
00251 
00252     return unlocked ;
00253 }
00254 
00268 bool
00269 BESCache::is_cached( const string &src, string &target )
00270 {
00271     bool is_it = true ;
00272     string tmp_target = src ;
00273 
00274     // Create the file that would be created in the cache directory
00275     //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g'
00276     if( tmp_target.at(0) == '/' )
00277     {
00278         tmp_target = src.substr( 1, tmp_target.length() - 1 ) ;
00279     }
00280     string::size_type slash = 0 ;
00281     while( ( slash = tmp_target.find( '/' ) ) != string::npos )
00282     {
00283         tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ;
00284     }
00285     string::size_type last_dot = tmp_target.rfind( '.' ) ;
00286     if( last_dot != string::npos )
00287     {
00288         tmp_target = tmp_target.substr( 0, last_dot ) ;
00289     }
00290 
00291     target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ;
00292 
00293     // Determine if the target file is already in the cache or not
00294     struct stat buf;
00295     int statret = stat( target.c_str(), &buf ) ;
00296     if( statret != 0 )
00297     {
00298         is_it = false ;
00299     }
00300 
00301     return is_it ;
00302 }
00303 
00312 void
00313 BESCache::purge( )
00314 {
00315     unsigned int max_size = _cache_size * 1048576 ; // Bytes/Meg
00316     struct stat buf;
00317     unsigned int size = 0 ; // total size of all cached files
00318     time_t curr_time = time( NULL ) ; // grab the current time so we can
00319                                       // determine the oldest file
00320     // map of time,entry values
00321     multimap<double,cache_entry,greater<double> > contents ;
00322 
00323     // the prefix is actually the specified prefix plus the cache char '#'
00324     string match_prefix = _prefix + BES_CACHE_CHAR ;
00325 
00326     // go through the cache directory and collect all of the files that
00327     // start with the matching prefix
00328     DIR *dip = opendir( _cache_dir.c_str() ) ;
00329     if( dip != NULL )
00330     {
00331         struct dirent *dit;
00332         while( ( dit = readdir( dip ) ) != NULL )
00333         {
00334             string dirEntry = dit->d_name ;
00335             if( dirEntry.compare( 0, match_prefix.length(), match_prefix ) == 0)
00336             {
00337                 // Now that we have found a match we want to get the size of
00338                 // the file and the last access time from the file.
00339                 string fullPath = _cache_dir + "/" + dirEntry ;
00340                 int statret = stat( fullPath.c_str(), &buf ) ;
00341                 if( statret == 0 )
00342                 {
00343                     size += buf.st_size ;
00344 
00345                     // Find out how old the file is
00346                     time_t file_time = buf.st_atime ;
00347                     // I think we can use the access time without the diff,
00348                     // since it's the relative ages that determine when to
00349                     // delete a file. Good idea to use the access time so
00350                     // recently used (read) files will linger. jhrg 5/9/07
00351                     double time_diff = difftime( curr_time, file_time ) ;
00352                     cache_entry entry ;
00353                     entry.name = fullPath ;
00354                     entry.size = buf.st_size ;
00355                     contents.insert( pair<double,cache_entry>( time_diff, entry ) );
00356                 }
00357             }
00358         }
00359 
00360         // We're done looking in the directory, close it
00361         closedir( dip ) ;
00362 
00363         if( BESISDEBUG( "bes" ) )
00364         {
00365             BESDEBUG( "bes", endl << "BEFORE" << endl )
00366             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00367             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00368             for( ; ti != te; ti++ )
00369             {
00370                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00371             }
00372             BESDEBUG( "bes", endl )
00373         }
00374 
00375         // if the size of files is greater than max allowed then we need to
00376         // purge the cache directory. Keep going until the size is less than
00377         // the max.
00378         multimap<double,cache_entry,greater<double> >::iterator i ;
00379         if( size > max_size )
00380         {
00381             // Maybe change this to size + (fraction of max_size) > max_size?
00382             // jhrg 5/9/07
00383             while( size > max_size )
00384             {
00385                 i = contents.begin() ;
00386                 if( i == contents.end() )
00387                 {
00388                     size = 0 ;
00389                 }
00390                 else
00391                 {
00392                     BESDEBUG( "bes", "BESCache::purge - removing "
00393                                      << (*i).second.name << endl )
00394                     if( remove( (*i).second.name.c_str() ) != 0 )
00395                     {
00396                         char *s_err = strerror( errno ) ;
00397                         string err = "Unable to remove the file "
00398                                      + (*i).second.name
00399                                      + " from the cache: " ;
00400                         if( s_err )
00401                         {
00402                             err.append( s_err ) ;
00403                         }
00404                         else
00405                         {
00406                             err.append( "Unknown error" ) ;
00407                         }
00408                         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00409                     }
00410                     size -= (*i).second.size ;
00411                     contents.erase( i ) ;
00412                 }
00413             }
00414         }
00415 
00416         if( BESISDEBUG( "bes" ) )
00417         {
00418             BESDEBUG( "bes", endl << "AFTER" << endl )
00419             multimap<double,cache_entry,greater<double> >::iterator ti = contents.begin() ;
00420             multimap<double,cache_entry,greater<double> >::iterator te = contents.end() ;
00421             for( ; ti != te; ti++ )
00422             {
00423                 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl )
00424             }
00425         }
00426     }
00427     else
00428     {
00429         string err = "Unable to open cache directory " + _cache_dir ;
00430         throw BESInternalError( err, __FILE__, __LINE__ ) ;
00431     }
00432 }
00433 
00441 void
00442 BESCache::dump( ostream &strm ) const
00443 {
00444     strm << BESIndent::LMarg << "BESCache::dump - ("
00445                              << (void *)this << ")" << endl ;
00446     BESIndent::Indent() ;
00447     strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ;
00448     strm << BESIndent::LMarg << "prefix: " << _prefix << endl ;
00449     strm << BESIndent::LMarg << "size: " << _cache_size << endl ;
00450     BESIndent::UnIndent() ;
00451 }
00452 

Generated on Sat Aug 22 06:04:40 2009 for OPeNDAP Hyrax Back End Server (BES) by  doxygen 1.6.0