OPeNDAP Hyrax Back End Server (BES)  Updated for version 3.8.3
BESStoredDapResultCache.cc
Go to the documentation of this file.
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2011 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG
28 
29 #include <sys/stat.h>
30 
31 #include <iostream>
32 #ifdef HAVE_TR1_FUNCTIONAL
33 #include <tr1/functional>
34 #endif
35 #include <string>
36 #include <fstream>
37 #include <sstream>
38 
39 #include <DDS.h>
40 #include <ConstraintEvaluator.h>
41 #include <DDXParserSAX2.h>
42 #include <XDRStreamMarshaller.h>
43 #include <XDRStreamUnMarshaller.h>
44 //<XDRFileUnMarshaller.h>
45 #include <debug.h>
46 #include <mime_util.h> // for last_modified_time() and rfc_822_date()
47 #include <util.h>
48 
49 
51 #include "BESDapResponseBuilder.h"
52 #include "BESInternalError.h"
53 
54 #include "BESUtil.h"
55 #include "TheBESKeys.h"
56 #include "BESDebug.h"
57 
58 #ifdef HAVE_TR1_FUNCTIONAL
59 #define HASH_OBJ std::tr1::hash
60 #else
61 #define HASH_OBJ std::hash
62 #endif
63 
64 
65 #define CRLF "\r\n"
66 #define BES_DATA_ROOT "BES.Data.RootDirectory"
67 #define BES_CATALOG_ROOT "BES.Catalog.catalog.RootDirectory"
68 
69 
70 using namespace std;
71 using namespace libdap;
72 
73 
// Singleton instance pointer. It starts out null, is assigned by the
// get_instance() overloads and is reset to null by delete_instance().
74 BESStoredDapResultCache *BESStoredDapResultCache::d_instance = 0;
// Names of the BES configuration keys that the config-reading helpers in
// this file look up through TheBESKeys: the cache sub-directory, the cache
// file-name prefix and the cache size.
75 const string BESStoredDapResultCache::SUBDIR_KEY = "DAP.StoredResultsCache.subdir";
76 const string BESStoredDapResultCache::PREFIX_KEY = "DAP.StoredResultsCache.prefix";
77 const string BESStoredDapResultCache::SIZE_KEY = "DAP.StoredResultsCache.size";
78 
80 
 // Reads the cache size from the BES key SIZE_KEY and returns it as an
 // unsigned long; throws BESInternalError when the key is not set.
 // NOTE(review): the signature line of this function is not present in this
 // listing; presumably this is getCacheSizeFromConfig(), which the default
 // constructor calls -- confirm against the full file.
81  bool found;
82  string size;
83  unsigned long size_in_megabytes = 0;
84  TheBESKeys::TheKeys()->get_value( SIZE_KEY, size, found ) ;
85  if( found ) {
 // Parse the textual value. The stream state is not checked after the
 // extraction, so a non-numeric value is not reported as an error here.
86  istringstream iss(size);
87  iss >> size_in_megabytes;
88  }
89  else {
 // A missing key is treated as a configuration error.
90  string msg = "[ERROR] BESStoreResultCache::getCacheSize() - The BES Key " + SIZE_KEY + " is not set! It MUST be set to utilize the Stored Result Caching system. ";
91  BESDEBUG("cache", msg);
92  throw BESInternalError(msg , __FILE__, __LINE__);
93  }
94  return size_in_megabytes;
95 }
96 
 // Reads the cache sub-directory from the BES key SUBDIR_KEY, strips any
 // leading '/' characters and returns the result; throws BESInternalError
 // when the key is not set.
 // NOTE(review): the signature line of this function is not present in this
 // listing; presumably this is getSubDirFromConfig(), which is called when
 // the stored-results directory is assembled -- confirm against the full file.
98  bool found;
99  string subdir = "";
100  TheBESKeys::TheKeys()->get_value( SUBDIR_KEY, subdir, found ) ;
101 
102  if( !found ) {
103  string msg = "[ERROR] BESStoreResultCache::getDefaultSubDir() - The BES Key " + SUBDIR_KEY + " is not set! It MUST be set to utilize the Stored Result Caching system. ";
104  BESDEBUG("cache", msg);
105  throw BESInternalError(msg , __FILE__, __LINE__);
106  }
107  else {
 // NOTE(review): the condition dereferences subdir.begin() before it tests
 // the length, so an empty string is dereferenced once before the loop
 // exits; testing the length first would avoid that.
108  while(*subdir.begin() == '/' && subdir.length()>0){
109  subdir = subdir.substr(1);
110  }
111  // So if its value is "/" or the empty string then the subdir will default to the root
112  // directory of the BES data system.
113  }
114 
115 
116  return subdir;
117 }
118 
 // Reads the cache file-name prefix from the BES key PREFIX_KEY and returns
 // it converted to lower case; throws BESInternalError when the key is not
 // set.
 // NOTE(review): the signature line of this function is not present in this
 // listing; presumably this is getResultPrefixFromConfig(), which the default
 // constructor calls -- confirm against the full file.
120  bool found;
121  string prefix = "";
122  TheBESKeys::TheKeys()->get_value( PREFIX_KEY, prefix, found ) ;
123  if( found ) {
124  prefix = BESUtil::lowercase( prefix ) ;
125  }
126  else {
 // A missing key is treated as a configuration error.
127  string msg = "[ERROR] BESStoreResultCache::getResultPrefix() - The BES Key " + PREFIX_KEY + " is not set! It MUST be set to utilize the Stored Result Caching system. ";
128  BESDEBUG("cache", msg);
129  throw BESInternalError(msg , __FILE__, __LINE__);
130  }
131 
132  return prefix;
133 }
134 
 // Builds the directory used for stored results: the BES catalog root (or,
 // when that key is not set, the BES data root), followed by '/', followed
 // by the configured sub-directory. Throws BESInternalError when neither
 // root key is set.
 // NOTE(review): the signature line of this function is not present in this
 // listing; presumably this is getStoredResultsDirFromConfig(), which the
 // default constructor calls -- confirm against the full file.
136  BESDEBUG("cache", "BESStoreResultCache::getDefaultCacheDir() - BEGIN" << endl);
137  bool found;
138 
139  string cacheDir = "";
 // Prefer the catalog root; fall back to the data root.
140  TheBESKeys::TheKeys()->get_value( BES_CATALOG_ROOT, cacheDir, found ) ;
141  if( !found ) {
142  TheBESKeys::TheKeys()->get_value( BES_DATA_ROOT, cacheDir, found ) ;
143  if( !found ) {
144  string msg = ((string)"[ERROR] BESStoreResultCache::getStoredResultsDir() - Neither the BES Key ") + BES_CATALOG_ROOT +
145  "or the BES key " + BES_DATA_ROOT + " have been set! One MUST be set to utilize the Stored Result Caching system. ";
146  BESDEBUG("cache", msg);
147  throw BESInternalError(msg , __FILE__, __LINE__);
148  }
149  }
150  BESDEBUG("cache", "BESStoreResultCache::getDefaultCacheDir() - Using data directory: " << cacheDir << endl);
151 
152 
 // Make sure the root ends in '/' before the sub-directory is appended.
 // NOTE(review): if a root key can be set to an empty value, rbegin() is
 // dereferenced on an empty string here -- confirm whether get_value() can
 // report 'found' with an empty value.
153  if(*cacheDir.rbegin() != '/')
154  cacheDir += "/";
155 
156  string subDir = getSubDirFromConfig(); // Can never start with a '/' (method ensures it)
157 
158  cacheDir += subDir;
159  BESDEBUG("cache", "BESStoreResultCache::getDefaultCacheDir() - Stored Results Directory: " << cacheDir << endl);
160 
161  BESDEBUG("cache", "BESStoreResultCache::getDefaultCacheDir() - END" << endl);
162  return cacheDir;
163 }
164 
165 
166 BESStoredDapResultCache::BESStoredDapResultCache(){
167  BESDEBUG("cache", "BESStoreResultCache::BESStoreResultCache() - BEGIN" << endl);
168 
169  string resultsDir = getStoredResultsDirFromConfig();
170  string resultPrefix = getResultPrefixFromConfig();
171  unsigned long size_in_megabytes = getCacheSizeFromConfig();
172 
173  BESDEBUG("cache", "BESStoreResultCache() - Cache config params: " << resultsDir << ", " << resultPrefix << ", " << size_in_megabytes << endl);
174 
175  // The required params must be present. If initialize() is not called,
176  // then d_cache will stay null and is_available() will return false.
177  // Also, the directory 'path' must exist, or d_cache will be null.
178  if (!resultsDir.empty() && size_in_megabytes > 0)
179  initialize(resultsDir, resultPrefix, size_in_megabytes);
180 
181  BESDEBUG("cache", "BESStoreResultCache::BESStoreResultCache() - END" << endl);
182 }
183 
184 
198 BESStoredDapResultCache::BESStoredDapResultCache(const string &stored_results_dir, const string &prefix, unsigned long long size): BESFileLockingCache(stored_results_dir,prefix,size) {
199 
200 }
201 
202 
215 BESStoredDapResultCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
216 {
217  if (d_instance == 0){
218  if(dir_exists(cache_dir)){
219  try {
220  d_instance = new BESStoredDapResultCache(cache_dir, prefix, size);
221  }
222  catch(BESInternalError &bie){
223  BESDEBUG("cache", "BESStoreResultCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
224  }
225  }
226  }
227  return d_instance;
228 }
229 
// Body of the singleton accessor that configures the cache from the BES
// keys. On first use, and only when the configured results directory exists,
// a default-constructed instance is created; a BESInternalError thrown by
// the constructor is logged and swallowed, leaving the instance null.
// NOTE(review): the signature line is not present in this listing;
// presumably this is the no-argument get_instance() -- confirm against the
// full file.
235 {
236  if (d_instance == 0) {
 // This directory lookup runs outside the try block below, so an
 // exception it throws is not caught here.
237  if(dir_exists(getStoredResultsDirFromConfig())){
238  try {
239  d_instance = new BESStoredDapResultCache();
240  }
241  catch(BESInternalError &bie){
242  BESDEBUG("cache", "BESStoreResultCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
243  }
244  }
245  }
246 
247  return d_instance;
248 }
249 
250 
251 
252 void BESStoredDapResultCache::delete_instance() {
253  BESDEBUG("cache","BESStoreResultCache::delete_instance() - Deleting singleton BESStoreResultCache instance." << endl);
254  delete d_instance;
255  d_instance = 0;
256 }
257 
258 
259 
269 bool BESStoredDapResultCache::is_valid(const string &cache_file_name, const string &dataset)
270 {
271  // If the cached response is zero bytes in size, it's not valid.
272  // (hmmm...)
273 
274  off_t entry_size = 0;
275  time_t entry_time = 0;
276  struct stat buf;
277  if (stat(cache_file_name.c_str(), &buf) == 0) {
278  entry_size = buf.st_size;
279  entry_time = buf.st_mtime;
280  }
281  else {
282  return false;
283  }
284 
285  if (entry_size == 0)
286  return false;
287 
288  time_t dataset_time = entry_time;
289  if (stat(dataset.c_str(), &buf) == 0) {
290  dataset_time = buf.st_mtime;
291  }
292 
293  // Trick: if the d_dataset is not a file, stat() returns error and
294  // the times stay equal and the code uses the cache entry.
295 
296  // TODO Fix this so that the code can get a LMT from the correct
297  // handler.
298  if (dataset_time > entry_time)
299  return false;
300 
301  return true;
302 }
303 
315 void BESStoredDapResultCache::read_data_from_cache(const string &cache_file_name, DDS *fdds)
316 {
317  BESDEBUG("cache", "Opening cache file: " << cache_file_name << endl);
318  ifstream data(cache_file_name.c_str());
319 
320  // Rip off the MIME headers from the response if they are present
321  string mime = get_next_mime_header(data);
322  while (!mime.empty()) {
323  mime = get_next_mime_header(data);
324  }
325 
326  // Parse the DDX; throw an exception on error.
327  DDXParser ddx_parser(fdds->get_factory());
328 
329  // Read the MPM boundary and then read the subsequent headers
330  string boundary = read_multipart_boundary(data);
331  BESDEBUG("cache", "MPM Boundary: " << boundary << endl);
332 
333  read_multipart_headers(data, "text/xml", dap4_ddx);
334 
335  BESDEBUG("cache", "Read the multipart haeaders" << endl);
336 
337  // Parse the DDX, reading up to and including the next boundary.
338  // Return the CID for the matching data part
339  string data_cid;
340  try {
341  ddx_parser.intern_stream(data, fdds, data_cid, boundary);
342  BESDEBUG("cache", "Dataset name: " << fdds->get_dataset_name() << endl);
343  }
344  catch(Error &e) {
345  BESDEBUG("cache", "DDX Parser Error: " << e.get_error_message() << endl);
346  throw;
347  }
348 
349  // Munge the CID into something we can work with
350  BESDEBUG("cache", "Data CID (before): " << data_cid << endl);
351  data_cid = cid_to_header_value(data_cid);
352  BESDEBUG("cache", "Data CID (after): " << data_cid << endl);
353 
354  // Read the data part's MPM part headers (boundary was read by
355  // DDXParse::intern)
356  read_multipart_headers(data, "application/octet-stream", dap4_data, data_cid);
357 
358  // Now read the data
359 
360  // XDRFileUnMarshaller um(data);
361  XDRStreamUnMarshaller um(data);
362  for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
363  (*i)->deserialize(um, fdds);
364  }
365 }
366 
371 DDS *
372 BESStoredDapResultCache::get_cached_data_ddx(const string &cache_file_name, BaseTypeFactory *factory, const string &filename)
373 {
374  BESDEBUG("cache", "Reading cache for " << cache_file_name << endl);
375 
376  DDS *fdds = new DDS(factory);
377 
378  fdds->filename(filename) ;
379  //fdds->set_dataset_name( "function_result_" + name_path(filename) ) ;
380 
381  read_data_from_cache(cache_file_name, fdds);
382 
383  BESDEBUG("cache", "DDS Filename: " << fdds->filename() << endl);
384  BESDEBUG("cache", "DDS Dataset name: " << fdds->get_dataset_name() << endl);
385 
386  fdds->set_factory( 0 ) ;
387 
388  // mark everything as read. and send. That is, make sure that when a response
389  // is retrieved from the cache, all of the variables are marked as to be sent
390  DDS::Vars_iter i = fdds->var_begin();
391  while(i != fdds->var_end()) {
392  (*i)->set_read_p( true );
393  (*i++)->set_send_p(true);
394  }
395 
396  return fdds;
397 }
398 
399 
400 
432 DDS *BESStoredDapResultCache::cache_dataset(DDS &dds, const string &constraint, BESDapResponseBuilder *rb, ConstraintEvaluator *eval, string &cache_token)
433 {
434  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - BEGIN" << endl );
435  // These are used for the cached or newly created DDS object
436  BaseTypeFactory factory;
437  DDS *fdds;
438 
439  // Get the cache filename for this thing. Do not use the default
440  // name mangling; instead use what build_cache_file_name() does.
441  string cache_file_name = get_cache_file_name(build_stored_result_file_name(dds.filename(), constraint), /*mangle*/false);
442  int fd;
443  try {
444  // If the object in the cache is not valid, remove it. The read_lock will
445  // then fail and the code will drop down to the create_and_lock() call.
446  // is_valid() tests for a non-zero object and for d_dateset newer than
447  // the cached object.
448  if (!is_valid(cache_file_name, dds.filename()))
449  purge_file(cache_file_name);
450 
451  if (get_read_lock(cache_file_name, fd)) {
452  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - function ce (change)- cached hit: " << cache_file_name << endl);
453  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.filename());
454  }
455  else if (create_and_lock(cache_file_name, fd)) {
456  // If here, the cache_file_name could not be locked for read access;
457  // try to build it. First make an empty file and get an exclusive lock on it.
458  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - function ce - caching " << cache_file_name << ", constraint: " << constraint << endl);
459 
460  fdds = new DDS(dds);
461  eval->parse_constraint(constraint, *fdds);
462 
463  if (eval->function_clauses()) {
464  DDS *temp_fdds = eval->eval_function_clauses(*fdds);
465  delete fdds;
466  fdds = temp_fdds;
467  }
468 
469  ofstream data_stream(cache_file_name.c_str());
470  if (!data_stream)
471  throw InternalErr(__FILE__, __LINE__, "Could not open '" + cache_file_name + "' to write cached response.");
472 
473  string start="dataddx_cache_start", boundary="dataddx_cache_boundary";
474 
475  // Use a ConstraintEvaluator that has not parsed a CE so the code can use
476  // the send method(s)
477  ConstraintEvaluator eval;
478 
479  // Setting the version to 3.2 causes send_data_ddx to write the MIME headers that
480  // the cache expects.
481  fdds->set_dap_version("3.2");
482 
483  // This is a bit of a hack, but it effectively uses ResponseBuilder to write the
484  // cached object/response without calling the machinery in one of the send_*()
485  // methods. Those methods assume they need to evaluate the BESDapResponseBuilder's
486  // CE, which is not necessary and will alter the values of the send_p property
487  // of the DDS's variables.
488  set_mime_multipart(data_stream, boundary, start, dap4_data_ddx, x_plain, last_modified_time(rb->get_dataset_name()));
489  //data_stream << flush;
490  rb->dataset_constraint_ddx(data_stream, *fdds, eval, boundary, start);
491  //data_stream << flush;
492 
493  data_stream << CRLF << "--" << boundary << "--" << CRLF;
494 
495  data_stream.close();
496 
497  // Change the exclusive lock on the new file to a shared lock. This keeps
498  // other processes from purging the new file and ensures that the reading
499  // process can use it.
501 
502  // Now update the total cache size info and purge if needed. The new file's
503  // name is passed into the purge method because this process cannot detect its
504  // own lock on the file.
505  unsigned long long size = update_cache_info(cache_file_name);
506  if (cache_too_big(size))
507  update_and_purge(cache_file_name);
508  }
509  // get_read_lock() returns immediately if the file does not exist,
510  // but blocks waiting to get a shared lock if the file does exist.
511  else if (get_read_lock(cache_file_name, fd)) {
512  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - function ce - cached hit: " << cache_file_name << endl);
513  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.get_dataset_name());
514  }
515  else {
516  throw InternalErr(__FILE__, __LINE__, "BESStoredDapResultCache::cache_dataset() - Cache error during function invocation.");
517  }
518  }
519  catch (...) {
520  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - caught exception, unlocking cache and re-throw." << endl );
521  // I think this call is not needed. jhrg 10/23/12
522  unlock_cache();
523  throw;
524  }
525 
526 
527 
528  cache_token = cache_file_name; // Set this value-result parameter
529  BESDEBUG("cache", "BESStoredDapResultCache::cache_dataset() - END (cache_token=`"<< cache_token << "'" << endl );
530  return fdds;
531 }
532 
540 string
541 BESStoredDapResultCache::build_stored_result_file_name(const string &dataset, const string &ce)
542 {
543  BESDEBUG("cache", "build_stored_result_file_name() - BEGIN. dataset: " << dataset << ", ce: " << ce << endl);
544  std::ostringstream ostr;
545  HASH_OBJ<std::string> str_hash;
546  string name = dataset + "#" + ce;
547  ostr << str_hash(name);
548  string hashed_name = ostr.str();
549 
550  BESDEBUG("cache", "build_stored_result_file_name(): hashed_name: " << hashed_name << endl);
551 
552  return hashed_name;
553 }
554 
555 
571 string BESStoredDapResultCache::get_cache_file_name(const string &src, bool mangle)
572 {
573  string target = src;
574  // Make sure the target does not begin with slash
575  while(*target.begin() == '/' && target.length()>0){
576  target = target.substr(1);
577  }
578  if(target.empty()){
579  throw BESInternalError("BESStoredDapResultCache: The target cache file name must not be made of only the '/' character. Srsly.", __FILE__, __LINE__);
580  }
581 
582  string cacheDir = getCacheDirectory();
583  // Make sure cacheDir String ends in '/'
584  if(*cacheDir.rbegin() != '/')
585  cacheDir += "/";
586 
587  string prefix = getCacheFilePrefix();
588  // Make sure the damn prefix does not begin with slash
589  while(*prefix.begin() == '/' && prefix.length()>0){
590  prefix = prefix.substr(1);
591  }
592 
593  BESDEBUG("cache", "BESStoredDapResultCache::get_cache_file_name() - cacheDir: '" << cacheDir << "'" << endl);
594  BESDEBUG("cache", "BESStoredDapResultCache::get_cache_file_name() - prefix: '" << prefix << "'" << endl);
595  BESDEBUG("cache", "BESStoredDapResultCache::get_cache_file_name() - target: '" << target << "'" << endl);
596 
597  if(mangle){
598  BESDEBUG("cache", "[WARNING] BESStoredDapResultCache::get_cache_file_name() - The parameter 'mangle' is ignored!" << endl);
599  }
600 
601 
602  return cacheDir + prefix + target;
603 }
604 
virtual void unlock_cache()
Unlock the cache info file.
#define BES_DATA_ROOT
exception thrown if internal error encountered
#define BES_CATALOG_ROOT
virtual libdap::DDS * cache_dataset(libdap::DDS &dds, const std::string &constraint, BESDapResponseBuilder *rb, libdap::ConstraintEvaluator *eval, std::string &cache_token)
Get the cached DDS object.
virtual bool create_and_lock(const string &target, int &fd)
Create a file in the cache and lock it for write access.
static string lowercase(const string &s)
Convert a string to all lower case.
Definition: BESUtil.cc:179
virtual string get_cache_file_name(const string &src, bool mangle=false)
Build the name of the file that will hold the uncompressed data from 'src' in the cache.
const string getCacheDirectory()
static string getStoredResultsDirFromConfig()
Implementation of a caching mechanism for compressed data.
virtual string get_message()
get the error message for this exception
Definition: BESError.h:94
#define CRLF
static BESStoredDapResultCache * get_instance()
Get the default instance of the BESStoreResultCache object.
virtual void purge_file(const string &file)
Purge a single file from the cache.
virtual bool cache_too_big(unsigned long long current_size) const
look at the cache size; is it too large? Look at the cache size and see if it is too big...
static const string SUBDIR_KEY
static const string PREFIX_KEY
const string getCacheFilePrefix()
virtual bool get_read_lock(const string &target, int &fd)
Get a read-only lock on the file if it exists.
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:453
virtual void update_and_purge(const string &new_file)
Purge files from the cache.
static unsigned long getCacheSizeFromConfig()
virtual unsigned long long update_cache_info(const string &target)
Update the cache info file to include 'target'.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
This class is used to build responses for/by the BES.
#define BESDEBUG(x, y)
macro used to send debug information to the debug stream
Definition: BESDebug.h:64
virtual void dataset_constraint_ddx(std::ostream &out, libdap::DDS &dds, libdap::ConstraintEvaluator &eval, const std::string &boundary, const std::string &start, bool ce_eval=true)
Build/return the DDX and the BLOB part of the DAP3.x data response.
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:48
virtual std::string get_dataset_name() const
The ``dataset name'' is the filename or other string that the filter program will use to access the d...
This class is used to cache DAP2 response objects.