libdap++  Updated for version 3.8.2
HTTPCacheTable.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 #include "config.h"
27 
28 // #define DODS_DEBUG
29 
30 // TODO: Remove unneeded includes.
31 
32 #include <pthread.h>
33 #include <limits.h>
34 #include <unistd.h> // for stat
35 #include <sys/types.h> // for stat and mkdir
36 #include <sys/stat.h>
37 
38 #include <cstring>
39 #include <iostream>
40 #include <sstream>
41 #include <algorithm>
42 #include <iterator>
43 #include <set>
44 
45 #include "Error.h"
46 #include "InternalErr.h"
47 #include "ResponseTooBigErr.h"
48 #ifndef WIN32
49 #include "SignalHandler.h"
50 #endif
52 #include "HTTPCacheTable.h"
53 #include "HTTPCacheMacros.h"
54 
55 #include "util_mit.h"
56 #include "debug.h"
57 
// Default freshness lifetimes, in seconds. The expansions are parenthesized
// so each macro behaves as a single value inside a larger expression
// (for example `x % MAX_LM_EXPIRATION` or `2 * NO_LM_EXPIRATION`).
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM

// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif
66 
67 const int CACHE_TABLE_SIZE = 1499;
68 
69 using namespace std;
70 
71 namespace libdap {
72 
76 int
77 get_hash(const string &url)
78 {
79  int hash = 0;
80 
81  for (const char *ptr = url.c_str(); *ptr; ptr++)
82  hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
83 
84  return hash;
85 }
86 
87 HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
88  d_cache_root(cache_root), d_block_size(block_size), d_current_size(0), d_new_entries(0)
89 {
90  d_cache_index = cache_root + CACHE_INDEX;
91 
92  d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
93 
94  // Initialize the cache table.
95  for (int i = 0; i < CACHE_TABLE_SIZE; ++i)
96  d_cache_table[i] = 0;
97 
99 }
100 
104 static inline void
105 delete_cache_entry(HTTPCacheTable::CacheEntry *e)
106 {
107  DBG2(cerr << "Deleting CacheEntry: " << e << endl);
108  delete e;
109 }
110 
112 {
113  for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
114  HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
115  if (cp) {
116  // delete each entry
117  for_each(cp->begin(), cp->end(), delete_cache_entry);
118 
119  // now delete the vector that held the entries
120  delete get_cache_table()[i];
121  get_cache_table()[i] = 0;
122  }
123  }
124 
125  delete[] d_cache_table;
126 }
127 
135 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
136  time_t d_time;
137  HTTPCacheTable &d_table;
138 
139 public:
140  DeleteExpired(HTTPCacheTable &table, time_t t) :
141  d_time(t), d_table(table) {
142  if (!t)
143  d_time = time(0); // 0 == now
144  }
145 
146  void operator()(HTTPCacheTable::CacheEntry *&e) {
147  if (e && !e->readers && (e->freshness_lifetime
148  < (e->corrected_initial_age + (d_time - e->response_time)))) {
149  DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
150  d_table.remove_cache_entry(e);
151  delete e; e = 0;
152  }
153  }
154 };
155 
156 // @param time base deletes against this time, defaults to 0 (now)
158  // Walk through and delete all the expired entries.
159  for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
160  HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
161  if (slot) {
162  for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
163  slot->erase(remove(slot->begin(), slot->end(),
164  static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
165  }
166  }
167 }
168 
175 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
176  HTTPCacheTable &d_table;
177  int d_hits;
178 
179 public:
180  DeleteByHits(HTTPCacheTable &table, int hits) :
181  d_table(table), d_hits(hits) {
182  }
183 
184  void operator()(HTTPCacheTable::CacheEntry *&e) {
185  if (e && !e->readers && e->hits <= d_hits) {
186  DBG(cerr << "Deleting cache entry: " << e->url << endl);
187  d_table.remove_cache_entry(e);
188  delete e; e = 0;
189  }
190  }
191 };
192 
193 void
195  for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
196  if (get_cache_table()[cnt]) {
197  HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
198  for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
199  slot->erase(remove(slot->begin(), slot->end(),
200  static_cast<HTTPCacheTable::CacheEntry*>(0)),
201  slot->end());
202 
203  }
204  }
205 }
206 
211 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
212  HTTPCacheTable &d_table;
213  unsigned int d_size;
214 
215 public:
216  DeleteBySize(HTTPCacheTable &table, unsigned int size) :
217  d_table(table), d_size(size) {
218  }
219 
220  void operator()(HTTPCacheTable::CacheEntry *&e) {
221  if (e && !e->readers && e->size > d_size) {
222  DBG(cerr << "Deleting cache entry: " << e->url << endl);
223  d_table.remove_cache_entry(e);
224  delete e; e = 0;
225  }
226  }
227 };
228 
229 void HTTPCacheTable::delete_by_size(unsigned int size) {
230  for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
231  if (get_cache_table()[cnt]) {
232  HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
233  for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
234  slot->erase(remove(slot->begin(), slot->end(),
235  static_cast<HTTPCacheTable::CacheEntry*>(0)),
236  slot->end());
237 
238  }
239  }
240 }
241 
248 
255 bool
257 {
258  d_new_entries = 0;
259 
260  return (REMOVE_BOOL(d_cache_index.c_str()) == 0);
261 }
262 
271 bool
273 {
274  FILE *fp = fopen(d_cache_index.c_str(), "r");
275  // If the cache index can't be opened that's OK; start with an empty
276  // cache. 09/05/02 jhrg
277  if (!fp) {
278  return false;
279  }
280 
281  char line[1024];
282  while (!feof(fp) && fgets(line, 1024, fp)) {
284  DBG2(cerr << line << endl);
285  }
286 
287  int res = fclose(fp) ;
288  if (res) {
289  DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
290  }
291 
292  d_new_entries = 0;
293 
294  return true;
295 }
296 
306 {
307  // Read the line and create the cache object
309  istringstream iss(line);
310  iss >> entry->url;
311  iss >> entry->cachename;
312 
313  iss >> entry->etag;
314  if (entry->etag == CACHE_EMPTY_ETAG)
315  entry->etag = "";
316 
317  iss >> entry->lm;
318  iss >> entry->expires;
319  iss >> entry->size;
320  iss >> entry->range; // range is not used. 10/02/02 jhrg
321 
322  iss >> entry->hash;
323  iss >> entry->hits;
324  iss >> entry->freshness_lifetime;
325  iss >> entry->response_time;
326  iss >> entry->corrected_initial_age;
327 
328  iss >> entry->must_revalidate;
329 
330  return entry;
331 }
332 
335 class WriteOneCacheEntry :
336  public unary_function<HTTPCacheTable::CacheEntry *, void>
337 {
338 
339  FILE *d_fp;
340 
341 public:
342  WriteOneCacheEntry(FILE *fp) : d_fp(fp)
343  {}
344 
345  void operator()(HTTPCacheTable::CacheEntry *e)
346  {
347  if (e && fprintf(d_fp,
348  "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
349  e->url.c_str(),
350  e->cachename.c_str(),
351  e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
352  (long)(e->lm),
353  (long)(e->expires),
354  e->size,
355  e->range ? '1' : '0', // not used. 10/02/02 jhrg
356  e->hash,
357  e->hits,
358  (long)(e->freshness_lifetime),
359  (long)(e->response_time),
360  (long)(e->corrected_initial_age),
361  e->must_revalidate ? '1' : '0') < 0)
362  throw Error("Cache Index. Error writing cache index\n");
363  }
364 };
365 
375 void
377 {
378  DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
379 
380  // Open the file for writing.
381  FILE * fp = NULL;
382  if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
383  throw Error(string("Cache Index. Can't open `") + d_cache_index
384  + string("' for writing"));
385  }
386 
387  // Walk through the list and write it out. The format is really
388  // simple as we keep it all in ASCII.
389 
390  for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
391  HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
392  if (cp)
393  for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
394  }
395 
396  /* Done writing */
397  int res = fclose(fp);
398  if (res) {
399  DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
400  << (void *)fp << endl);
401  }
402 
403  d_new_entries = 0;
404 }
405 
407 
420 string
422 {
423  struct stat stat_info;
424  ostringstream path;
425 
426  path << d_cache_root << hash;
427  string p = path.str();
428 
429  if (stat(p.c_str(), &stat_info) == -1) {
430  DBG2(cerr << "Cache....... Create dir " << p << endl);
431  if (MKDIR(p.c_str(), 0777) < 0) {
432  DBG2(cerr << "Cache....... Can't create..." << endl);
433  throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
434  }
435  }
436  else {
437  DBG2(cerr << "Cache....... Directory " << p << " already exists"
438  << endl);
439  }
440 
441  return p;
442 }
443 
458 void
460 {
461  string hash_dir = create_hash_directory(entry->hash);
462 #ifdef WIN32
463  hash_dir += "\\dodsXXXXXX";
464 #else
465  hash_dir += "/dodsXXXXXX"; // mkstemp uses six characters.
466 #endif
467 
468  // mkstemp uses the storage passed to it; must be writable and local.
469  // char *templat = new char[hash_dir.size() + 1];
470  vector<char> templat(hash_dir.size() + 1);
471  strncpy(&templat[0], hash_dir.c_str(), hash_dir.size() + 1);
472 
473  // Open truncated for update. NB: mkstemp() returns a file descriptor.
474  // man mkstemp says "... The file is opened with the O_EXCL flag,
475  // guaranteeing that when mkstemp returns successfully we are the only
476  // user." 09/19/02 jhrg
477 #ifndef WIN32
478  // Make sure that temp files are accessible only by the owner.
479  umask(077);
480 #endif
481  int fd = MKSTEMP(&templat[0]); // fd mode is 666 or 600 (Unix)
482  if (fd < 0) {
483  // delete[] templat; templat = 0;
484  close(fd);
485  throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
486  }
487 
488  entry->cachename = &templat[0];
489  // delete[] templat; templat = 0;
490  close(fd);
491 }
492 
493 
495 static inline int
496 entry_disk_space(int size, unsigned int block_size)
497 {
498  unsigned int num_of_blocks = (size + block_size) / block_size;
499 
500  DBG(cerr << "size: " << size << ", block_size: " << block_size
501  << ", num_of_blocks: " << num_of_blocks << endl);
502 
503  return num_of_blocks * block_size;
504 }
505 
509 
515 void
517 {
518  int hash = entry->hash;
519 
520  if (!d_cache_table[hash])
521  d_cache_table[hash] = new CacheEntries;
522 
523  d_cache_table[hash]->push_back(entry);
524 
525  DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
526  << ", entry->size: " << entry->size << ", block size: " << d_block_size
527  << endl);
528 
529  d_current_size += entry_disk_space(entry->size, d_block_size);
530 
531  DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
532 
534 }
535 
540 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url) /*const*/
541 {
542  return get_locked_entry_from_cache_table(get_hash(url), url);
543 }
544 
553 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url) /*const*/
554 {
555  DBG(cerr << "url: " << url << "; hash: " << hash << endl);
556  DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
557  if (d_cache_table[hash]) {
558  CacheEntries *cp = d_cache_table[hash];
559  for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
560  // Must test *i because perform_garbage_collection may have
561  // removed this entry; the CacheEntry will then be null.
562  if ((*i) && (*i)->url == url) {
563  (*i)->lock_read_response(); // Lock the response
564  return *i;
565  }
566  }
567  }
568 
569  return 0;
570 }
571 
578 HTTPCacheTable::CacheEntry *
580 {
581  int hash = get_hash(url);
582  if (d_cache_table[hash]) {
583  CacheEntries *cp = d_cache_table[hash];
584  for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
585  // Must test *i because perform_garbage_collection may have
586  // removed this entry; the CacheEntry will then be null.
587  if ((*i) && (*i)->url == url) {
588  (*i)->lock_write_response(); // Lock the response
589  return *i;
590  }
591  }
592  }
593 
594  return 0;
595 }
596 
604 void
606 {
607  // This should never happen; all calls to this method are protected by
608  // the caller, hence the InternalErr.
609  if (entry->readers)
610  throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
611 
612  REMOVE(entry->cachename.c_str());
613  REMOVE(string(entry->cachename + CACHE_META).c_str());
614 
615  DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
616 
617  unsigned int eds = entry_disk_space(entry->size, get_block_size());
618  set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
619 
620  DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
621 }
622 
625 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
626 {
627  string d_url;
628  HTTPCacheTable *d_cache_table;
629 
630 public:
631  DeleteCacheEntry(HTTPCacheTable *c, const string &url)
632  : d_url(url), d_cache_table(c)
633  {}
634 
635  void operator()(HTTPCacheTable::CacheEntry *&e)
636  {
637  if (e && e->url == d_url) {
638  e->lock_write_response();
639  d_cache_table->remove_cache_entry(e);
640  e->unlock_write_response();
641  delete e; e = 0;
642  }
643  }
644 };
645 
652 void
654 {
655  int hash = get_hash(url);
656  if (d_cache_table[hash]) {
657  CacheEntries *cp = d_cache_table[hash];
658  for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
659  cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
660  cp->end());
661  }
662 }
663 
666 class DeleteUnlockedCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void> {
667  HTTPCacheTable &d_table;
668 
669 public:
670  DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
671  d_table(t)
672  {
673  }
674  void operator()(HTTPCacheTable::CacheEntry *&e)
675  {
676  if (e) {
677  d_table.remove_cache_entry(e);
678  delete e;
679  e = 0;
680  }
681  }
682 };
683 
685 {
686  // Walk through the cache table and, for every entry in the cache, delete
687  // it on disk and in the cache table.
688  for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
689  HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
690  if (slot) {
691  for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
692  slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *> (0)), slot->end());
693  }
694  }
695 
697 }
698 
712 void
713 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
714 {
715  entry->response_time = time(NULL);
716  time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
717  time_t corrected_received_age = max(apparent_age, entry->age);
718  time_t response_delay = entry->response_time - request_time;
719  entry->corrected_initial_age = corrected_received_age + response_delay;
720 
721  // Estimate an expires time using the max-age and expires time. If we
722  // don't have an explicit expires time then set it to 10% of the LM date
723  // (although max 24 h). If no LM date is available then use 24 hours.
724  time_t freshness_lifetime = entry->max_age;
725  if (freshness_lifetime < 0) {
726  if (entry->expires < 0) {
727  if (entry->lm < 0) {
728  freshness_lifetime = default_expiration;
729  }
730  else {
731  freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
732  }
733  }
734  else
735  freshness_lifetime = entry->expires - entry->date;
736  }
737 
738  entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
739 
740  DBG2(cerr << "Cache....... Received Age " << entry->age
741  << ", corrected " << entry->corrected_initial_age
742  << ", freshness lifetime " << entry->freshness_lifetime << endl);
743 }
744 
756 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry, unsigned long max_entry_size,
757  const vector<string> &headers)
758 {
759  vector<string>::const_iterator i;
760  for (i = headers.begin(); i != headers.end(); ++i) {
761  // skip a blank header.
762  if ((*i).empty())
763  continue;
764 
765  string::size_type colon = (*i).find(':');
766 
767  // skip a header with no colon in it.
768  if (colon == string::npos)
769  continue;
770 
771  string header = (*i).substr(0, (*i).find(':'));
772  string value = (*i).substr((*i).find(": ") + 2);
773  DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
774 
775  if (header == "ETag") {
776  entry->etag = value;
777  }
778  else if (header == "Last-Modified") {
779  entry->lm = parse_time(value.c_str());
780  }
781  else if (header == "Expires") {
782  entry->expires = parse_time(value.c_str());
783  }
784  else if (header == "Date") {
785  entry->date = parse_time(value.c_str());
786  }
787  else if (header == "Age") {
788  entry->age = parse_time(value.c_str());
789  }
790  else if (header == "Content-Length") {
791  unsigned long clength = strtoul(value.c_str(), 0, 0);
792  if (clength > max_entry_size)
793  entry->set_no_cache(true);
794  }
795  else if (header == "Cache-Control") {
796  // Ignored Cache-Control values: public, private, no-transform,
797  // proxy-revalidate, s-max-age. These are used by shared caches.
798  // See section 14.9 of RFC 2612. 10/02/02 jhrg
799  if (value == "no-cache" || value == "no-store")
800  // Note that we *can* store a 'no-store' response in volatile
801  // memory according to RFC 2616 (section 14.9.2) but those
802  // will be rare coming from DAP servers. 10/02/02 jhrg
803  entry->set_no_cache(true);
804  else if (value == "must-revalidate")
805  entry->must_revalidate = true;
806  else if (value.find("max-age") != string::npos) {
807  string max_age = value.substr(value.find("=" + 1));
808  entry->max_age = parse_time(max_age.c_str());
809  }
810  }
811  }
812 }
813 
815 
816 // @TODO Change name to record locked response
818  entry->hits++; // Mark hit
819  d_locked_entries[body] = entry; // record lock, see release_cached_r...
820 }
821 
823 
824  HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
825  if (!entry)
826  throw InternalErr("There is no cache entry for the response given.");
827 
828  d_locked_entries.erase(body);
829  entry->unlock_read_response();
830 
831  if (entry->readers < 0)
832  throw InternalErr("An unlocked entry was released");
833 }
834 
836  return !d_locked_entries.empty();
837 }
838 
839 } // namespace libdap