bes  Updated for version 3.20.8
RemoteResource.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES http package, part of the Hyrax data server.
4 
5 // Copyright (c) 2020 OPeNDAP, Inc.
6 // Author: Nathan Potter <ndp@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 // Authors:
25 // ndp Nathan Potter <ndp@opendap.org>
26 
27 #include "config.h"
28 
29 #include <sstream>
30 #include <fstream>
31 #include <string>
32 #include <iostream>
33 
34 #include "rapidjson/document.h"
35 
36 #include "BESInternalError.h"
37 #include "BESForbiddenError.h"
38 #include "BESSyntaxUserError.h"
39 #include "BESNotFoundError.h"
40 #include "BESTimeoutError.h"
41 
42 #include "BESDebug.h"
43 #include "BESUtil.h"
44 
45 #include "HttpCache.h"
46 #include "HttpUtils.h"
47 #include "CurlUtils.h"
48 #include "HttpNames.h"
49 #include "RemoteResource.h"
50 #include "TheBESKeys.h"
51 #include "BESStopWatch.h"
52 #include "BESLog.h"
53 
54 using namespace std;
55 
56 #define BES_CATALOG_ROOT_KEY "BES.Catalog.catalog.RootDirectory"
57 
58 #define prolog std::string("RemoteResource::").append(__func__).append("() - ")
59 #define MODULE "rr"
60 
61 namespace http {
62 
63 
64 
65  RemoteResource::RemoteResource(const std::string &url,const std::string &uid){
66  d_fd = 0;
67  d_initialized = false;
68 
69  d_uid = uid;
70 
71  d_resourceCacheFileName.clear();
72  d_response_headers = new vector<string>();
73  d_http_response_headers = new map<string, string>();
74 
75  if (url.empty()) {
76  throw BESInternalError(prolog + "Remote resource URL is empty.", __FILE__, __LINE__);
77  }
78 
79  if(url.find(FILE_PROTOCOL) == 0){
80  d_resourceCacheFileName = url.substr(strlen(FILE_PROTOCOL));
81  while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
82  // Strip trailing slashes, because this about files, not directories
83  d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
84  }
85  // Now we check that the data is in the BES_CATALOG_ROOT
86  string catalog_root;
87  bool found;
88  TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
89  if(!found){
90  throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
91  }
92  if(d_resourceCacheFileName.find(catalog_root) !=0 ){
93  d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
94  }
95  d_initialized =true;
96  }
97  else if(url.find(HTTPS_PROTOCOL) == 0 || url.find(HTTP_PROTOCOL) == 0){
98  d_remoteResourceUrl = url;
99  BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl << endl);
100 #if 0
101 
102  if (!d_uid.empty()){
103  string client_id_hdr = "User-Id: " + d_uid;
104  BESDEBUG(MODULE, prolog << client_id_hdr << endl);
105  d_request_headers.push_back(client_id_hdr);
106  }
107  if (!d_echo_token.empty()){
108  string echo_token_hdr = "Echo-Token: " + d_echo_token;
109  BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
110  d_request_headers.push_back(echo_token_hdr);
111  }
112 #endif
113 
114  }
115  else {
116  string err = prolog + "Unsupported protocol: " + url;
117  throw BESInternalError(err, __FILE__, __LINE__);
118  }
119 
120 
121 
122  // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
123 
124  }
125 
126 
// DISABLED CODE: everything between this #if 0 and the matching #endif is
// excluded from compilation.
//
// It is a three-argument constructor (url, uid, echo_token). Compared with the
// active two-argument constructor it also stores echo_token in d_echo_token,
// allocates d_request_headers, and for http(s) URLs appends "User-Id: <uid>"
// and "Echo-Token: <token>" request headers when those values are non-empty.
//
// NOTE(review): if this is ever re-enabled, the three containers are allocated
// before the checks that throw, so they would leak whenever the constructor
// throws; the "is not set" message is also missing a leading space.
127 #if 0
133  RemoteResource::RemoteResource(const std::string &url, const std::string &uid, const std::string &echo_token) {
134 
135  d_fd = 0;
136  d_initialized = false;
137 
138  d_uid = uid;
139  d_echo_token = echo_token;
140 
141  // d_curl = curl::init(url);
142 
143  d_resourceCacheFileName.clear();
144  d_response_headers = new vector<string>();
145  d_request_headers = new vector<string>();
146  d_http_response_headers = new map<string, string>();
147 
148  if (url.empty()) {
149  throw BESInternalError(prolog + "Remote resource URL is empty.", __FILE__, __LINE__);
150  }
151 
152  if(url.find(FILE_PROTOCOL) == 0){
153  d_resourceCacheFileName = url.substr(strlen(FILE_PROTOCOL));
154  while(BESUtil::endsWith(d_resourceCacheFileName,"/")){
155  // Strip trailing slashes, because this about files, not directories
156  d_resourceCacheFileName = d_resourceCacheFileName.substr(0,d_resourceCacheFileName.length()-1);
157  }
158  // Now we check that the data is in the BES_CATALOG_ROOT
159  string catalog_root;
160  bool found;
161  TheBESKeys::TheKeys()->get_value(BES_CATALOG_ROOT_KEY,catalog_root,found );
162  if(!found){
163  throw BESInternalError( prolog + "ERROR - "+ BES_CATALOG_ROOT_KEY + "is not set",__FILE__,__LINE__);
164  }
165  if(d_resourceCacheFileName.find(catalog_root) !=0 ){
166  d_resourceCacheFileName = BESUtil::pathConcat(catalog_root,d_resourceCacheFileName);
167  }
168  d_initialized =true;
169  }
170  else if(url.find(HTTPS_PROTOCOL) == 0 || url.find(HTTP_PROTOCOL) == 0){
171  d_remoteResourceUrl = url;
172  BESDEBUG(MODULE, prolog << "URL: " << d_remoteResourceUrl << endl);
173 
174  if (!d_uid.empty()){
175  string client_id_hdr = "User-Id: " + d_uid;
176  BESDEBUG(MODULE, prolog << client_id_hdr << endl);
177  d_request_headers->push_back(client_id_hdr);
178  }
179  if (!d_echo_token.empty()){
180  string echo_token_hdr = "Echo-Token: " + d_echo_token;
181  BESDEBUG(MODULE, prolog << echo_token_hdr << endl);
182  d_request_headers->push_back(echo_token_hdr);
183  }
184  }
185  else {
186  string err = prolog + "Unsupported protocol: " + url;
187  throw BESInternalError(err, __FILE__, __LINE__);
188  }
189 
190 
191 
192  // BESDEBUG(MODULE, prolog << "d_curl: " << d_curl << endl);
193  }
194 #endif
195 
196 
201  RemoteResource::~RemoteResource() {
202  BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl << endl);
203 
204  delete d_response_headers;
205  d_response_headers = 0;
206  BESDEBUG(MODULE, prolog << "Deleted d_response_headers." << endl);
207 
208 
209  if (!d_resourceCacheFileName.empty()) {
210  HttpCache *cache = HttpCache::get_instance();
211  if (cache) {
212  cache->unlock_and_close(d_resourceCacheFileName);
213  BESDEBUG(MODULE, prolog << "Closed and unlocked " << d_resourceCacheFileName << endl);
214  d_resourceCacheFileName.clear();
215  }
216  }
217 
218 #if 0
219  if (d_curl) {
220  curl_easy_cleanup(d_curl);
221  BESDEBUG(MODULE, prolog << "Called curl_easy_cleanup()." << endl);
222  }
223  d_curl = 0;
224 #endif
225  BESDEBUG(MODULE, prolog << "Clearing resourceURL: " << d_remoteResourceUrl << endl);
226  d_remoteResourceUrl.clear();
227  BESDEBUG(MODULE, prolog << "END" << endl);
228  }
229 
234  std::string RemoteResource::getCacheFileName() {
235  if (!d_initialized) {
236  throw BESInternalError(prolog + "STATE ERROR: Remote Resource " + d_remoteResourceUrl +
237  " has Not Been Retrieved.", __FILE__, __LINE__);
238  }
239  return d_resourceCacheFileName;
240  }
241 
249  void RemoteResource::retrieveResource() {
250  string template_key;
251  string replace_value;
252  retrieveResource(template_key,replace_value);
253  }
254 
266  void RemoteResource::retrieveResource(const string &template_key, const string &replace_value) {
267  BESDEBUG(MODULE, prolog << "BEGIN resourceURL: " << d_remoteResourceUrl << endl);
268  bool mangle = true;
269 
270  if (d_initialized) {
271  BESDEBUG(MODULE, prolog << "END Already initialized." << endl);
272  return;
273  }
274  // Get a pointer to the singleton cache instance for this process.
275  HttpCache *cache = HttpCache::get_instance();
276  if (!cache) {
277  ostringstream oss;
278  oss << prolog << "FAILED to get local cache. ";
279  oss << "Unable to proceed with request for " << this->d_remoteResourceUrl;
280  oss << " The server MUST have a valid HTTP cache configuration to operate." << endl;
281  BESDEBUG(MODULE, oss.str());
282  throw BESInternalError(oss.str(), __FILE__, __LINE__);
283  }
284 
285  // Get the name of the file in the cache (either the code finds this file or
286  // or it makes it).
287  // FIXME THIS SHOULD USE THE uid,resourceURL version of the is function, and the cache name should be a hash
288  d_resourceCacheFileName = cache->get_cache_file_name(d_remoteResourceUrl, mangle);
289  BESDEBUG(MODULE, prolog << "d_resourceCacheFileName: " << d_resourceCacheFileName << endl);
290 
291  // @TODO MAKE THIS RETRIEVE THE CACHED DATA TYPE IF THE CACHED RESPONSE IF FOUND
292  // We need to know the type of the resource. HTTP headers are the preferred way to determine the type.
293  // Unfortunately, the current code losses both the HTTP headers sent from the request and the derived type
294  // to subsequent accesses of the cached object. Since we have to have a type, for now we just set the type
295  // from the url. If down below we DO an HTTP GET then the headers will be evaluated and the type set by setType()
296  // But really - we gotta fix this.
297  http::get_type_from_url(d_remoteResourceUrl, d_type);
298  BESDEBUG(MODULE, prolog << "d_type: " << d_type << endl);
299 
300  try {
301  if (cache->get_read_lock(d_resourceCacheFileName, d_fd)) {
302  BESDEBUG(MODULE,
303  prolog << "Remote resource is already in cache. cache_file_name: " << d_resourceCacheFileName
304  << endl);
305 
306  // #########################################################################################################
307  // I think in this if() is where we need to load the headers from the cache if we have them.
308  string hdr_filename = cache->get_cache_file_name(d_remoteResourceUrl,mangle) + ".hdrs";
309  std::ifstream hdr_ifs(hdr_filename.c_str());
310  try {
311  BESDEBUG(MODULE, prolog << "Reading response headers from: " << hdr_filename << endl);
312  for (std::string line; std::getline(hdr_ifs, line);) {
313  (*d_response_headers).push_back(line);
314  BESDEBUG(MODULE, prolog << "header: " << line << endl);
315  }
316  }
317  catch (...) {
318  hdr_ifs.close();
319  throw;
320  }
321  ingest_http_headers_and_type();
322  d_initialized = true;
323  return;
324  // #########################################################################################################
325  }
326 
327  // Now we actually need to reach out across the interwebs and retrieve the remote resource and put it's
328  // content into a local cache file, given that it's not in the cache.
329  // First make an empty file and get an exclusive lock on it.
330  if (cache->create_and_lock(d_resourceCacheFileName, d_fd)) {
331 
332  // Write the remote resource to the cache file.
333  try {
334  writeResourceToFile(d_fd);
335  }
336  catch (...) {
337  // If things went south then we need to dump the file because we'll end up with an empty/bogus file clogging the cache
338  unlink(d_resourceCacheFileName.c_str());
339  throw;
340  }
341 
342  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
343  // If we are filtering the response (for example to inject data URL into a dmr++ file),
344  // The file is locked and we have the information required to make the substitution.
345  // This is controlled by:
346  // - The template_key string must not be empty.
347  if(!template_key.empty()){
348  unsigned int count = filter_retrieved_resource(template_key, replace_value);
349  BESDEBUG(MODULE, prolog << "Replaced " << count <<
350  " instance(s) of template(" <<
351  template_key << ") with " << replace_value << " in cached RemoteResource" << endl);
352  }
353 
354  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
355  // I think right here is where I would be able to cache the data type/response headers. While I have
356  // the exclusive lock I could open another cache file for metadata and write to it.
357  {
358  // FIXME THIS IS WHERE WE NEED TO USE NGAP HASH CACHE FILE NAMES NOT FLC NAMES.
359  string hdr_filename = cache->get_cache_file_name(d_remoteResourceUrl, mangle) + ".hdrs";
360  std::ofstream hdr_out(hdr_filename.c_str());
361  try {
362  for (size_t i = 0; i < this->d_response_headers->size(); i++) {
363  hdr_out << (*d_response_headers)[i] << endl;
364  }
365  }
366  catch (...) {
367  // If this fails for any reason we:
368  hdr_out.close(); // Close the stream
369  unlink(hdr_filename.c_str()); // unlink the file
370  unlink(d_resourceCacheFileName.c_str()); // unlink the primary cache file.
371  throw;
372  }
373  }
374  // #########################################################################################################
375 
376  // Change the exclusive lock on the new file to a shared lock. This keeps
377  // other processes from purging the new file and ensures that the reading
378  // process can use it.
379  cache->exclusive_to_shared_lock(d_fd);
380  BESDEBUG(MODULE, prolog << "Converted exclusive cache lock to shared lock." << endl);
381 
382  // Now update the total cache size info and purge if needed. The new file's
383  // name is passed into the purge method because this process cannot detect its
384  // own lock on the file.
385  unsigned long long size = cache->update_cache_info(d_resourceCacheFileName);
386  BESDEBUG(MODULE, prolog << "Updated cache info" << endl);
387 
388  if (cache->cache_too_big(size)) {
389  cache->update_and_purge(d_resourceCacheFileName);
390  BESDEBUG(MODULE, prolog << "Updated and purged cache." << endl);
391  }
392  BESDEBUG(MODULE, prolog << "END" << endl);
393  d_initialized = true;
394  return;
395  } else {
396  if (cache->get_read_lock(d_resourceCacheFileName, d_fd)) {
397  BESDEBUG(MODULE,
398  prolog << "Remote resource is in cache. cache_file_name: " << d_resourceCacheFileName
399  << endl);
400  d_initialized = true;
401  return;
402  }
403  }
404 
405  string msg = prolog + "Failed to acquire cache read lock for remote resource: '";
406  msg += d_remoteResourceUrl + "\n";
407  throw BESInternalError(msg, __FILE__, __LINE__);
408 
409  }
410  catch (BESError &besError) {
411  BESDEBUG(MODULE, prolog << "Caught BESError. type: " << besError.get_bes_error_type() <<
412  " message: '" << besError.get_message() <<
413  "' file: " << besError.get_file() << " line: " << besError.get_line() <<
414  " Will unlock cache and re-throw." << endl);
415  cache->unlock_cache();
416  throw;
417  }
418  catch (...) {
419  BESDEBUG(MODULE, prolog << "Caught unknown exception. Will unlock cache and re-throw." << endl);
420  cache->unlock_cache();
421  throw;
422  }
423 
424  }
425 
434  void RemoteResource::writeResourceToFile(int fd) {
435 
436  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
437  try {
438 
439  BESStopWatch besTimer;
440  if (BESDebug::IsSet("rr") || BESDebug::IsSet(MODULE) || BESDebug::IsSet(TIMING_LOG_KEY) || BESLog::TheLog()->is_verbose()){
441  besTimer.start(prolog + "source url: " + d_remoteResourceUrl);
442  }
443 
444  BESDEBUG(MODULE, prolog << "Saving resource " << d_remoteResourceUrl << " to cache file " << d_resourceCacheFileName << endl);
445  curl::http_get_and_write_resource(d_remoteResourceUrl, fd, d_response_headers); // Throws BESInternalError if there is a curl error.
446 
447  BESDEBUG(MODULE, prolog << "Resource " << d_remoteResourceUrl << " saved to cache file " << d_resourceCacheFileName << endl);
448 
449  // rewind the file
450  // FIXME I think the idea here is that we have the file open and we should just keep
451  // reading from it. But the container mechanism works with file names, so we will
452  // likely have to open the file again. If that's true, lets remove this call. jhrg 3.2.18
453  int status = lseek(fd, 0, SEEK_SET);
454  if (-1 == status)
455  throw BESError("Could not seek within the response.", BES_NOT_FOUND_ERROR, __FILE__, __LINE__);
456  BESDEBUG(MODULE, prolog << "Reset file descriptor." << endl);
457 
458  // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
459  ingest_http_headers_and_type();
460  }
461  catch (BESError &e) {
462  throw;
463  }
464  BESDEBUG(MODULE, prolog << "END" << endl);
465  }
466 
470  void RemoteResource::ingest_http_headers_and_type() {
471  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
472 
473  const string colon_space = ": ";
474  for (size_t i = 0; i < this->d_response_headers->size(); i++) {
475  string header = (*d_response_headers)[i];
476  BESDEBUG(MODULE, prolog << "Processing header " << header << endl);
477  size_t colon_index = header.find(colon_space);
478  if(colon_index == string::npos){
479  BESDEBUG(MODULE, prolog << "Unable to locate the colon space \": \" delimiter in the header " <<
480  "string: '" << header << "' SKIPPING!" << endl);
481  }
482  else {
483  string key = BESUtil::lowercase(header.substr(0, colon_index));
484  string value = header.substr(colon_index + colon_space.length());
485  BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
486  (*d_http_response_headers)[key] = value;
487  }
488  }
489  std::map<string, string>::iterator it;
490  string type;
491 
492  // Try and figure out the file type first from the
493  // Content-Disposition in the http header response.
494 
495  string content_disp_hdr;
496  content_disp_hdr = get_http_response_header("content-disposition");
497  if (!content_disp_hdr.empty()) {
498  // Content disposition exists, grab the filename
499  // attribute
500  http::get_type_from_disposition(content_disp_hdr, type);
501  BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << content_disp_hdr << "' matched type: \"" << type << "\"" << endl);
502  }
503 
504  // still haven't figured out the type. Check the content-type
505  // next, translate to the BES MODULE name. It's also possible
506  // that even though Content-disposition was available, we could
507  // not determine the type of the file.
508  string content_type = get_http_response_header("content-type");
509  if (type.empty() && !content_type.empty()) {
510  http::get_type_from_content_type(content_type, type);
511  BESDEBUG(MODULE,prolog << "Evaluated content-type '" << content_type << "' matched type \"" << type << "\"" << endl);
512  }
513 
514  // still haven't figured out the type. Now check the actual URL
515  // and see if we can't match the URL to a MODULE name
516  if (type.empty()) {
517  http::get_type_from_url(d_remoteResourceUrl, type);
518  BESDEBUG(MODULE, prolog << "Evaluated url '" << d_remoteResourceUrl << "' matched type: \"" << type << "\"" << endl);
519  }
520 
521  // still couldn't figure it out, punt
522  if (type.empty()) {
523  string err = prolog + "Unable to determine the type of data"
524  + " returned from '" + d_remoteResourceUrl + "' Setting type to 'unknown'";
525  BESDEBUG(MODULE, err << endl);
526  type = "unknown";
527  //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
528  }
529  d_type = type;
530  BESDEBUG(MODULE, prolog << "END (dataset type: " << d_type << ")" << endl);
531  }
532 
538  std::string
539  RemoteResource::get_http_response_header(const std::string header_name) {
540  string value("");
541  std::map<string, string>::iterator it;
542  it = d_http_response_headers->find(BESUtil::lowercase(header_name));
543  if (it != d_http_response_headers->end())
544  value = it->second;
545  return value;
546  }
547 
548 
559  unsigned int RemoteResource::filter_retrieved_resource(const std::string &template_str, const std::string &update_str){
560  unsigned int replace_count = 0;
561 
562  // - - - - - - - - - - - - - - - - - - - - - - - -
563  // Read the dmr++ file into a string object
564  std::ifstream cr_istrm(d_resourceCacheFileName);
565  if (!cr_istrm.is_open()) {
566  string msg = "Could not open '" + d_resourceCacheFileName + "' to read cached response.";
567  BESDEBUG(MODULE, prolog << msg << endl);
568  throw BESInternalError(msg, __FILE__, __LINE__);
569  }
570  std::stringstream buffer;
571  buffer << cr_istrm.rdbuf();
572  string resource_content(buffer.str());
573 
574  // - - - - - - - - - - - - - - - - - - - - - - - -
575  // Replace all occurrences of the dmr++ href attr key.
576  int startIndex = 0;
577  while ((startIndex = resource_content.find(template_str)) != -1) {
578  resource_content.erase(startIndex, template_str.length());
579  resource_content.insert(startIndex, update_str);
580  replace_count++;
581  }
582 
583  // - - - - - - - - - - - - - - - - - - - - - - - -
584  // Replace the contents of the cached dmr++ file with the modified string.
585  std::ofstream cr_ostrm(d_resourceCacheFileName);
586  if (!cr_ostrm.is_open()) {
587  string msg = "Could not open '" + d_resourceCacheFileName + "' to write modified cached response.";
588  BESDEBUG(MODULE, prolog << msg << endl);
589  throw BESInternalError(msg, __FILE__, __LINE__);
590  }
591  cr_ostrm << resource_content;
592 
593  return replace_count;
594  }
595 
599  std::string RemoteResource::get_response_as_string() {
600 
601  if(!d_initialized){
602  stringstream msg;
603  msg << "ERROR. Internal state error. " << __PRETTY_FUNCTION__ << " was called prior to retrieving resource.";
604  BESDEBUG(MODULE, prolog << msg.str() << endl);
605  throw BESInternalError(msg.str(), __FILE__, __LINE__);
606  }
607  string cache_file = getCacheFileName();
608  // - - - - - - - - - - - - - - - - - - - - - - - - - - -
609  // Set up cache file input stream.
610  std::ifstream file_istream(cache_file, std::ofstream::in);
611 
612  // If the cache filename is not valid, the stream will not open. Empty is not valid.
613  if(file_istream.is_open()){
614  // If it's open we've got a valid input stream.
615  BESDEBUG(MODULE, prolog << "Using cached file: " << cache_file << endl);
616  std::stringstream buffer;
617  buffer << file_istream.rdbuf();
618  return buffer.str();
619  }
620  else {
621  stringstream msg;
622  msg << "ERROR. Failed to open cache file " << cache_file << " for reading.";
623  BESDEBUG(MODULE, prolog << msg.str() << endl);
624  throw BESInternalError(msg.str(), __FILE__, __LINE__);
625  }
626 
627  }
628 
636  rapidjson::Document RemoteResource::get_as_json() {
637  string response = get_response_as_string();
639  d.Parse(response.c_str());
640  return d;
641  }
642 
646  vector<string> *RemoteResource::getResponseHeaders() {
647  if (!d_initialized){
648  throw BESInternalError(prolog +"STATE ERROR: Remote Resource Has Not Been Retrieved.",__FILE__,__LINE__);
649  }
650  return d_response_headers;
651  }
652 
653 
// DISABLED CODE: everything between this #if 0 and the matching #endif is
// excluded from compilation.
//
// setType() scans the raw response header lines for content-disposition and
// content-type, then picks the resource type with the same fallback order that
// ingest_http_headers_and_type() uses: content-disposition, content-type, the
// resource URL, and finally "unknown". It appears to be an earlier version of
// that method, kept for reference.
//
// NOTE(review): the result of hdr_line.find(colon_space) is not checked before
// it is used in substr(), so a line without ": " is not handled here.
654 #if 0
655  void RemoteResource::setType(const vector<string> *resp_hdrs) {
656 
657  BESDEBUG(MODULE, prolog << "BEGIN" << endl);
658 
659  string type = "";
660 
661  // Try and figure out the file type first from the
662  // Content-Disposition in the http header response.
663  string disp;
664  string ctype;
665 
666  if (resp_hdrs) {
667  vector<string>::const_iterator i = resp_hdrs->begin();
668  vector<string>::const_iterator e = resp_hdrs->end();
669  for (; i != e; i++) {
670  string hdr_line = (*i);
671 
672  BESDEBUG(MODULE, prolog << "Evaluating header: " << hdr_line << endl);
673 
674  hdr_line = BESUtil::lowercase(hdr_line);
675 
676  string colon_space = ": ";
677  int index = hdr_line.find(colon_space);
678  string hdr_name = hdr_line.substr(0, index);
679  string hdr_value = hdr_line.substr(index + colon_space.length());
680 
681  BESDEBUG(MODULE, prolog << "hdr_name: '" << hdr_name << "' hdr_value: '" << hdr_value << "' " << endl);
682 
683  if (hdr_name.find("content-disposition") != string::npos) {
684  // Content disposition exists
685  BESDEBUG(MODULE, prolog << "Located content-disposition header." << endl);
686  disp = hdr_value;
687  }
688  if (hdr_name.find("content-type") != string::npos) {
689  BESDEBUG(MODULE, prolog << "Located content-type header." << endl);
690  ctype = hdr_value;
691  }
692  }
693  }
694 
695  if (!disp.empty()) {
696  // Content disposition exists, grab the filename
697  // attribute
698  HttpUtils::Get_type_from_disposition(disp, type);
699  BESDEBUG(MODULE,prolog << "Evaluated content-disposition '" << disp << "' matched type: \"" << type << "\"" << endl);
700  }
701 
702  // still haven't figured out the type. Check the content-type
703  // next, translate to the BES MODULE name. It's also possible
704  // that even though Content-disposition was available, we could
705  // not determine the type of the file.
706  if (type.empty() && !ctype.empty()) {
707  HttpUtils::Get_type_from_content_type(ctype, type);
708  BESDEBUG(MODULE,prolog << "Evaluated content-type '" << ctype << "' matched type \"" << type << "\"" << endl);
709  }
710 
711  // still haven't figured out the type. Now check the actual URL
712  // and see if we can't match the URL to a MODULE name
713  if (type.empty()) {
714  HttpUtils::Get_type_from_url(d_remoteResourceUrl, type);
715  BESDEBUG(MODULE,prolog << "Evaluated url '" << d_remoteResourceUrl << "' matched type: \"" << type << "\"" << endl);
716  }
717 
718  // still couldn't figure it out, punt
719  if (type.empty()) {
720  string err = prolog + "Unable to determine the type of data"
721  + " returned from '" + d_remoteResourceUrl + "' Setting type to 'unknown'";
722  BESDEBUG(MODULE, err << endl);
723  type = "unknown";
724  //throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ;
725  }
726 
727  // @TODO CACHE THE DATA TYPE OR THE HTTP HEADERS SO WHEN WE ARE RETRIEVING THE CACHED OBJECT WE CAN GET THE CORRECT TYPE
728 
729  d_type = type;
730  }
731 #endif
732 
733 
734 } // namespace http
735 
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:160
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
virtual void unlock_and_close(const std::string &target)
virtual unsigned long long update_cache_info(const std::string &target)
Update the cache info file to include 'target'.
virtual bool create_and_lock(const std::string &target, int &fd)
Create a file in the cache and lock it for write access.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
virtual bool get_read_lock(const std::string &target, int &fd)
Get a read-only lock on the file if it exists.
virtual bool cache_too_big(unsigned long long current_size) const
look at the cache size; is it too large? Look at the cache size and see if it is too big.
virtual void update_and_purge(const std::string &new_file)
Purge files from the cache.
exception thrown if internal error encountered
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:942
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:200
static std::string pathConcat(const std::string &firstPart, const std::string &secondPart, char separator='/')
Concatenate path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:772
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:339
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:266
GenericDocument< UTF8<> > Document
GenericDocument with UTF8 encoding.
Definition: document.h:2585
utility class for the HTTP catalog module
Definition: EffectiveUrl.cc:58
void get_type_from_disposition(const string &disp, string &type)
Definition: HttpUtils.cc:109