libdap++  Updated for version 3.14.0
HTTPConnect.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 
49 //#define DODS_DEBUG2
50 //#define HTTP_TRACE
51 //#define DODS_DEBUG
52 
53 #undef USE_GETENV
54 
55 
56 #include "debug.h"
57 #include "mime_util.h"
58 #include "media_types.h"
59 #include "GNURegex.h"
60 #include "HTTPCache.h"
61 #include "HTTPConnect.h"
62 #include "RCReader.h"
63 #include "HTTPResponse.h"
64 #include "HTTPCacheResponse.h"
65 
66 using namespace std;
67 
68 namespace libdap {
69 
70 // These global variables are not MT-Safe, but I'm leaving them as is because
71 // they are used only for debugging (set them in a debugger like gdb or ddd).
72 // They are not static because I think that many debuggers cannot access
73 // static variables. 08/07/02 jhrg
74 
// Set this to 1 to turn on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Keep the temporary files; useful for debugging.
// NOTE(review): this definition was absent from the listing; it is restored
// from the file's own cross-reference ("int dods_keep_temps", line 79).
int dods_keep_temps = 0;

#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Text for the HTTP 4xx codes; entry i describes status CLIENT_ERR_MIN + i.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] = {
    "Bad Request:",                                     // 400
    "Unauthorized: Contact the server administrator.",  // 401
    "Payment Required.",                                // 402
    "Forbidden: Contact the server administrator.",     // 403
    "Not Found: The data source or server could not be found.\n"
    " Often this means that the OPeNDAP server is missing or needs attention;\n"
    " Please contact the server administrator.",        // 404
    "Method Not Allowed.",                              // 405
    "Not Acceptable.",                                  // 406
    "Proxy Authentication Required.",                   // 407
    "Request Time-out.",                                // 408
    "Conflict.",                                        // 409
    "Gone:.",                                           // 410
    "Length Required.",                                 // 411
    "Precondition Failed.",                             // 412
    "Request Entity Too Large.",                        // 413
    "Request URI Too Large.",                           // 414
    "Unsupported Media Type.",                          // 415
    "Requested Range Not Satisfiable.",                 // 416
    "Expectation Failed."                               // 417
};

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Text for the HTTP 5xx codes; entry i describes status SERVER_ERR_MIN + i.
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = {
    "Internal Server Error.",       // 500
    "Not Implemented.",             // 501
    "Bad Gateway.",                 // 502
    "Service Unavailable.",         // 503
    "Gateway Time-out.",            // 504
    "HTTP Version Not Supported."   // 505
};

/**
 * Translate an HTTP status code into a human readable message.
 *
 * @param status The HTTP status code.
 * @return The table entry for codes in [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or
 * [SERVER_ERR_MIN, SERVER_ERR_MAX]; a generic "Unknown Error" text for every
 * other value.
 */
static string
http_status_to_string(int status)
{
    const char *text =
        "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.";

    const bool is_client_error = !(status < CLIENT_ERR_MIN || status > CLIENT_ERR_MAX);
    const bool is_server_error = !(status < SERVER_ERR_MIN || status > SERVER_ERR_MAX);

    if (is_client_error)
        text = http_client_errors[status - CLIENT_ERR_MIN];
    else if (is_server_error)
        text = http_server_errors[status - SERVER_ERR_MIN];

    return string(text);
}
131 
132 static ObjectType
133 determine_object_type(const string &header_value)
134 {
135  // DAP4 Data: application/vnd.opendap.dap4.data
136  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
137 
138  string::size_type plus = header_value.find('+');
139  string base_type;
140  string type_extension = "";
141  if (plus != string::npos) {
142  base_type= header_value.substr(0, plus);
143  type_extension = header_value.substr(plus+1);
144  }
145  else
146  base_type = header_value;
147 
148  if (base_type == DMR_Content_Type
149  || (base_type.find("application/") != string::npos
150  && base_type.find("dap4.dataset-metadata") != string::npos)) {
151  if (type_extension == "xml")
152  return dap4_dmr;
153  else
154  return unknown_type;
155  }
156  else if (base_type == DAP4_DATA_Content_Type
157  || (base_type.find("application/") != string::npos
158  && base_type.find("dap4.data") != string::npos)) {
159  return dap4_data;
160  }
161  else if (header_value.find("text/html") != string::npos) {
162  return web_error;
163  }
164  else
165  return unknown_type;
166 }
167 
172 class ParseHeader : public unary_function<const string &, void>
173 {
174  ObjectType type; // What type of object is in the stream?
175  string server; // Server's version string.
176  string protocol; // Server's protocol version.
177  string location; // Url returned by server
178 
179 public:
180  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
181  { }
182 
183  void operator()(const string &line)
184  {
185  string name, value;
186  parse_mime_header(line, name, value);
187 
188  DBG2(cerr << name << ": " << value << endl);
189 
190  // Content-Type is used to determine the content of DAP4 responses, but allow the
191  // Content-Description header to override CT o preserve operation with DAP2 servers.
192  // jhrg 11/12/13
193  if (type == unknown_type && name == "content-type") {
194  type = determine_object_type(value); // see above
195  }
196  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
197  type = get_description_type(value); // defined in mime_util.cc
198  }
199  // The second test (== "dods/0.0") tests if xopendap-server has already
200  // been seen. If so, use that header in preference to the old
201  // XDODS-Server header. jhrg 2/7/06
202  else if (name == "xdods-server" && server == "dods/0.0") {
203  server = value;
204  }
205  else if (name == "xopendap-server") {
206  server = value;
207  }
208  else if (name == "xdap") {
209  protocol = value;
210  }
211  else if (server == "dods/0.0" && name == "server") {
212  server = value;
213  }
214  else if (name == "location") {
215  location = value;
216  }
217  }
218 
219  ObjectType get_object_type()
220  {
221  return type;
222  }
223 
224  string get_server()
225  {
226  return server;
227  }
228 
229  string get_protocol()
230  {
231  return protocol;
232  }
233 
234  string get_location() {
235  return location;
236  }
237 };
238 
/**
 * Header callback for libcurl (installed with CURLOPT_HEADERFUNCTION).
 *
 * Stores each non-empty response header line, without its line terminator,
 * in the vector<string> passed through `resp_hdrs`. HTTP status lines (lines
 * that start with "HTTP/") are not stored.
 *
 * Changes from the earlier version:
 *  - an empty buffer (size * nmemb == 0) no longer underflows the length;
 *  - only a real trailing "\n" / "\r\n" is removed, so a line that arrives
 *    without a terminator keeps its last character;
 *  - only lines that *begin* with "HTTP/" are treated as status lines, so a
 *    header whose value merely contains "HTTP" is no longer discarded;
 *  - a null `resp_hdrs` is tolerated (the header is simply not recorded).
 *
 * @param ptr Start of the header data (not NUL terminated).
 * @param size Size of one data item.
 * @param nmemb Number of data items.
 * @param resp_hdrs Pointer to a vector<string> that receives the header.
 * @return The number of bytes consumed, always size * nmemb.
 */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(cerr << "Inside the header parser." << endl);
    vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);

    const size_t total = size * nmemb;
    const char *data = static_cast<const char *>(ptr);

    // Grab the header, minus the trailing newline. Or \r\n pair.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    string complete_line;
    if (length > 0)
        complete_line.assign(data, length);

    // Store all non-empty headers that are not HTTP status lines
    const bool is_status_line = complete_line.compare(0, 5, "HTTP/") == 0;
    if (hdrs && !complete_line.empty() && !is_status_line) {
        DBG(cerr << "Header line: " << complete_line << endl);
        hdrs->push_back(complete_line);
    }

    // Always report the full buffer as consumed; reporting anything else
    // would make libcurl abort the transfer.
    return total;
}
275 
277 static int
278 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
279 {
280  string message(msg, size);
281 
282  switch (info) {
283  case CURLINFO_TEXT:
284  cerr << "Text: " << message; break;
285  case CURLINFO_HEADER_IN:
286  cerr << "Header in: " << message; break;
287  case CURLINFO_HEADER_OUT:
288  cerr << "Header out: " << message; break;
289  case CURLINFO_DATA_IN:
290  cerr << "Data in: " << message; break;
291  case CURLINFO_DATA_OUT:
292  cerr << "Data out: " << message; break;
293  case CURLINFO_END:
294  cerr << "End: " << message; break;
295 #ifdef CURLINFO_SSL_DATA_IN
296  case CURLINFO_SSL_DATA_IN:
297  cerr << "SSL Data in: " << message; break;
298 #endif
299 #ifdef CURLINFO_SSL_DATA_OUT
300  case CURLINFO_SSL_DATA_OUT:
301  cerr << "SSL Data out: " << message; break;
302 #endif
303  default:
304  cerr << "Curl info: " << message; break;
305  }
306  return 0;
307 }
308 
312 void
313 HTTPConnect::www_lib_init()
314 {
315  d_curl = curl_easy_init();
316  if (!d_curl)
317  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
318 
319  // Now set options that will remain constant for the duration of this
320  // CURL object.
321 
322  // Set the proxy host.
323  if (!d_rcr->get_proxy_server_host().empty()) {
324  DBG(cerr << "Setting up a proxy server." << endl);
325  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
326  << endl);
327  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
328  << endl);
329  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
330  << endl);
331  curl_easy_setopt(d_curl, CURLOPT_PROXY,
332  d_rcr->get_proxy_server_host().c_str());
333  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
334  d_rcr->get_proxy_server_port());
335 
336  // As of 4/21/08 only NTLM, Digest and Basic work.
337 #ifdef CURLOPT_PROXYAUTH
338  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
339 #endif
340 
341  // Password might not be required. 06/21/04 jhrg
342  if (!d_rcr->get_proxy_server_userpw().empty())
343  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
344  d_rcr->get_proxy_server_userpw().c_str());
345  }
346 
347  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
348  // We have to set FailOnError to false for any of the non-Basic
349  // authentication schemes to work. 07/28/03 jhrg
350  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
351 
352  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
353  // choosing the the 'safest' one supported by the server.
354  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
355  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
356 
357  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
358  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
359  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
360  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
361  // param of save_raw_http_headers to a vector<string> object.
362 
363  // Follow 302 (redirect) responses
364  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
365  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
366 
367  // If the user turns off SSL validation...
368  if (d_rcr->get_validate_ssl() == 0) {
369  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
370  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
371  }
372 
373  // Look to see if cookies are turned on in the .dodsrc file. If so,
374  // activate here. We honor 'session cookies' (cookies without an
375  // expiration date) here so that session-based SSO systems will work as
376  // expected.
377  if (!d_cookie_jar.empty()) {
378  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
379  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
380  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
381  }
382 
383  if (www_trace) {
384  cerr << "Curl version: " << curl_version() << endl;
385  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
386  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
387  }
388 }
389 
393 class BuildHeaders : public unary_function<const string &, void>
394 {
395  struct curl_slist *d_cl;
396 
397 public:
398  BuildHeaders() : d_cl(0)
399  {}
400 
401  void operator()(const string &header)
402  {
403  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
404  << endl);
405  d_cl = curl_slist_append(d_cl, header.c_str());
406  }
407 
408  struct curl_slist *get_headers()
409  {
410  return d_cl;
411  }
412 };
413 
428 long
429 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
430 {
431  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
432 
433 #ifdef WIN32
434  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
435  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
436  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
437  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
438  // this issue is that one should not pass a FILE * to a windows DLL. Close
439  // inspection of libcurl yields that their default write function when using
440  // the CURLOPT_WRITEDATA is just "fwrite".
441  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
442  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
443 #else
444  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
445 #endif
446 
447  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
448  ostream_iterator<string>(cerr, "\n")));
449 
450  BuildHeaders req_hdrs;
451  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
452  req_hdrs);
453  if (headers)
454  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
455  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
456 
457  // Turn off the proxy for this URL?
458  bool temporary_proxy = false;
459  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
460  DBG(cerr << "Suppress proxy for url: " << url << endl);
461  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
462  }
463 
464  string::size_type at_sign = url.find('@');
465  // Assume username:password present *and* assume it's an HTTP URL; it *is*
466  // HTTPConnect, after all. 7 is position after "http://"; the second arg
467  // to substr() is the sub string length.
468  if (at_sign != url.npos)
469  d_upstring = url.substr(7, at_sign - 7);
470 
471  if (!d_upstring.empty())
472  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
473 
474  // Pass save_raw_http_headers() a pointer to the vector<string> where the
475  // response headers may be stored. Callers can use the resp_hdrs
476  // value/result parameter to get the raw response header information .
477  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
478 
479  // This is the call that causes curl to go and get the remote resource and "write it down"
480  // utilizing the configuration state that has been previously conditioned by various perturbations
481  // of calls to curl_easy_setopt().
482  CURLcode res = curl_easy_perform(d_curl);
483 
484  // Free the header list and null the value in d_curl.
485  curl_slist_free_all(req_hdrs.get_headers());
486  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
487 
488  // Reset the proxy?
489  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
490  curl_easy_setopt(d_curl, CURLOPT_PROXY,
491  d_rcr->get_proxy_server_host().c_str());
492 
493  if (res != 0)
494  throw Error(d_error_buffer);
495 
496  long status;
497  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
498  if (res != 0)
499  throw Error(d_error_buffer);
500 
501  char *ct_ptr = 0;
502  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
503  if (res == CURLE_OK && ct_ptr)
504  d_content_type = ct_ptr;
505  else
506  d_content_type = "";
507 
508  return status;
509 }
510 
514 bool
515 HTTPConnect::url_uses_proxy_for(const string &url) throw()
516 {
517  if (d_rcr->is_proxy_for_used()) {
518  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
519  int index = 0, matchlen;
520  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
521  }
522 
523  return false;
524 }
525 
529 bool
530 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
531 {
532  return d_rcr->is_no_proxy_for_used()
533  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
534 }
535 
536 // Public methods. Mostly...
537 
544 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
545  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
546 
547 {
548  d_accept_deflate = rcr->get_deflate();
549  d_rcr = rcr;
550 
551  // Load in the default headers to send with a request. The empty Pragma
552  // headers overrides libcurl's default Pragma: no-cache header (which
553  // will disable caching by Squid, et c.). The User-Agent header helps
554  // make server logs more readable. 05/05/03 jhrg
555  d_request_headers.push_back(string("Pragma:"));
556  string user_agent = string("User-Agent: ") + string(CNAME)
557  + string("/") + string(CVER);
558  d_request_headers.push_back(user_agent);
559  if (d_accept_deflate)
560  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
561 
562  // HTTPCache::instance returns a valid ptr or 0.
563  if (d_rcr->get_use_cache())
564  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
565  else
566  d_http_cache = 0;
567 
568  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
569  << ")" << endl);
570 
571  if (d_http_cache) {
572  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
573  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
574  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
575  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
576  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
577  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
578  }
579 
580  d_cookie_jar = rcr->get_cookie_jar();
581 
582  www_lib_init(); // This may throw either Error or InternalErr
583 }
584 
586 {
587  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
588 
589  curl_easy_cleanup(d_curl);
590 
591  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
592 }
593 
/**
 * Predicate that is true for strings that begin with a given header name,
 * e.g. HeaderMatch("Content-Type:"). The comparison is case sensitive.
 *
 * The header text is stored by value so the predicate stays valid when it is
 * built from a temporary, and the test is a bounded prefix comparison rather
 * than a search of the whole line. The class no longer derives from
 * std::unary_function (deprecated in C++11, removed in C++17).
 */
class HeaderMatch {
    string d_header;
public:
    /** @param header The text a matching string must start with. */
    HeaderMatch(const string &header) : d_header(header) {}

    /** @return True if arg starts with the header text given to the ctor. */
    bool operator()(const string &arg) const
    {
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
601 
614 HTTPResponse *
615 HTTPConnect::fetch_url(const string &url)
616 {
617 #ifdef HTTP_TRACE
618  cout << "GET " << url << " HTTP/1.0" << endl;
619 #endif
620 
621  HTTPResponse *stream;
622 
623  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
624  stream = caching_fetch_url(url);
625  }
626  else {
627  stream = plain_fetch_url(url);
628  }
629 
630 #ifdef HTTP_TRACE
631  stringstream ss;
632  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
633  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
634  ss << stream->get_headers()->at(i) << endl;
635  }
636  cout << ss.str();
637 #endif
638 
639  ParseHeader parser;
640 
641  // An apparent quirk of libcurl is that it does not pass the Content-type
642  // header to the callback used to save them, but check and add it from the
643  // saved state variable only if it's not there (without this a test failed
644  // in HTTPCacheTest). jhrg 11/12/13
645  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
646  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
647  stream->get_headers()->push_back("Content-Type: " + d_content_type);
648  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
649 
650 #ifdef HTTP_TRACE
651  cout << endl << endl;
652 #endif
653 
654  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
655  if (parser.get_location() != "" &&
656  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
657  delete stream;
658  return fetch_url(parser.get_location());
659  }
660 
661  stream->set_type(parser.get_object_type()); // uses the value of content-description
662 
663  stream->set_version(parser.get_server());
664  stream->set_protocol(parser.get_protocol());
665 
666  if (d_use_cpp_streams) {
667  stream->transform_to_cpp();
668  }
669 
670  return stream;
671 }
672 
673 // Look around for a reasonable place to put a temporary file. Check first
674 // the value of the TMPDIR env var. If that does not yield a path that's
675 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
676 // defined in stdio.h). If both come up empty, then use `./'.
677 
678 // Change this to a version that either returns a string or an open file
679 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
680 // (see open()) to make it more secure. Ideal solution: get deserialize()
681 // methods to read from a stream returned by libcurl, not from a temporary
682 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
683 static string
684 get_tempfile_template(const string &file_template)
685 {
686  string c;
687 
688  // Windows has one idea of the standard name(s) for a temporary files dir
689 #ifdef WIN32
690  // white list for a WIN32 directory
691  Regex directory("[-a-zA-Z0-9_:\\]*");
692 
693  // If we're OK to use getenv(), try it.
694 #ifdef USE_GETENV
695  c = getenv("TEMP");
696  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
697  goto valid_temp_directory;
698 
699  c= getenv("TMP");
700  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
701  goto valid_temp_directory;
702 #endif // USE_GETENV
703 
704  // The windows default
705  c = "c:\tmp";
706  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
707  goto valid_temp_directory;
708 
709 #else // Unix/Linux/OSX has another...
710  // white list for a directory
711  Regex directory("[-a-zA-Z0-9_/]*");
712 #ifdef USE_GETENV
713  c = getenv("TMPDIR");
714  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
715  goto valid_temp_directory;
716 #endif // USE_GETENV
717 
718  // Unix defines this sometimes - if present, use it.
719 #ifdef P_tmpdir
720  if (access(P_tmpdir, W_OK | R_OK) == 0) {
721  c = P_tmpdir;
722  goto valid_temp_directory;
723  }
724 #endif
725 
726  // The Unix default
727  c = "/tmp";
728  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
729  goto valid_temp_directory;
730 
731 #endif // WIN32
732 
733  // If we found nothing useful, use the current directory
734  c = ".";
735 
736 valid_temp_directory:
737 
738 #ifdef WIN32
739  c += "\\" + file_template;
740 #else
741  c += "/" + file_template;
742 #endif
743 
744  return c;
745 }
746 
765 string
766 get_temp_file(FILE *&stream) throw(Error)
767 {
768  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
769 
770  vector<char> pathname(dods_temp.length() + 1);
771 
772  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
773 
774  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
775 
776  // Open truncated for update. NB: mkstemp() returns a file descriptor.
777 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
778  stream = fopen(_mktemp(&pathname[0]), "w+b");
779 #else
780  // Make sure that temp files are accessible only by the owner.
781  umask(077);
782  stream = fdopen(mkstemp(&pathname[0]), "w+");
783 #endif
784 
785  if (!stream)
786  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
787 
788  dods_temp = &pathname[0];
789  return dods_temp;
790 }
791 
792 
798 void
799 close_temp(FILE *s, const string &name)
800 {
801  int res = fclose(s);
802  if (res)
803  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
804 
805  res = unlink(name.c_str());
806  if (res != 0)
807  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
808 }
809 
831 HTTPResponse *
832 HTTPConnect::caching_fetch_url(const string &url)
833 {
834  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
835 
836  vector<string> *headers = new vector<string>;
837  string file_name;
838  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
839  if (!s) {
840  // url not in cache; get it and cache it
841  DBGN(cerr << "no; getting response and caching." << endl);
842  delete headers; headers = 0;
843  time_t now = time(0);
844  HTTPResponse *rs = plain_fetch_url(url);
845  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
846 
847  return rs;
848  }
849  else { // url in cache
850  DBGN(cerr << "yes... ");
851 
852  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
853  DBGN(cerr << "and it's valid; using cached response." << endl);
854  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
855  return crs;
856  }
857  else { // url in cache but not valid; validate
858  DBGN(cerr << "but it's not valid; validating... ");
859 
860  d_http_cache->release_cached_response(s); // This closes 's'
861  headers->clear();
862  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
863  FILE *body = 0;
864  string dods_temp = get_temp_file(body);
865  time_t now = time(0); // When was the request made (now).
866  long http_status;
867 
868  try {
869  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
870  rewind(body);
871  }
872  catch (Error &e) {
873  close_temp(body, dods_temp);
874  delete headers;
875  throw ;
876  }
877 
878  switch (http_status) {
879  case 200: { // New headers and new body
880  DBGN(cerr << "read a new response; caching." << endl);
881 
882  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
883  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
884 
885  return rs;
886  }
887 
888  case 304: { // Just new headers, use cached body
889  DBGN(cerr << "cached response valid; updating." << endl);
890 
891  close_temp(body, dods_temp);
892  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
893  string file_name;
894  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
895  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
896  return crs;
897  }
898 
899  default: { // Oops.
900  close_temp(body, dods_temp);
901  if (http_status >= 400) {
902  delete headers; headers = 0;
903  string msg = "Error while reading the URL: ";
904  msg += url;
905  msg
906  += ".\nThe OPeNDAP server returned the following message:\n";
907  msg += http_status_to_string(http_status);
908  throw Error(msg);
909  }
910  else {
911  delete headers; headers = 0;
912  throw InternalErr(__FILE__, __LINE__,
913  "Bad response from the HTTP server: " + long_to_string(http_status));
914  }
915  }
916  }
917  }
918  }
919 
920  throw InternalErr(__FILE__, __LINE__, "Should never get here");
921 }
922 
934 HTTPResponse *
935 HTTPConnect::plain_fetch_url(const string &url)
936 {
937  DBG(cerr << "Getting URL: " << url << endl);
938  FILE *stream = 0;
939  string dods_temp = get_temp_file(stream);
940  vector<string> *resp_hdrs = new vector<string>;
941 
942  int status = -1;
943  try {
944  status = read_url(url, stream, resp_hdrs); // Throws Error.
945  if (status >= 400) {
946  // delete resp_hdrs; resp_hdrs = 0;
947  string msg = "Error while reading the URL: ";
948  msg += url;
949  msg += ".\nThe OPeNDAP server returned the following message:\n";
950  msg += http_status_to_string(status);
951  throw Error(msg);
952  }
953  }
954 
955  catch (Error &e) {
956  delete resp_hdrs;
957  close_temp(stream, dods_temp);
958  throw;
959  }
960 
961 #if 0
962  if (d_use_cpp_streams) {
963  fclose(stream);
964  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
965  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
966  }
967  else {
968 #endif
969  rewind(stream);
970  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
971 #if 0
972 }
973 #endif
974 }
975 
987 void
989 {
990  d_accept_deflate = deflate;
991 
992  if (d_accept_deflate) {
993  if (find(d_request_headers.begin(), d_request_headers.end(),
994  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
995  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
996  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
997  ostream_iterator<string>(cerr, "\n")));
998  }
999  else {
1000  vector<string>::iterator i;
1001  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1002  bind2nd(equal_to<string>(),
1003  string("Accept-Encoding: deflate, gzip, compress")));
1004  d_request_headers.erase(i, d_request_headers.end());
1005  }
1006 }
1007 
1016 void
1017 HTTPConnect::set_xdap_protocol(int major, int minor)
1018 {
1019  // Look for, and remove if one exists, an XDAP-Accept header
1020  vector<string>::iterator i;
1021  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1022  HeaderMatch("XDAP-Accept:"));
1023  if (i != d_request_headers.end())
1024  d_request_headers.erase(i);
1025 
1026  // Record and add the new header value
1027  d_dap_client_protocol_major = major;
1028  d_dap_client_protocol_minor = minor;
1029  ostringstream xdap_accept;
1030  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1031 
1032  d_request_headers.push_back(xdap_accept.str());
1033 
1034  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1035  ostream_iterator<string>(cerr, "\n")));
1036 }
1037 
1053 void
1054 HTTPConnect::set_credentials(const string &u, const string &p)
1055 {
1056  if (u.empty())
1057  return;
1058 
1059  // Store the credentials locally.
1060  d_username = u;
1061  d_password = p;
1062 
1063  d_upstring = u + ":" + p;
1064 }
1065 
1066 } // namespace libdap
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1233
virtual int get_status() const
Definition: Response.h:105
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:617
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1372
void set_credentials(const string &u, const string &p)
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:126
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:707
int get_ignore_expires() const
Definition: RCReader.h:151
#define DBGN(x)
Definition: debug.h:59
#define SERVER_ERR_MIN
Definition: HTTPConnect.cc:107
virtual void set_type(ObjectType o)
Definition: Response.h:121
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1464
string get_cookie_jar() const
Definition: RCReader.h:258
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58
#define SERVER_ERR_MAX
Definition: HTTPConnect.cc:108
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:615
int get_default_expires() const
Definition: RCReader.h:155
#define DBG2(x)
Definition: debug.h:73
virtual void set_version(const std::string &v)
Definition: Response.h:122
A class for software fault reporting.
Definition: InternalErr.h:64
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:898
unsigned int get_max_cached_obj() const
Definition: RCReader.h:147
bool get_deflate() const
Definition: RCReader.h:168
#define DBG(x)
Definition: debug.h:58
int get_max_cache_size() const
Definition: RCReader.h:143
#define CLIENT_ERR_MAX
Definition: HTTPConnect.cc:82
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1303
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:799
friend class ParseHeader
Definition: HTTPConnect.h:112
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:766
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1140
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:988
string long_to_string(long val, int base)
Definition: util.cc:1012
void set_always_validate(bool validate)
Definition: HTTPCache.cc:825
void set_xdap_protocol(int major, int minor)
virtual ~HTTPConnect()
Definition: HTTPConnect.cc:585
int dods_keep_temps
Definition: HTTPConnect.cc:79
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:803
bool get_use_cache() const
Definition: RCReader.h:139
int get_always_validate() const
Definition: RCReader.h:159
virtual void set_protocol(const std::string &p)
Definition: Response.h:123
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1555
#define CVER
Definition: config.h:37
#define CNAME
Definition: config.h:26
A class for error processing.
Definition: Error.h:90
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:673
#define CLIENT_ERR_MIN
Definition: HTTPConnect.cc:81
string get_dods_cache_root() const
Definition: RCReader.h:135
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:756
virtual std::vector< std::string > * get_headers() const
Definition: HTTPResponse.h:158
int www_trace
Definition: HTTPConnect.cc:76