bes  Updated for version 3.20.8
url_impl.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of the BES http package, part of the Hyrax data server.
5 
6 // Copyright (c) 2020 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 // Authors:
26 // ndp Nathan Potter <ndp@opendap.org>
27 
28 #include "config.h"
29 
30 #include <string>
31 #include <sstream>
32 #include <map>
33 #include <vector>
34 #include <algorithm>
35 #include <cctype>
36 #include <functional>
37 #include <time.h>
38 
39 #include "BESDebug.h"
40 #include "BESUtil.h"
41 #include "HttpNames.h"
42 
43 #include "url_impl.h"
44 
45 using namespace std;
46 
47 #define MODULE "http"
48 #define prolog string("url::").append(__func__).append("() - ")
49 
50 #define PROTOCOL_KEY "http_url_protocol"
51 #define HOST_KEY "http_url_host"
52 #define PATH_KEY "http_url_path"
53 #define QUERY_KEY "http_url_query"
54 #define SOURCE_URL_KEY "http_url_target_url"
55 #define INGEST_TIME_KEY "http_url_ingest_time"
56 
57 #define REFRESH_THRESHOLD 600
58 
59 namespace http {
60 
61 #if 0
66 url::url(const map<string,string> &kvp)
67 {
68  map<string,string> kvp_copy = kvp;
69  map<string,string>::const_iterator it;
70  map<string,string>::const_iterator itc;
71 
72  it = kvp.find(PROTOCOL_KEY);
73  itc = kvp_copy.find(PROTOCOL_KEY);
74  if(it != kvp.end() && itc != kvp_copy.end()){
75  d_protocol = it->second;
76  kvp_copy.erase(it->first);
77  BESDEBUG(MODULE, prolog << "Located PROTOCOL_KEY(" << PROTOCOL_KEY << ") value: " << d_protocol << endl);
78  }
79  it = kvp.find(HOST_KEY);
80  itc = kvp_copy.find(HOST_KEY);
81  if(it != kvp.end() && itc != kvp_copy.end()){
82  d_host = it->second;
83  kvp_copy.erase(it->first);
84  BESDEBUG(MODULE, prolog << "Located HOST_KEY(" << HOST_KEY << ") value: " << d_host << endl);
85  }
86  it = kvp.find(PATH_KEY);
87  itc = kvp_copy.find(PATH_KEY);
88  if(it != kvp.end() && itc != kvp_copy.end()){
89  d_path = it->second;
90  kvp_copy.erase(it->first);
91  BESDEBUG(MODULE, prolog << "Located PATH_KEY(" << PATH_KEY << ") value: " << d_path << endl);
92  }
93  it = kvp.find(QUERY_KEY);
94  itc = kvp_copy.find(QUERY_KEY);
95  if(it != kvp.end() && itc != kvp_copy.end()){
96  d_query = it->second;
97  kvp_copy.erase(it->first);
98  BESDEBUG(MODULE, prolog << "Located QUERY_KEY(" << QUERY_KEY << ") value: " << d_query << endl);
99  }
100  it = kvp.find(SOURCE_URL_KEY);
101  itc = kvp_copy.find(SOURCE_URL_KEY);
102  if(it != kvp.end() && itc != kvp_copy.end()){
103  d_source_url = it->second;
104  kvp_copy.erase(it->first);
105  BESDEBUG(MODULE, prolog << "Located SOURCE_URL_KEY(" << SOURCE_URL_KEY << ") value: " << d_source_url << endl);
106  }
107 
108  for(itc = kvp_copy.begin(); itc != kvp_copy.end(); itc++){
109  string key = itc->first;
110  string value = itc->second;
111  map<string, vector<string>* >::const_iterator record_it;
112  record_it = d_query_kvp.find(key);
113  if(record_it != d_query_kvp.end()){
114  vector<string> *values = record_it->second;
115  values->push_back(value);
116  }
117  else {
118  vector<string> *values = new vector<string>();
119  values->push_back(value);
120  d_query_kvp.insert(pair<string, vector<string>*>(key, values));
121  }
122  }
123 
124 }
125 #endif
126 
130 url::~url()
131 {
132  if(!d_query_kvp.empty()){
133  map<string, vector<string>* >::const_iterator it;
134  for(it = d_query_kvp.begin() ; it != d_query_kvp.end(); it++){
135  delete it->second;
136  }
137  }
138 }
139 
140 
145 void url::parse(const string &source_url) {
146  const string prot_end("://");
147  string::const_iterator prot_i = search(source_url.begin(), source_url.end(),
148  prot_end.begin(), prot_end.end());
149  d_protocol.reserve(distance(source_url.begin(), prot_i));
150  transform(source_url.begin(), prot_i,
151  back_inserter(d_protocol),
152  ptr_fun<int, int>(tolower)); // protocol is icase
153  if (prot_i == source_url.end())
154  return;
155  advance(prot_i, prot_end.length());
156  string::const_iterator path_i = find(prot_i, source_url.end(), '/');
157  d_host.reserve(distance(prot_i, path_i));
158  transform(prot_i, path_i,
159  back_inserter(d_host),
160  ptr_fun<int, int>(tolower)); // host is icase
161  string::const_iterator query_i = find(path_i, source_url.end(), '?');
162  d_path.assign(path_i, query_i);
163  if (query_i != source_url.end())
164  ++query_i;
165  d_query.assign(query_i, source_url.end());
166 
167 
168  if(!d_query.empty()){
169  vector<string> records;
170  string delimiters = "&";
171  BESUtil::tokenize(d_query, records, delimiters);
172  vector<string>::iterator i = records.begin();
173  for(; i!=records.end(); i++){
174  size_t index = i->find('=');
175  if(index != string::npos) {
176  string key = i->substr(0, index);
177  string value = i->substr(index+1);
178  BESDEBUG(MODULE, prolog << "key: " << key << " value: " << value << endl);
179  map<string, vector<string>* >::const_iterator record_it;
180  record_it = d_query_kvp.find(key);
181  if(record_it != d_query_kvp.end()){
182  vector<string> *values = record_it->second;
183  values->push_back(value);
184  }
185  else {
186  vector<string> *values = new vector<string>();
187  values->push_back(value);
188  d_query_kvp.insert(pair<string, vector<string>*>(key, values));
189  }
190  }
191  }
192  }
193  time(&d_ingest_time);
194 }
195 
201 string url::query_parameter_value(const string &key) const
202 {
203  string value;
204  map<string, vector<string>* >::const_iterator it;
205  it = d_query_kvp.find(key);
206  if(it != d_query_kvp.end()){
207  vector<string> *values = it->second;
208  if(!values->empty()){
209  value = (*values)[0];
210  }
211  }
212  return value;
213 }
214 
220 void url::query_parameter_values(const string &key, vector<string> &values) const
221 {
222  map<string, vector<string>* >::const_iterator it;
223  it = d_query_kvp.find(key);
224  if(it != d_query_kvp.end()){
225  values = *it->second;
226  }
227 }
228 
229 #if 0
230 
235 void url::kvp(map<string,string> &kvp){
236  stringstream ss;
237 
238  // Do the basic stuff
239  kvp.insert(pair<string,string>(PROTOCOL_KEY, d_protocol));
240  kvp.insert(pair<string,string>(HOST_KEY, d_host));
241  kvp.insert(pair<string,string>(PATH_KEY, d_path));
242  kvp.insert(pair<string,string>(QUERY_KEY, d_query));
243  kvp.insert(pair<string,string>(SOURCE_URL_KEY, d_source_url));
244  ss << d_ingest_time;
245  kvp.insert(pair<string,string>(INGEST_TIME_KEY,ss.str()));
246 
247  // Now grab the query string. Only the first value of multi valued keys is used.
248  map<string, vector<string>* >::const_iterator it;
249  for(it=d_query_kvp.begin(); it != d_query_kvp.end(); it++){
250  kvp.insert(pair<string,string>(it->first,(*it->second)[0]));
251  }
252 }
253 #endif
254 
261 bool url::is_expired()
262 {
263  bool is_expired;
264  time_t now;
265  time(&now); /* get current time; same as: timer = time(NULL) */
266  BESDEBUG(MODULE, prolog << "now: " << now << endl);
267 
268  time_t expires = now;
269  string cf_expires = query_parameter_value(CLOUDFRONT_EXPIRES_HEADER_KEY);
270  string aws_expires = query_parameter_value(AMS_EXPIRES_HEADER_KEY);
271 
272  if(!cf_expires.empty()){ // CloudFront expires header?
273  expires = stoll(cf_expires);
274  BESDEBUG(MODULE, prolog << "Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY << ": " << expires << endl);
275  }
276  else if(!aws_expires.empty()){
277  // AWS Expires header?
278  //
279  // By default we'll use the time we made the URL object, ingest_time
280  time_t start_time = ingest_time();
281  // But if there's an AWS Date we'll parse that and compute the time
282  // @TODO move to NgapApi::decompose_url() and add the result to the map
283  string aws_date = query_parameter_value(AWS_DATE_HEADER_KEY);
284  if(!aws_date.empty()){
285  string date = aws_date; // 20200624T175046Z
286  string year = date.substr(0,4);
287  string month = date.substr(4,2);
288  string day = date.substr(6,2);
289  string hour = date.substr(9,2);
290  string minute = date.substr(11,2);
291  string second = date.substr(13,2);
292 
293  BESDEBUG(MODULE, prolog << "date: "<< date <<
294  " year: " << year << " month: " << month << " day: " << day <<
295  " hour: " << hour << " minute: " << minute << " second: " << second << endl);
296 
297  struct tm *ti = gmtime(&now);
298  ti->tm_year = stoll(year) - 1900;
299  ti->tm_mon = stoll(month) - 1;
300  ti->tm_mday = stoll(day);
301  ti->tm_hour = stoll(hour);
302  ti->tm_min = stoll(minute);
303  ti->tm_sec = stoll(second);
304 
305  BESDEBUG(MODULE, prolog << "ti->tm_year: "<< ti->tm_year <<
306  " ti->tm_mon: " << ti->tm_mon <<
307  " ti->tm_mday: " << ti->tm_mday <<
308  " ti->tm_hour: " << ti->tm_hour <<
309  " ti->tm_min: " << ti->tm_min <<
310  " ti->tm_sec: " << ti->tm_sec << endl);
311 
312 
313  start_time = mktime(ti);
314  BESDEBUG(MODULE, prolog << "AWS (computed) start_time: "<< start_time << endl);
315  }
316  expires = start_time + stoll(aws_expires);
317  BESDEBUG(MODULE, prolog << "Using "<< AMS_EXPIRES_HEADER_KEY << ": " << aws_expires <<
318  " (expires: " << expires << ")" << endl);
319  }
320  time_t remaining = expires - now;
321  BESDEBUG(MODULE, prolog << "expires: " << expires <<
322  " remaining: " << remaining <<
323  " threshold: " << REFRESH_THRESHOLD << endl);
324 
325  is_expired = remaining < REFRESH_THRESHOLD;
326  BESDEBUG(MODULE, prolog << "is_expired: " << (is_expired?"true":"false") << endl);
327 
328  return is_expired;
329 }
330 
335 string url::dump(){
336  stringstream ss;
337  string indent_inc = " ";
338  string indent = indent_inc;
339 
340  ss << "http::url [" << this << "] " << endl;
341  ss << indent << "d_source_url: " << d_source_url << endl;
342  ss << indent << "d_protocol: " << d_protocol << endl;
343  ss << indent << "d_host: " << d_host << endl;
344  ss << indent << "d_path: " << d_path << endl;
345  ss << indent << "d_query: " << d_query << endl;
346 
347  std::map<std::string, std::vector<std::string>* >::iterator it;
348 
349  string idt = indent+indent_inc;
350  for(it=d_query_kvp.begin(); it !=d_query_kvp.end(); it++){
351  ss << indent << "d_query_kvp["<<it->first<<"]: " << endl;
352  std::vector<std::string> *values = it->second;
353  for(size_t i=0; i<values->size(); i++){
354  ss << idt << "value[" << i << "]: " << (*values)[i] << endl;
355  }
356  }
357  ss << indent << "d_ingest_time: " << d_ingest_time << endl;
358  return ss.str();
359 }
360 
361 
362 
363 } // namespace http
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:1057
utility class for the HTTP catalog module
Definition: EffectiveUrl.cc:58