41 #include "HttpNames.h"
// Prefix for debug messages: builds the string "url::<enclosing function name>() - ".
// It relies on __func__, so it has to remain a macro expanded inside each function.
48 #define prolog string("url::").append(__func__).append("() - ")
// Reserved keys of the flat map<string,string> representation of a url.
// url::kvp() writes all six; in the code visible here the map constructor reads back
// the first five (protocol, host, path, query, source URL).
50 #define PROTOCOL_KEY "http_url_protocol"
51 #define HOST_KEY "http_url_host"
52 #define PATH_KEY "http_url_path"
53 #define QUERY_KEY "http_url_query"
54 #define SOURCE_URL_KEY "http_url_target_url"
55 #define INGEST_TIME_KEY "http_url_ingest_time"
// url::is_expired() reports true once (expires - now) falls below this value.
// NOTE(review): the operands are time_t values obtained from time(), so this is presumably
// in seconds (i.e. ten minutes) - confirm.
57 #define REFRESH_THRESHOLD 600
66 url::url(
const map<string,string> &kvp)
68 map<string,string> kvp_copy = kvp;
69 map<string,string>::const_iterator it;
70 map<string,string>::const_iterator itc;
72 it = kvp.find(PROTOCOL_KEY);
73 itc = kvp_copy.find(PROTOCOL_KEY);
74 if(it != kvp.end() && itc != kvp_copy.end()){
75 d_protocol = it->second;
76 kvp_copy.erase(it->first);
77 BESDEBUG(MODULE, prolog <<
"Located PROTOCOL_KEY(" << PROTOCOL_KEY <<
") value: " << d_protocol << endl);
79 it = kvp.find(HOST_KEY);
80 itc = kvp_copy.find(HOST_KEY);
81 if(it != kvp.end() && itc != kvp_copy.end()){
83 kvp_copy.erase(it->first);
84 BESDEBUG(MODULE, prolog <<
"Located HOST_KEY(" << HOST_KEY <<
") value: " << d_host << endl);
86 it = kvp.find(PATH_KEY);
87 itc = kvp_copy.find(PATH_KEY);
88 if(it != kvp.end() && itc != kvp_copy.end()){
90 kvp_copy.erase(it->first);
91 BESDEBUG(MODULE, prolog <<
"Located PATH_KEY(" << PATH_KEY <<
") value: " << d_path << endl);
93 it = kvp.find(QUERY_KEY);
94 itc = kvp_copy.find(QUERY_KEY);
95 if(it != kvp.end() && itc != kvp_copy.end()){
97 kvp_copy.erase(it->first);
98 BESDEBUG(MODULE, prolog <<
"Located QUERY_KEY(" << QUERY_KEY <<
") value: " << d_query << endl);
100 it = kvp.find(SOURCE_URL_KEY);
101 itc = kvp_copy.find(SOURCE_URL_KEY);
102 if(it != kvp.end() && itc != kvp_copy.end()){
103 d_source_url = it->second;
104 kvp_copy.erase(it->first);
105 BESDEBUG(MODULE, prolog <<
"Located SOURCE_URL_KEY(" << SOURCE_URL_KEY <<
") value: " << d_source_url << endl);
108 for(itc = kvp_copy.begin(); itc != kvp_copy.end(); itc++){
109 string key = itc->first;
110 string value = itc->second;
111 map<string, vector<string>* >::const_iterator record_it;
112 record_it = d_query_kvp.find(key);
113 if(record_it != d_query_kvp.end()){
114 vector<string> *values = record_it->second;
115 values->push_back(value);
118 vector<string> *values =
new vector<string>();
119 values->push_back(value);
120 d_query_kvp.insert(pair<
string, vector<string>*>(key, values));
// Fragment: when d_query_kvp is non-empty, visit every (key, vector<string>*) entry in it.
// NOTE(review): the enclosing function header and the loop body are not visible in this
// excerpt. The mapped vectors are allocated with `new` elsewhere in this file, so this is
// presumably the cleanup path that releases them - confirm against the full source.
132 if(!d_query_kvp.empty()){
133 map<string, vector<string>* >::const_iterator it;
134 for(it = d_query_kvp.begin() ; it != d_query_kvp.end(); it++){
145 void url::parse(
const string &source_url) {
146 const string prot_end(
"://");
147 string::const_iterator prot_i = search(source_url.begin(), source_url.end(),
148 prot_end.begin(), prot_end.end());
149 d_protocol.reserve(distance(source_url.begin(), prot_i));
150 transform(source_url.begin(), prot_i,
151 back_inserter(d_protocol),
152 ptr_fun<int, int>(tolower));
153 if (prot_i == source_url.end())
155 advance(prot_i, prot_end.length());
156 string::const_iterator path_i = find(prot_i, source_url.end(),
'/');
157 d_host.reserve(distance(prot_i, path_i));
158 transform(prot_i, path_i,
159 back_inserter(d_host),
160 ptr_fun<int, int>(tolower));
161 string::const_iterator query_i = find(path_i, source_url.end(),
'?');
162 d_path.assign(path_i, query_i);
163 if (query_i != source_url.end())
165 d_query.assign(query_i, source_url.end());
168 if(!d_query.empty()){
169 vector<string> records;
170 string delimiters =
"&";
172 vector<string>::iterator i = records.begin();
173 for(; i!=records.end(); i++){
174 size_t index = i->find(
'=');
175 if(index != string::npos) {
176 string key = i->substr(0, index);
177 string value = i->substr(index+1);
178 BESDEBUG(MODULE, prolog <<
"key: " << key <<
" value: " << value << endl);
179 map<string, vector<string>* >::const_iterator record_it;
180 record_it = d_query_kvp.find(key);
181 if(record_it != d_query_kvp.end()){
182 vector<string> *values = record_it->second;
183 values->push_back(value);
186 vector<string> *values =
new vector<string>();
187 values->push_back(value);
188 d_query_kvp.insert(pair<
string, vector<string>*>(key, values));
193 time(&d_ingest_time);
201 string url::query_parameter_value(
const string &key)
const
204 map<string, vector<string>* >::const_iterator it;
205 it = d_query_kvp.find(key);
206 if(it != d_query_kvp.end()){
207 vector<string> *values = it->second;
208 if(!values->empty()){
209 value = (*values)[0];
220 void url::query_parameter_values(
const string &key, vector<string> &values)
const
222 map<string, vector<string>* >::const_iterator it;
223 it = d_query_kvp.find(key);
224 if(it != d_query_kvp.end()){
225 values = *it->second;
235 void url::kvp(map<string,string> &kvp){
239 kvp.insert(pair<string,string>(PROTOCOL_KEY, d_protocol));
240 kvp.insert(pair<string,string>(HOST_KEY, d_host));
241 kvp.insert(pair<string,string>(PATH_KEY, d_path));
242 kvp.insert(pair<string,string>(QUERY_KEY, d_query));
243 kvp.insert(pair<string,string>(SOURCE_URL_KEY, d_source_url));
245 kvp.insert(pair<string,string>(INGEST_TIME_KEY,ss.str()));
248 map<string, vector<string>* >::const_iterator it;
249 for(it=d_query_kvp.begin(); it != d_query_kvp.end(); it++){
250 kvp.insert(pair<string,string>(it->first,(*it->second)[0]));
261 bool url::is_expired()
266 BESDEBUG(MODULE, prolog <<
"now: " << now << endl);
268 time_t expires = now;
269 string cf_expires = query_parameter_value(CLOUDFRONT_EXPIRES_HEADER_KEY);
270 string aws_expires = query_parameter_value(AMS_EXPIRES_HEADER_KEY);
272 if(!cf_expires.empty()){
273 expires = stoll(cf_expires);
274 BESDEBUG(MODULE, prolog <<
"Using "<< CLOUDFRONT_EXPIRES_HEADER_KEY <<
": " << expires << endl);
276 else if(!aws_expires.empty()){
280 time_t start_time = ingest_time();
283 string aws_date = query_parameter_value(AWS_DATE_HEADER_KEY);
284 if(!aws_date.empty()){
285 string date = aws_date;
286 string year = date.substr(0,4);
287 string month = date.substr(4,2);
288 string day = date.substr(6,2);
289 string hour = date.substr(9,2);
290 string minute = date.substr(11,2);
291 string second = date.substr(13,2);
293 BESDEBUG(MODULE, prolog <<
"date: "<< date <<
294 " year: " << year <<
" month: " << month <<
" day: " << day <<
295 " hour: " << hour <<
" minute: " << minute <<
" second: " << second << endl);
297 struct tm *ti = gmtime(&now);
298 ti->tm_year = stoll(year) - 1900;
299 ti->tm_mon = stoll(month) - 1;
300 ti->tm_mday = stoll(day);
301 ti->tm_hour = stoll(hour);
302 ti->tm_min = stoll(minute);
303 ti->tm_sec = stoll(second);
305 BESDEBUG(MODULE, prolog <<
"ti->tm_year: "<< ti->tm_year <<
306 " ti->tm_mon: " << ti->tm_mon <<
307 " ti->tm_mday: " << ti->tm_mday <<
308 " ti->tm_hour: " << ti->tm_hour <<
309 " ti->tm_min: " << ti->tm_min <<
310 " ti->tm_sec: " << ti->tm_sec << endl);
313 start_time = mktime(ti);
314 BESDEBUG(MODULE, prolog <<
"AWS (computed) start_time: "<< start_time << endl);
316 expires = start_time + stoll(aws_expires);
317 BESDEBUG(MODULE, prolog <<
"Using "<< AMS_EXPIRES_HEADER_KEY <<
": " << aws_expires <<
318 " (expires: " << expires <<
")" << endl);
320 time_t remaining = expires - now;
321 BESDEBUG(MODULE, prolog <<
"expires: " << expires <<
322 " remaining: " << remaining <<
323 " threshold: " << REFRESH_THRESHOLD << endl);
325 is_expired = remaining < REFRESH_THRESHOLD;
326 BESDEBUG(MODULE, prolog <<
"is_expired: " << (is_expired?
"true":
"false") << endl);
// Fragment: writes a multi-line, human-readable description of this url object into `ss`.
// NOTE(review): the function header, the declaration of `ss` and the closing lines are not
// visible in this excerpt; the "http::url [" banner suggests this is the class's dump
// routine - confirm against the full source.
// `indent` prefixes each field line; `idt` (one increment deeper) prefixes each value line.
337 string indent_inc =
" ";
338 string indent = indent_inc;
// Banner line carrying the object's address, then one line per scalar member.
340 ss <<
"http::url [" <<
this <<
"] " << endl;
341 ss << indent <<
"d_source_url: " << d_source_url << endl;
342 ss << indent <<
"d_protocol: " << d_protocol << endl;
343 ss << indent <<
"d_host: " << d_host << endl;
344 ss << indent <<
"d_path: " << d_path << endl;
345 ss << indent <<
"d_query: " << d_query << endl;
// For every query key: a header line with the key, followed by each stored value
// with its index, indented one level deeper.
347 std::map<std::string, std::vector<std::string>* >::iterator it;
349 string idt = indent+indent_inc;
350 for(it=d_query_kvp.begin(); it !=d_query_kvp.end(); it++){
351 ss << indent <<
"d_query_kvp["<<it->first<<
"]: " << endl;
352 std::vector<std::string> *values = it->second;
353 for(
size_t i=0; i<values->size(); i++){
354 ss << idt <<
"value[" << i <<
"]: " << (*values)[i] << endl;
// Last field: the raw d_ingest_time value.
357 ss << indent <<
"d_ingest_time: " << d_ingest_time << endl;
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
utility class for the HTTP catalog module