38 #include "EffectiveUrlCache.h"
40 #include "BESSyntaxUserError.h"
41 #include "TheBESKeys.h"
43 #include "BESStopWatch.h"
46 #include "CurlUtils.h"
47 #include "HttpNames.h"
48 #include "EffectiveUrl.h"
// Debug context name passed to BESDEBUG for the full cache dump output.
53 #define MODULE_DUMPER "euc:dump"
// Builds the "EffectiveUrlCache::<function>() - " prefix placed in front of log and error messages.
54 #define prolog std::string("EffectiveUrlCache::").append(__func__).append("() - ")
// Pointer to the single cache instance; starts null and is tested in TheCache().
58 EffectiveUrlCache *EffectiveUrlCache::d_instance = 0;
// Control object handed to pthread_once() in TheCache() to run initialize_instance().
59 pthread_once_t EffectiveUrlCache::d_init_control = PTHREAD_ONCE_INIT;
// Constructs the lock object: keeps a reference to the caller's mutex in m_mutex and
// locks it with pthread_mutex_lock(). On success a debug message with the thread id is emitted.
// NOTE(review): the line guarding the throw is not visible in this listing; presumably
// BESInternalError is raised only when status is non-zero - confirm against the full source.
61 EucLock::EucLock(pthread_mutex_t &lock) : m_mutex(lock) {
62 int status = pthread_mutex_lock(&m_mutex);
64 throw BESInternalError(prolog +
"Could not acquire mutex lock.", __FILE__, __LINE__);
66 BESDEBUG(MODULE,prolog <<
"Locked. (thread: " << pthread_self() <<
")" << endl);
// Unlocks m_mutex with pthread_mutex_unlock(); a failure message is written with ERROR_LOG.
// NOTE(review): the enclosing function header and the status check are not visible in this
// listing; presumably this is the EucLock destructor and the error is logged only when
// status is non-zero - confirm against the full source.
70 int status = pthread_mutex_unlock(&m_mutex);
72 ERROR_LOG(prolog +
"Failed to release mutex lock.");
74 BESDEBUG(MODULE,prolog <<
"Unlocked. (thread: " << pthread_self() <<
")" << endl);
// Accessor for the singleton. When d_instance is still null, pthread_once() is used with
// d_init_control to invoke initialize_instance().
// NOTE(review): the return type and return statement are not visible in this listing;
// presumably d_instance is returned - confirm.
97 EffectiveUrlCache::TheCache()
99 if (d_instance == 0) {
100 pthread_once(&d_init_control,EffectiveUrlCache::initialize_instance);
// Initialization routine passed to pthread_once() by TheCache(). Registers delete_instance()
// with atexit().
// NOTE(review): the statement that creates the instance is not visible in this listing.
110 void EffectiveUrlCache::initialize_instance()
115 atexit(delete_instance);
// Exit handler registered with atexit() by initialize_instance().
// NOTE(review): the body is not visible in this listing; presumably it deletes d_instance
// and resets it to null - confirm.
123 void EffectiveUrlCache::delete_instance()
// Constructor: starts with no skip regex (d_skip_regex is NULL) and d_enabled set to -1,
// then initializes d_get_effective_url_cache_mutex with default attributes.
// Throws BESInternalError when pthread_mutex_init() reports failure.
// NOTE(review): the error text names CurlHandlePool rather than EffectiveUrlCache - this
// looks like a copy/paste leftover and the message should probably be corrected.
// NOTE(review): d_enabled == -1 presumably means "not yet read from the configuration" - confirm.
133 EffectiveUrlCache::EffectiveUrlCache(): d_skip_regex(NULL), d_enabled(-1)
135 if (pthread_mutex_init(&d_get_effective_url_cache_mutex, 0) != 0)
136 throw BESInternalError(
"Could not initialize mutex in CurlHandlePool", __FILE__, __LINE__);
// Destructor: iterates over every entry of d_effective_urls and then empties the map.
// NOTE(review): the loop body is not visible in this listing; since the map holds raw
// http::EffectiveUrl pointers, presumably each one is deleted here - confirm.
144 EffectiveUrlCache::~EffectiveUrlCache()
146 map<string , http::EffectiveUrl *>::iterator it;
147 for(it = d_effective_urls.begin(); it!= d_effective_urls.end(); it++){
150 d_effective_urls.clear();
// Writes a description of this object to strm: the object's address, the skip regex
// pattern (or "WAS NOT SET"), and then either one "source --> effective" line per cached
// entry or a note that the list is empty.
// NOTE(review): the function header, the iterator increment inside the while loop and the
// BESIndent::Indent() calls matching the UnIndent() calls are not visible in this listing.
168 strm << BESIndent::LMarg << prolog <<
"(this: " << (
void *)
this <<
")" << endl;
170 strm << BESIndent::LMarg <<
"d_skip_regex: " << (d_skip_regex?d_skip_regex->pattern():
"WAS NOT SET") << endl;
171 if (!d_effective_urls.empty()) {
172 strm << BESIndent::LMarg <<
"effective url list:" << endl;
// Each entry is printed as: key (source URL) --> str() of the mapped EffectiveUrl.
174 auto it = d_effective_urls.begin();
175 while( it!= d_effective_urls.end()){
176 strm << BESIndent::LMarg << (*it).first <<
" --> " << (*it).second->str();
179 BESIndent::UnIndent();
182 strm << BESIndent::LMarg <<
"effective url list: EMPTY" << endl;
184 BESIndent::UnIndent();
// Looks up source_url in d_effective_urls; on a hit effective_url is set to the mapped
// value, and effective_url is returned.
// NOTE(review): the function header and the declaration of effective_url are not visible
// in this listing; presumably it starts out null so a miss returns null - confirm.
208 auto it = d_effective_urls.find(source_url);
209 if(it!=d_effective_urls.end()){
210 effective_url = (*it).second;
212 return effective_url;
// Returns, as a string, the effective URL to use for source_url.
// The source URL is returned unchanged when it is not an http:// or https:// URL, or when
// the skip regex matches the whole URL. Otherwise the cached entry is used unless it is
// missing or expired, in which case a new one is retrieved and stored in d_effective_urls.
// NOTE(review): the function header, the is_enabled() / skip_regex null checks, the lookup
// that assigns effective_url, and several closing braces are not visible in this listing.
// Lock the cache mutex for as long as dat_lock is in scope.
232 EucLock dat_lock(this->d_get_effective_url_cache_mutex);
234 BESDEBUG(MODULE, prolog <<
"BEGIN url: " << source_url << endl);
// Default result: the source URL itself.
235 string effective_url_str = source_url;
239 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
241 size_t match_length=0;
// Only URLs that begin with http:// or https:// are candidates; anything else is returned as-is.
244 if (source_url.find(
"http://") != 0 && source_url.find(
"https://") != 0) {
245 BESDEBUG(MODULE, prolog <<
"END Not an HTTP request, SKIPPING." << endl);
246 return effective_url_str;
249 BESRegex *skip_regex = get_skip_regex();
251 match_length = skip_regex->
match(source_url.c_str(), source_url.length());
// A match covering the entire URL means this URL is excluded from effective-URL handling.
252 if (match_length == source_url.length()) {
253 BESDEBUG(MODULE, prolog <<
"END Candidate url matches the "
254 "no_redirects_regex_pattern [" << skip_regex->pattern() <<
255 "][match_length=" << match_length <<
"] SKIPPING." << endl);
256 return effective_url_str;
258 BESDEBUG(MODULE, prolog <<
"Candidate url: '" << source_url <<
"' does NOT match the "
259 "skip_regex pattern [" << skip_regex->pattern() <<
"]" << endl);
262 BESDEBUG(MODULE, prolog <<
"The cache_effective_urls_skip_regex() was NOT SET "<< endl);
// No cached entry means a retrieval is required.
268 bool retrieve_and_cache = !effective_url;
270 BESDEBUG(MODULE, prolog <<
"Cache hit for: " << source_url << endl);
// A cached entry is refreshed only when it reports itself expired.
271 retrieve_and_cache = effective_url->
is_expired();
272 BESDEBUG(MODULE, prolog <<
"Cached target URL is " << (retrieve_and_cache?
"":
"not ") <<
"expired." << endl);
275 if(retrieve_and_cache){
276 BESDEBUG(MODULE, prolog <<
"Acquiring effective URL for " << source_url << endl);
281 effective_url = curl::retrieve_effective_url(source_url);
283 BESDEBUG(MODULE, prolog <<
" source_url: " << source_url << endl);
284 BESDEBUG(MODULE, prolog <<
"effective_url: " << effective_url->
dump() << endl);
// Store (or overwrite) the entry keyed by the source URL.
// NOTE(review): handling of any previously stored pointer for this key is not visible here.
286 d_effective_urls[source_url] = effective_url;
288 BESDEBUG(MODULE, prolog <<
"Updated record for "<< source_url <<
" cache size: " << d_effective_urls.size() << endl);
290 effective_url_str = effective_url->str();
291 BESDEBUG(MODULE_DUMPER, prolog <<
"dump: " << endl <<
dump() << endl);
// NOTE(review): the condition selecting this branch is not visible; presumably is_enabled() was false.
294 BESDEBUG(MODULE, prolog <<
"CACHE IS DISABLED." << endl);
296 BESDEBUG(MODULE, prolog <<
"END" << endl);
297 return effective_url_str;
// Logs the value read for HTTP_CACHE_EFFECTIVE_URLS_KEY and the resulting d_enabled flag.
// NOTE(review): the lines that read the key, set d_enabled and return are not visible in
// this listing; presumably the key is looked up only while d_enabled is still -1 and the
// function returns d_enabled as a bool - confirm.
305 bool EffectiveUrlCache::is_enabled()
313 BESDEBUG(MODULE, prolog << HTTP_CACHE_EFFECTIVE_URLS_KEY <<
": '" << value <<
"'" << endl);
316 BESDEBUG(MODULE, prolog <<
"d_enabled: " << (d_enabled?
"true":
"false") << endl);
// Provides the skip regex. When the configuration lookup found a non-empty value, a new
// BESRegex is built from it and stored in d_skip_regex; the resulting pattern (or a
// "not set" message) is logged.
// NOTE(review): the key lookup, any "already set" guard and the return statement are not
// visible in this listing; presumably d_skip_regex is returned and may be null - confirm.
// NOTE(review): the regex is allocated with a raw new; where it is released is not visible here.
324 BESRegex *EffectiveUrlCache::get_skip_regex()
330 if(found && value.length()){
331 BESDEBUG(MODULE, prolog << HTTP_CACHE_EFFECTIVE_URLS_SKIP_REGEX_KEY <<
": " << value << endl);
332 d_skip_regex =
new BESRegex(value.c_str());
335 BESDEBUG(MODULE, prolog <<
"d_skip_regex: " << (d_skip_regex?d_skip_regex->pattern():
"Value has not been set.") << endl);
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
exception thrown if internal error encountered
int match(const char *s, int len, int pos=0)
Does the pattern match.
virtual bool start(std::string name)
static std::string lowercase(const std::string &s)
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
std::string get_effective_url(const std::string &source_url)
virtual std::string dump() const
dumps information about this object
std::string dump() override
A string dump of the instance.
bool is_expired() override
Returns true if the URL has expired (and must be retrieved again), false if it is still reusable.
utility class for the HTTP catalog module