bes  Updated for version 3.20.8
AllowedHosts.cc
1 // AllowedHosts.cc
2 
3 // -*- mode: c++; c-basic-offset:4 -*-
4 
5 // This file is part of the OPeNDAP Back-End Server (BES)
6 // and creates an allowed hosts list of the systems that may be
7 // accessed by the server as part of its routine operation.
8 
9 // Copyright (c) 2018 OPeNDAP, Inc.
10 // Author: Nathan D. Potter <ndp@opendap.org>
11 //
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 //
26 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
27 
28 #include "config.h"
29 
30 #include <BESUtil.h>
31 #include <BESCatalog.h>
32 #include <BESCatalogList.h>
33 #include <BESCatalogUtils.h>
34 #include <BESRegex.h>
35 #include <TheBESKeys.h>
36 #include <BESInternalError.h>
37 #include <BESSyntaxUserError.h>
38 #include <BESDebug.h>
39 #include <BESNotFoundError.h>
40 #include <BESForbiddenError.h>
41 
42 #include "AllowedHosts.h"
43 
44 using namespace std;
45 using namespace bes;
46 
47 #define MODULE "ah"
48 #define prolog string("AllowedHosts::").append(__func__).append("() - ")
49 
50 AllowedHosts *AllowedHosts::d_instance = 0;
51 
58 AllowedHosts::theHosts()
59 {
60  if (d_instance) return d_instance;
61  d_instance = new AllowedHosts;
62  return d_instance;
63 }
64 
65 AllowedHosts::AllowedHosts()
66 {
67  bool found = false;
68  string key = ALLOWED_HOSTS_BES_KEY;
69  TheBESKeys::TheKeys()->get_values(ALLOWED_HOSTS_BES_KEY, d_allowed_hosts, found);
70  if(!found){
71  throw BESInternalError(string("The allowed hosts key, '") + ALLOWED_HOSTS_BES_KEY
72  + "' has not been configured.", __FILE__, __LINE__);
73  }
74 }
75 
90 bool AllowedHosts::is_allowed(const std::string &candidate_url)
91 {
92  BESDEBUG(MODULE, prolog << "BEGIN candidate_url: " << candidate_url << endl);
93  bool isAllowed = false;
94  const string file_url("file://");
95  const string http_url("http://");
96  const string https_url("https://");
97 
98  // Special case: This allows any file: URL to pass if the URL starts with the default
99  // catalog's path.
100  if (candidate_url.compare(0, file_url.size(), file_url) == 0 /*equals a file url*/) {
101 
102  // Ensure that the file path starts with the catalog root dir.
103  string file_path = candidate_url.substr(file_url.size());
104  BESDEBUG(MODULE, prolog << "file_path: "<< file_path << endl);
105 
107  string default_catalog_name = bcl->default_catalog_name();
108  BESDEBUG(MODULE, prolog << "Searching for catalog: "<< default_catalog_name << endl);
109  BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
110  if (bcat) {
111  BESDEBUG(MODULE, prolog << "Found catalog: "<< bcat->get_catalog_name() << endl);
112  }
113  else {
114  string msg = "OUCH! Unable to locate default catalog!";
115  BESDEBUG(MODULE, prolog << msg << endl);
116  throw BESInternalError(msg, __FILE__, __LINE__);
117  }
118 
119  string catalog_root = bcat->get_root();
120  BESDEBUG(MODULE, prolog << "Catalog root: "<< catalog_root << endl);
121 
122 
123  // Never a relative path shall be accepted.
124  // change??
125  // if( file_path[0] != '/'){
126  // file_path.insert(0,"/");
127  //}
128 
129  string relative_path;
130  if(file_path[0] == '/'){
131  if(file_path.length() < catalog_root.length()) {
132  isAllowed = false;
133  }
134  else {
135  int ret = file_path.compare(0, catalog_root.npos, catalog_root) == 0;
136  BESDEBUG(MODULE, prolog << "file_path.compare(): " << ret << endl);
137  isAllowed = (ret==0);
138  relative_path = file_path.substr(catalog_root.length());
139  }
140  }
141  else {
142  BESDEBUG(MODULE, prolog << "Relative path detected");
143  relative_path = file_path;
144  isAllowed = true;
145  }
146 
147  // string::compare() returns 0 if the path strings match exactly.
148  // And since we are just looking at the catalog.root as a prefix of the resource
149  // name we only allow access to the resource for an exact match.
150  if(isAllowed){
151  // If we stop adding a '/' to file_path values that don't begin with one
152  // then we need to detect the use of the relative path here
153  bool follow_sym_links = bcat->get_catalog_utils()->follow_sym_links();
154  try {
155  BESUtil::check_path(relative_path, catalog_root, follow_sym_links);
156  }
157  catch (BESNotFoundError &e) {
158  isAllowed=false;
159  }
160  catch (BESForbiddenError &e) {
161  isAllowed=false;
162  }
163  }
164 
165 
166  BESDEBUG(MODULE, prolog << "File Access Allowed: "<< (isAllowed?"true ":"false ") << endl);
167  }
168  else {
169  // We assume it's an http(s) URL.
170  vector<string>::const_iterator it = d_allowed_hosts.begin();
171  vector<string>::const_iterator end_it = d_allowed_hosts.end();
172  for (; it != end_it && !isAllowed; it++) {
173  string a_regex_pattern = *it;
174  BESRegex reg_expr(a_regex_pattern.c_str());
175  int match_result = reg_expr.match(candidate_url.c_str(), candidate_url.length());
176  if(match_result>=0) {
177  auto match_length = (unsigned int) match_result;
178  if (match_length == candidate_url.length()) {
179  BESDEBUG(MODULE,
180  prolog << "FULL MATCH. pattern: " << a_regex_pattern << " url: " << candidate_url << endl);
181  isAllowed = true;;
182  } else {
183  BESDEBUG(MODULE,
184  prolog << "No Match. pattern: " << a_regex_pattern << " url: " << candidate_url << endl);
185  }
186  }
187  }
188  BESDEBUG(MODULE, prolog << "HTTP Access Allowed: "<< (isAllowed?"true ":"false ") << endl);
189  }
190  BESDEBUG(MODULE, prolog << "END Access Allowed: "<< (isAllowed?"true ":"false ") << endl);
191  return isAllowed;
192 }
193 
List of all registered catalogs.
virtual std::string default_catalog_name() const
The name of the default catalog.
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual std::string get_root() const =0
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:103
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
int match(const char *s, int len, int pos=0)
Does the pattern match.
Definition: BESRegex.cc:107
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:254
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:370
Can a given URL be dereferenced given the BES's configuration?
Definition: AllowedHosts.h:51