Page Speed Optimization Libraries  1.13.35.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
serf_url_async_fetcher.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc.
17 
18 #ifndef PAGESPEED_SYSTEM_SERF_URL_ASYNC_FETCHER_H_
19 #define PAGESPEED_SYSTEM_SERF_URL_ASYNC_FETCHER_H_
20 
21 #include <cstddef>
22 #include <vector>
23 
24 #include "apr_network_io.h"
32 #include "pagespeed/kernel/base/thread_annotations.h"
35 
36 #include "third_party/serf/src/serf.h"
37 
// Default for the HTTPS compile-time switch: SERF_HTTPS_FETCHING becomes 1
// unless something has already defined it. The SSL certificate-validation
// declarations in SerfFetch are wrapped in `#if SERF_HTTPS_FETCHING`.
48 #ifndef SERF_HTTPS_FETCHING
49 #define SERF_HTTPS_FETCHING 1
50 #endif
51 
52 struct apr_pool_t;
53 struct apr_uri_t;
54 struct serf_context_t;
55 
56 namespace net_instaweb {
57 
58 class AsyncFetch;
59 class MessageHandler;
60 class Statistics;
61 class SerfFetch;
62 class SerfThreadedFetcher;
63 class Timer;
64 class UpDownCounter;
65 class Variable;
66 
// Groups string constants that are only declared here (arrays of unspecified
// bound); their definitions live elsewhere.
// NOTE(review): the names suggest these are the keys under which the
// fetcher's statistics are registered (see SerfUrlAsyncFetcher::InitStats and
// the Variable*/UpDownCounter* members of that class) -- confirm in the .cc.
67 struct SerfStats {
68  static const char kSerfFetchRequestCount[];
69  static const char kSerfFetchByteCount[];
70  static const char kSerfFetchTimeDurationMs[];
71  static const char kSerfFetchCancelCount[];
72  static const char kSerfFetchActiveCount[];
73  static const char kSerfFetchTimeoutCount[];
74  static const char kSerfFetchFailureCount[];
75  static const char kSerfFetchCertErrors[];
76  static const char kSerfFetchReadCalls[];
77 
    // NOTE(review): presumably pairs with SerfUrlAsyncFetcher::ultimate_success_
    // -- confirm.
80  static const char kSerfFetchUltimateSuccess[];
81 
    // NOTE(review): presumably pairs with SerfUrlAsyncFetcher::ultimate_failure_
    // -- confirm.
84  static const char kSerfFetchUltimateFailure[];
85 
    // NOTE(review): presumably pairs with
    // SerfUrlAsyncFetcher::last_check_timestamp_ms_ (an UpDownCounter, unlike
    // the Variable-typed counters) -- confirm.
88  static const char kSerfFetchLastCheckTimestampMs[];
89 };
90 
// Outcome value handed to SerfFetch::CallCallback(), SerfFetch::CallbackDone()
// and SerfUrlAsyncFetcher::ReportFetchSuccessStats() as their `result`
// argument. Scoped enum: enumerators must be qualified
// (SerfCompletionResult::kSuccess, ...).
91 enum class SerfCompletionResult {
92  kClientCancel,
93  kSuccess,
94  kFailure
95 };
96 
// One comma-separated keyword string, assembled from two adjacent string
// literals; the trailing backslashes continue the #define across lines.
// NOTE(review): presumably the vocabulary accepted by
// SerfUrlAsyncFetcher::SetHttpsOptions() / ValidateHttpsOptions(); the allow_*
// keywords have same-named protected predicates on SerfUrlAsyncFetcher --
// confirm against the parser in the .cc.
101 #define SERF_HTTPS_KEYWORDS \
102  "enable,disable,allow_self_signed," \
103  "allow_unknown_certificate_authority,allow_certificate_not_yet_valid"
104 
// URL fetcher whose Fetch() is given an AsyncFetch callback object. It holds a
// serf_context_t and pools of SerfFetch objects (see the data members).
// NOTE(review): the base class is taken from this listing's cross-reference
// to url_async_fetcher.h -- confirm against the original header.
111 class SerfUrlAsyncFetcher : public UrlAsyncFetcher {
112  public:
     // Argument type of WaitForActiveFetches()'s `wait_choice` parameter.
     // NOTE(review): presumably "threaded" means fetches owned by
     // threaded_fetcher_ and "mainline" means fetches held by this object --
     // confirm in the .cc.
113  enum WaitChoice {
114  kThreadedOnly,
115  kMainlineOnly,
116  kThreadedAndMainline
117  };
118 
// Primary constructor: receives the proxy string, APR pool, thread system,
// statistics, timer, timeout and message handler the fetcher works with.
// NOTE(review): `timeout_ms` presumably initializes the const timeout_ms_
// member returned by timeout_ms() -- confirm in the .cc.
119  SerfUrlAsyncFetcher(const char* proxy, apr_pool_t* pool,
120  ThreadSystem* thread_system,
121  Statistics* statistics, Timer* timer, int64 timeout_ms,
122  MessageHandler* handler);
     // Secondary constructor taking an existing fetcher as `parent`.
     // NOTE(review): presumably reuses the parent's configuration with a
     // different proxy -- confirm in the .cc.
123  SerfUrlAsyncFetcher(SerfUrlAsyncFetcher* parent, const char* proxy);
124  virtual ~SerfUrlAsyncFetcher();
125 
     // Static hook that receives the Statistics object.
     // NOTE(review): presumably registers the SerfStats names -- confirm.
126  static void InitStats(Statistics* statistics);
127 
130  virtual void ShutDown();
131 
132  virtual bool SupportsHttps() const;
133 
     // Fetch entry point: takes the URL, a message handler, and the AsyncFetch
     // object passed as `callback`.
     // NOTE(review): ownership/lifetime of `callback` is not visible here.
134  virtual void Fetch(const GoogleString& url,
135  MessageHandler* message_handler,
136  AsyncFetch* callback);
     // NOTE(review): meaning of the int return value is not visible in this
     // header -- check the .cc before relying on it.
138  int Poll(int64 max_wait_ms);
139 
     // `wait_choice` is one of the WaitChoice enumerators declared in this
     // class.
140  bool WaitForActiveFetches(int64 max_milliseconds,
141  MessageHandler* message_handler,
142  WaitChoice wait_choice);
143 
146  void FetchComplete(SerfFetch* fetch);
147 
     // Update the statistics object with results of the (completed) fetch.
149  void ReportCompletedFetchStats(const SerfFetch* fetch);
150 
     // Updates states used for success/failure monitoring.
152  void ReportFetchSuccessStats(SerfCompletionResult result,
153  const ResponseHeaders* headers,
154  const SerfFetch* fetch);
155 
156 
     // Returns pool_.
157  apr_pool_t* pool() const { return pool_; }
158 
159  void PrintActiveFetches(MessageHandler* handler) const;
     // Returns timeout_ms_; declared virtual, so a subclass can substitute its
     // own value.
160  virtual int64 timeout_ms() { return timeout_ms_; }
     // Returns thread_system_.
161  ThreadSystem* thread_system() { return thread_system_; }
162 
166 
// Returns the current value of track_original_content_length_ (the flag
// written through set_track_original_content_length()).
169  bool track_original_content_length() const {
170  return track_original_content_length_;
171  }
// Setter for the track-original-content-length flag; defined out of line.
172  void set_track_original_content_length(bool x);
173 
     // Takes an HTTPS options directive string and returns bool.
     // NOTE(review): presumably parses `directive` with ParseHttpsOptions() and
     // stores the result in https_options_, returning false on a bad directive
     // -- confirm in the .cc.
183  bool SetHttpsOptions(StringPiece directive);
184 
// Checks `directive` without changing any fetcher: it is run through
// ParseHttpsOptions() and that call's bool result is returned as-is. The
// parsed option bits land in a scratch local and are dropped; `error_message`
// is forwarded to the parser untouched.
187  static bool ValidateHttpsOptions(StringPiece directive,
188  GoogleString* error_message) {
189  uint32 scratch_options;
190  return ParseHttpsOptions(directive, &scratch_options, error_message);
191  }
192 
// Setter for the SSL certificates directory; defined out of line.
// NOTE(review): presumably stores `dir` in ssl_certificates_dir_ -- confirm.
193  void SetSslCertificatesDir(StringPiece dir);
     // Returns a const reference to ssl_certificates_dir_ (no copy is made).
194  const GoogleString& ssl_certificates_dir() const {
195  return ssl_certificates_dir_;
196  }
197 
     // Setter for the SSL certificates file; defined out of line.
     // NOTE(review): presumably stores `file` in ssl_certificates_file_ --
     // confirm.
198  void SetSslCertificatesFile(StringPiece file);
     // Returns a const reference to ssl_certificates_file_ (no copy is made).
199  const GoogleString& ssl_certificates_file() const {
200  return ssl_certificates_file_;
201  }
202 
203  protected:
     // Pool type used for the completed_fetches_ and active_fetches_ members.
204  typedef Pool<SerfFetch> SerfFetchPool;
205 
     // Determines whether https is allowed in the current configuration.
207  inline bool allow_https() const;
     // NOTE(review): the remaining allow_*() predicates share names with
     // keywords in SERF_HTTPS_KEYWORDS and presumably test the matching bits of
     // https_options_ -- confirm in the .cc.
208  inline bool allow_self_signed() const;
209  inline bool allow_unknown_certificate_authority() const;
210  inline bool allow_certificate_not_yet_valid() const;
211 
     // Overwrites https_options_ with the given value.
212  void set_https_options(uint32 https_options) {
213  https_options_ = https_options;
214  }
215 
     // Functions annotated EXCLUSIVE_LOCKS_REQUIRED(mutex_) expect the caller to
     // already hold mutex_.
216  void Init(apr_pool_t* parent_pool, const char* proxy)
217  EXCLUSIVE_LOCKS_REQUIRED(mutex_);
218  bool SetupProxy(const char* proxy) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
219 
222  bool StartFetch(SerfFetch* fetch) EXCLUSIVE_LOCKS_REQUIRED(mutex_);
223 
228  virtual bool AnyPendingFetches();
232 
     // CancelActiveFetches() carries no lock annotation, while the *MutexHeld
     // variant requires mutex_.
     // NOTE(review): presumably the former acquires mutex_ and delegates to
     // the latter -- confirm in the .cc.
233  void CancelActiveFetches();
234  void CancelActiveFetchesMutexHeld() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
235  bool WaitForActiveFetchesHelper(int64 max_ms,
236  MessageHandler* message_handler);
237 
240  void CleanupFetchesWithErrors() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
241 
     // Accessors for shutdown_, which is GUARDED_BY(mutex_); both require the
     // lock.
242  bool shutdown() const EXCLUSIVE_LOCKS_REQUIRED(mutex_) { return shutdown_; }
243  void set_shutdown(bool s) EXCLUSIVE_LOCKS_REQUIRED(mutex_) { shutdown_ = s; }
244 
// Returned by pool().
245  apr_pool_t* pool_;
     // Returned by thread_system().
246  ThreadSystem* thread_system_;
247  Timer* timer_;
248 
     // The mutex named by every GUARDED_BY(mutex_) and
     // EXCLUSIVE_LOCKS_REQUIRED(mutex_) annotation in this class.
251  ThreadSystem::CondvarCapableMutex* mutex_;
252 
253  typedef std::vector<SerfFetch*> FetchVector;
254  SerfFetchPool completed_fetches_;
255  SerfThreadedFetcher* threaded_fetcher_;
256 
     // NOTE(review): presumably the counter registered under
     // SerfStats::kSerfFetchActiveCount -- confirm in the .cc.
259  UpDownCounter* active_count_;
260 
261  private:
     // To access stats variables below.
262  friend class SerfFetch;
263 
     // Static helper returning a C string derived from `uri`; FRIEND_TEST gives
     // the named test access to it.
     // NOTE(review): the result is presumably allocated from `pool` -- confirm.
265  static const char* ExtractHostHeader(const apr_uri_t& uri,
266  apr_pool_t* pool);
267  FRIEND_TEST(SerfUrlAsyncFetcherTest, TestHostConstruction);
268 
     // Static helper mapping one host-header string to another, returned by
     // value; also exposed to a test through FRIEND_TEST.
271  static GoogleString RemovePortFromHostHeader(const GoogleString& in);
272  FRIEND_TEST(SerfUrlAsyncFetcherTest, TestPortRemoval);
273 
     // Parser behind ValidateHttpsOptions(), which calls it with a throwaway
     // `options` and returns its result.
274  static bool ParseHttpsOptions(StringPiece directive, uint32* options,
275  GoogleString* error_message);
276 
     // Both members below are annotated as protected by mutex_.
277  serf_context_t* serf_context_ GUARDED_BY(mutex_);
278  SerfFetchPool active_fetches_ GUARDED_BY(mutex_);
279 
     // Statistics handles.
     // NOTE(review): presumably looked up from the Statistics object under the
     // SerfStats names -- confirm in the .cc.
280  Variable* request_count_;
281  Variable* byte_count_;
282  Variable* time_duration_ms_;
283  Variable* cancel_count_;
284  Variable* timeout_count_;
285  Variable* failure_count_;
286  Variable* cert_errors_;
287  Variable* read_calls_count_;
288  Variable* ultimate_success_;
289  Variable* ultimate_failure_;
290  UpDownCounter* last_check_timestamp_ms_;
     // const: fixed for the fetcher's lifetime; returned by timeout_ms().
291  const int64 timeout_ms_;
292  bool shutdown_ GUARDED_BY(mutex_);
293  bool list_outstanding_urls_on_error_;
294  bool track_original_content_length_;
     // Written by set_https_options().
295  uint32 https_options_;
296  MessageHandler* message_handler_;
     // Returned by ssl_certificates_dir() / ssl_certificates_file().
297  GoogleString ssl_certificates_dir_;
298  GoogleString ssl_certificates_file_;
299 
300 
301 };
302 
// A single fetch. Deriving from PoolElement<SerfFetch> lets instances be kept
// in the Pool<SerfFetch> containers that SerfUrlAsyncFetcher declares.
// SerfUrlAsyncFetcher names this class as a friend.
304 class SerfFetch : public PoolElement<SerfFetch> {
305  public:
     // Reason passed to Cancel().
306  enum class CancelCause {
307  kClientDecision,
308  kSerfError,
309  kFetchTimeout,
310  };
311 
     // NOTE(review): the arguments presumably initialize str_url_,
     // async_fetch_, message_handler_ and timer_ -- confirm in the .cc.
313  SerfFetch(const GoogleString& url,
314  AsyncFetch* async_fetch,
315  MessageHandler* message_handler,
316  Timer* timer);
317  ~SerfFetch();
318 
321  bool Start(SerfUrlAsyncFetcher* fetcher, serf_context_t* context);
322 
323  GoogleString DebugInfo();
324 
     // This must be called while holding SerfUrlAsyncFetcher's mutex_.
326  void Cancel(CancelCause cause);
327 
336  void CallCallback(SerfCompletionResult result);
337  void CallbackDone(SerfCompletionResult result);
338 
341  void CleanupIfError();
342 
     // Test-only hook (per its name); all four results come back through the
     // pointer arguments.
345  void ParseUrlForTesting(bool* status,
346  apr_uri_t** url,
347  const char** host_header,
348  const char** sni_host);
349 
350  void SetFetcherForTesting(SerfUrlAsyncFetcher* fetcher);
351 
     // NOTE(review): presumably fetch_end_ms_ - fetch_start_ms_ -- confirm in
     // the .cc.
352  int64 TimeDuration() const;
353 
     // Returns fetch_start_ms_.
354  int64 fetch_start_ms() const { return fetch_start_ms_; }
355 
     // Returns bytes_received_.
356  size_t bytes_received() const { return bytes_received_; }
     // Returns message_handler_.
357  MessageHandler* message_handler() { return message_handler_; }
358 
359  private:
361 
     // The two certificate-validation statics exist only when
     // SERF_HTTPS_FETCHING is non-zero.
367 #if SERF_HTTPS_FETCHING
368  static apr_status_t SSLCertValidate(void *data, int failures,
369  const serf_ssl_certificate_t *cert);
370 
371  static apr_status_t SSLCertChainValidate(
372  void *data, int failures, int error_depth,
373  const serf_ssl_certificate_t * const *certs,
374  apr_size_t certs_count);
375 #endif
376 
     // Static functions that take an opaque `void*` baton.
     // NOTE(review): these look like callbacks handed to the serf C API with
     // the SerfFetch as the baton -- confirm in the .cc.
377  static apr_status_t ConnectionSetup(
378  apr_socket_t* socket, serf_bucket_t **read_bkt, serf_bucket_t **write_bkt,
379  void* setup_baton, apr_pool_t* pool);
380  static void ClosedConnection(serf_connection_t* conn,
381  void* closed_baton,
382  apr_status_t why,
383  apr_pool_t* pool);
384  static serf_bucket_t* AcceptResponse(serf_request_t* request,
385  serf_bucket_t* stream,
386  void* acceptor_baton,
387  apr_pool_t* pool);
     // Static four-argument overload; a one-argument member overload with the
     // same name is declared further down.
388  static apr_status_t HandleResponse(serf_request_t* request,
389  serf_bucket_t* response,
390  void* handler_baton,
391  apr_pool_t* pool);
397  static bool StatusIndicatesDataPossible(apr_status_t status);
398 
399 #if SERF_HTTPS_FETCHING
400  apr_status_t HandleSSLCertValidation(
411  int errors, int failure_depth, const serf_ssl_certificate_t *cert);
412 #endif
413 
     // Member overload of HandleResponse, operating on this fetch.
414  apr_status_t HandleResponse(serf_bucket_t* response);
415 
     // NOTE(review): ReadStatusLine / ReadHeaders / ReadBody presumably consume
     // successive parts of the response bucket -- confirm in the .cc.
416  apr_status_t ReadStatusLine(serf_bucket_t* response);
417 
421  apr_status_t ReadHeaders(serf_bucket_t* response);
422 
426  apr_status_t ReadBody(serf_bucket_t* response);
427 
430  void FixUserAgent();
     // Static; results are returned through the pointer-to-pointer arguments.
431  static apr_status_t SetupRequest(serf_request_t* request,
432  void* setup_baton,
433  serf_bucket_t** req_bkt,
434  serf_response_acceptor_t* acceptor,
435  void** acceptor_baton,
436  serf_response_handler_t* handler,
437  void** handler_baton,
438  apr_pool_t* pool);
439  bool ParseUrl();
440 
441  SerfUrlAsyncFetcher* fetcher_;
442  Timer* timer_;
     // const: cannot change after construction.
443  const GoogleString str_url_;
444  AsyncFetch* async_fetch_;
445  ResponseHeadersParser parser_;
446  bool status_line_read_;
     // Returned by message_handler().
447  MessageHandler* message_handler_;
448 
449  apr_pool_t* pool_;
450  serf_bucket_alloc_t* bucket_alloc_;
451  apr_uri_t url_;
452  const char* host_header_;
453  const char* sni_host_;
454  serf_connection_t* connection_;
     // Returned by bytes_received().
455  size_t bytes_received_;
     // Returned by fetch_start_ms().
456  int64 fetch_start_ms_;
457  int64 fetch_end_ms_;
458 
     // Declared unconditionally, i.e. outside the SERF_HTTPS_FETCHING guards
     // used for the SSL functions above.
460  bool using_https_;
461  serf_ssl_context_t* ssl_context_;
462  const char* ssl_error_message_;
463 
464 
465 };
466 
467 }
468 
469 #endif
bool Start(SerfUrlAsyncFetcher *fetcher, serf_context_t *context)
Definition: serf_url_async_fetcher.h:304
int Poll(int64 max_wait_ms)
void Cancel(CancelCause cause)
This must be called while holding SerfUrlAsyncFetcher's mutex_.
bool track_original_content_length() const
Definition: serf_url_async_fetcher.h:169
Definition: statistics.h:43
virtual void Fetch(const GoogleString &url, MessageHandler *message_handler, AsyncFetch *callback)
void CallCallback(SerfCompletionResult result)
Base class for implementations of monitoring statistics.
Definition: statistics.h:342
SerfFetch(const GoogleString &url, AsyncFetch *async_fetch, MessageHandler *message_handler, Timer *timer)
Definition: serf_url_async_fetcher.h:111
static const char kSerfFetchUltimateFailure[]
Definition: serf_url_async_fetcher.h:84
Read/write API for HTTP response headers.
Definition: response_headers.h:37
bool SetHttpsOptions(StringPiece directive)
void ParseUrlForTesting(bool *status, apr_uri_t **url, const char **host_header, const char **sni_host)
void ReportFetchSuccessStats(SerfCompletionResult result, const ResponseHeaders *headers, const SerfFetch *fetch)
Updates states used for success/failure monitoring.
static bool ValidateHttpsOptions(StringPiece directive, GoogleString *error_message)
Definition: serf_url_async_fetcher.h:187
void CleanupFetchesWithErrors() EXCLUSIVE_LOCKS_REQUIRED(mutex_)
std::string GoogleString
PAGESPEED_KERNEL_BASE_STRING_H_.
Definition: string.h:24
bool allow_https() const
Determines whether https is allowed in the current configuration.
void ReportCompletedFetchStats(const SerfFetch *fetch)
Update the statistics object with results of the (completed) fetch.
void FetchComplete(SerfFetch *fetch)
Definition: async_fetch.h:53
UpDownCounter * active_count_
Definition: serf_url_async_fetcher.h:259
Parses a stream of HTTP header text into a ResponseHeaders instance.
Definition: response_headers_parser.h:30
Definition: thread_system.h:40
Definition: statistics.h:73
Definition: message_handler.h:39
static const char kSerfFetchUltimateSuccess[]
Definition: serf_url_async_fetcher.h:80
virtual bool SupportsHttps() const
friend class SerfFetch
To access stats variables below.
Definition: serf_url_async_fetcher.h:262
ThreadSystem::CondvarCapableMutex * mutex_
Definition: serf_url_async_fetcher.h:251
Definition: serf_url_async_fetcher.h:67
virtual int64 timeout_ms()
Definition: serf_url_async_fetcher.h:160
void set_list_outstanding_urls_on_error(bool x)
Timer interface, made virtual so it can be mocked for tests.
Definition: timer.h:27
bool StartFetch(SerfFetch *fetch) EXCLUSIVE_LOCKS_REQUIRED(mutex_)
Definition: url_async_fetcher.h:33
Definition: pool_element.h:33
static const char kSerfFetchLastCheckTimestampMs[]
Definition: serf_url_async_fetcher.h:88