Page Speed Optimization Libraries  1.13.35.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
rewrite_driver.h
Go to the documentation of this file.
1 /*
2  * Copyright 2010 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #ifndef NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
20 #define NET_INSTAWEB_REWRITER_PUBLIC_REWRITE_DRIVER_H_
21 
22 #include <map>
23 #include <set>
24 #include <vector>
25 
26 #include "base/logging.h"
29 #include "net/instaweb/http/public/request_context.h"
31 #include "net/instaweb/rewriter/cached_result.pb.h"
37 #include "net/instaweb/rewriter/public/inline_resource_slot.h"
57 #include "pagespeed/kernel/base/thread_annotations.h"
68 #include "pagespeed/kernel/http/user_agent_matcher.h"
75 
76 namespace net_instaweb {
77 
78 class AbstractLogRecord;
79 class AsyncFetch;
80 class CommonFilter;
81 class DebugFilter;
82 class DependencyTracker;
83 class DomStatsFilter;
84 class DomainRewriteFilter;
85 class FallbackPropertyPage;
86 class FileSystem;
87 class FlushEarlyInfo;
88 class HtmlWriterFilter;
89 class MessageHandler;
90 class RequestProperties;
91 class RequestTrace;
92 class RewriteDriverPool;
93 class RewriteFilter;
94 class Statistics;
95 class UrlLeftTrimFilter;
96 class UrlNamer;
97 
100 class RewriteDriver : public HtmlParse {
101  public:
104  kWriteFailed,
105  kNoResolutionNeeded,
106  kSuccess
107  };
108 
110  enum WaitMode {
116  };
118 
130  enum XhtmlStatus {
131  kXhtmlUnknown,
132  kIsXhtml,
133  kIsNotXhtml
134  };
135 
138  kInlineUnauthorizedResources,
139  kInlineOnlyAuthorizedResources
140  };
141 
143  enum IntendedFor {
144  kIntendedForInlining,
145  kIntendedForGeneral
146  };
147 
151  static const char kDomCohort[];
153  static const char kBeaconCohort[];
157  static const char kDependenciesCohort[];
158 
161  static const char kLastRequestTimestamp[];
163  static const char kParseSizeLimitExceeded[];
165  static const char kSubresourcesPropertyName[];
167  static const char kStatusCodePropertyName[];
168 
169  RewriteDriver(MessageHandler* message_handler,
170  FileSystem* file_system,
171  UrlAsyncFetcher* url_async_fetcher);
172 
175  virtual ~RewriteDriver();
176 
185  RewriteDriver* Clone();
186 
192  void Clear();
193 
/// Initialize statistics for all filters that need it.
195  static void InitStats(Statistics* statistics);
196 
/// Initialize statics. Initialize/Terminate calls must be paired.
198  static void Initialize();
199  static void Terminate();
200 
202  static GoogleString DeadlineExceededMessage(StringPiece filter_name);
203 
206  void SetServerContext(ServerContext* server_context);
207 
210  bool MayCacheExtendCss() const;
211  bool MayCacheExtendImages() const;
212  bool MayCacheExtendPdfs() const;
213  bool MayCacheExtendScripts() const;
214 
215  const GoogleString& user_agent() const { return user_agent_; }
216 
217  const RequestProperties* request_properties() const {
218  return request_properties_.get();
219  }
220 
/// Reinitializes request_properties_, clearing any cached values.
222  void ClearRequestProperties();
223 
224  bool write_property_cache_dom_cohort() const {
225  return write_property_cache_dom_cohort_;
226  }
227  void set_write_property_cache_dom_cohort(bool x) {
228  write_property_cache_dom_cohort_ = x;
229  }
230 
233  static PropertyCache::CohortVector GetCohortList(
234  const PropertyCache* pcache, const RewriteOptions* options,
235  const ServerContext* server_context);
236 
239  void PropertyCacheSetupDone();
240 
241  RequestContextPtr request_context() { return request_context_; }
242  void set_request_context(const RequestContextPtr& x);
243 
246  RequestTrace* trace_context();
247 
250  void TracePrintf(const char* fmt, ...);
251  void TraceLiteral(const char* literal);
252  void TraceString(const GoogleString& s);
253 
257  return flush_occurred_ ? NULL : response_headers_;
258  }
259 
267  return response_headers_;
268  }
269 
274  response_headers_ = headers;
275  }
276 
285  void SetRequestHeaders(const RequestHeaders& headers);
286 
287  const RequestHeaders* request_headers() const {
288  return request_headers_.get();
289  }
290 
/// Returns the UserAgentMatcher owned by / reachable through server_context().
/// server_context() is dereferenced unconditionally; the DCHECK only guards
/// against a NULL server context where DCHECKs are compiled in, so callers
/// must make sure a server context is attached before calling this.
291  UserAgentMatcher* user_agent_matcher() const {
292  DCHECK(server_context() != NULL);
293  return server_context()->user_agent_matcher();
294  }
295 
300  void AddFilters();
301 
305  void AddOwnedEarlyPreRenderFilter(HtmlFilter* filter);
306 
308  void PrependOwnedPreRenderFilter(HtmlFilter* filter);
310  void AppendOwnedPreRenderFilter(HtmlFilter* filter);
312  void AppendUnownedPreRenderFilter(HtmlFilter* filter);
313 
/// Adds a filter to the end of the post-render chain, taking ownership.
315  void AddOwnedPostRenderFilter(HtmlFilter* filter);
317  void AddUnownedPostRenderFilter(HtmlFilter* filter);
318 
330  void AppendRewriteFilter(RewriteFilter* filter);
331 
334  void PrependRewriteFilter(RewriteFilter* filter);
335 
344 
350  void SetWriter(Writer* writer);
351 
352  Writer* writer() const { return writer_; }
353 
382  bool FetchResource(const StringPiece& url, AsyncFetch* fetch);
383 
401  void FetchInPlaceResource(const GoogleUrl& gurl, bool proxy_mode,
402  AsyncFetch* async_fetch);
403 
410  bool FetchOutputResource(const OutputResourcePtr& output_resource,
411  RewriteFilter* filter,
412  AsyncFetch* async_fetch);
413 
420  OutputResourcePtr DecodeOutputResource(const GoogleUrl& url,
421  RewriteFilter** filter) const;
422 
428  bool DecodeOutputResourceName(const GoogleUrl& url,
429  const RewriteOptions* options_to_use,
430  const UrlNamer* url_namer,
431  ResourceNamer* name_out,
432  OutputResourceKind* kind_out,
433  RewriteFilter** filter_out) const;
434 
447  StringPiece url,
448  GoogleString* error_out,
449  RewriteContext::CacheLookupResultCallback* callback);
450 
452  bool DecodeUrl(const GoogleUrl& url,
453  StringVector* decoded_urls) const;
454 
457  bool DecodeUrlGivenOptions(const GoogleUrl& url,
458  const RewriteOptions* options,
459  const UrlNamer* url_namer,
460  StringVector* decoded_urls) const;
461 
462  FileSystem* file_system() { return file_system_; }
463  UrlAsyncFetcher* async_fetcher() { return url_async_fetcher_; }
464 
469  void SetSessionFetcher(UrlAsyncFetcher* f);
470 
473  CacheUrlAsyncFetcher* CreateCacheFetcher();
475  CacheUrlAsyncFetcher* CreateCacheOnlyFetcher();
476 
477  ServerContext* server_context() const { return server_context_; }
478  Statistics* statistics() const;
479 
482  set_options_for_pool(NULL, options);
483  }
484 
488  controlling_pool_ = pool;
489  options_.reset(options);
490  }
491 
493  RewriteDriverPool* controlling_pool() { return controlling_pool_; }
494 
496  const RewriteOptions* options() const { return options_.get(); }
497 
501  virtual bool StartParseId(const StringPiece& url, const StringPiece& id,
502  const ContentType& content_type);
503 
510  virtual void FinishParse();
511 
515  void FinishParseAsync(Function* callback);
516 
520  void InfoAt(const RewriteContext* context,
521  const char* msg, ...) INSTAWEB_PRINTF_FORMAT(3, 4);
522 
525  const UrlSegmentEncoder* encoder,
526  const ResourceContext* data,
527  const ResourcePtr& input_resource,
528  GoogleString* name,
529  GoogleUrl* mapped_gurl,
530  GoogleString* failure_reason);
531 
536 
544  const char* filter_id,
545  const UrlSegmentEncoder* encoder,
546  const ResourceContext* data,
547  const ResourcePtr& input_resource,
548  OutputResourceKind kind,
549  GoogleString* failure_reason);
550 
568  const StringPiece& mapped_path, const StringPiece& unmapped_path,
569  const StringPiece& base_url, const StringPiece& filter_id,
570  const StringPiece& name, OutputResourceKind kind,
571  GoogleString* failure_reason);
572 
576  const StringPiece& filter_id,
577  const StringPiece& name,
578  ResourceNamer* full_name);
579 
584  const GoogleUrl& unmapped_gurl, const StringPiece& filter_id,
585  const StringPiece& name, OutputResourceKind kind,
586  GoogleString* failure_reason);
587 
591  const StringPiece& mapped_path, const StringPiece& unmapped_path,
592  const StringPiece& filter_id, const StringPiece& name,
593  OutputResourceKind kind, GoogleString* failure_reason) {
594  return CreateOutputResourceWithPath(mapped_path, unmapped_path,
595  decoded_base_url_.AllExceptLeaf(),
596  filter_id, name, kind, failure_reason);
597  }
598 
602  const StringPiece& path, const StringPiece& filter_id,
603  const StringPiece& name, OutputResourceKind kind,
604  GoogleString* failure_reason) {
605  return CreateOutputResourceWithPath(path, path, path, filter_id, name,
606  kind, failure_reason);
607  }
608 
611  enum class InputRole {
612  kScript,
613  kStyle,
614  kImg,
617  kUnknown,
622  };
623 
629  ResourcePtr CreateInputResource(const GoogleUrl& input_url,
630  InputRole role,
631  bool* is_authorized);
632 
649  const GoogleUrl& input_url,
650  InlineAuthorizationPolicy inline_authorization_policy,
651  IntendedFor intended_for,
652  InputRole role,
653  bool* is_authorized);
654 
659  const StringPiece& absolute_url);
660 
664  bool IsResourceUrlClaimed(const GoogleUrl& url) const;
665 
670  bool MatchesBaseUrl(const GoogleUrl& input_url) const;
671 
677  bool MayRewriteUrl(const GoogleUrl& domain_url,
678  const GoogleUrl& input_url,
679  InlineAuthorizationPolicy inline_authorization_policy,
680  IntendedFor intended_for,
681  bool* is_authorized_domain) const;
682 
686  const GoogleUrl& base_url() const { return base_url_; }
687 
689  StringPiece fetch_url() const { return fetch_url_; }
690 
694  const GoogleUrl& decoded_base_url() const { return decoded_base_url_; }
695  StringPiece decoded_base() const { return decoded_base_url_.Spec(); }
696 
/// Returns true when google_url() -- the parsed GoogleUrl corresponding to
/// url(), inherited from HtmlParse -- has the "https" scheme.
698  bool IsHttps() const { return google_url().SchemeIs("https"); }
699 
700  const UrlSegmentEncoder* default_encoder() const { return &default_encoder_; }
701 
/// Finds a filter with the given ID, or returns NULL if none found.
703  RewriteFilter* FindFilter(const StringPiece& id) const;
704 
706  bool refs_before_base() const { return refs_before_base_; }
707  bool other_base_problem() const { return other_base_problem_; }
708 
713  void set_refs_before_base() { refs_before_base_ = true; }
714 
716  void set_other_base_problem() { other_base_problem_ = true; }
717 
722  StringPiece containing_charset() { return containing_charset_; }
/// Records `charset` as the containing charset by copying its bytes into the
/// GoogleString member containing_charset_ (via StringPiece::CopyToString),
/// so the driver keeps its own copy rather than referencing the caller's data.
723  void set_containing_charset(const StringPiece charset) {
724  charset.CopyToString(&containing_charset_);
725  }
726 
731  HtmlResourceSlotPtr GetSlot(const ResourcePtr& resource,
732  HtmlElement* elt,
733  HtmlElement::Attribute* attr);
734 
739  InlineResourceSlotPtr GetInlineSlot(const ResourcePtr& resource,
740  HtmlCharactersNode* char_node);
741 
746  InlineAttributeSlotPtr GetInlineAttributeSlot(
747  const ResourcePtr& resource, HtmlElement* element,
748  HtmlElement::Attribute* attribute);
749 
759  SrcSetSlotCollectionPtr GetSrcSetSlotCollection(
760  CommonFilter* filter, HtmlElement* element, HtmlElement::Attribute* attr);
761 
766  bool InitiateRewrite(RewriteContext* rewrite_context)
767  LOCKS_EXCLUDED(rewrite_mutex());
768  void InitiateFetch(RewriteContext* rewrite_context);
769 
780  void RewriteComplete(RewriteContext* rewrite_context, RenderOp permit_render);
781 
785  void ReportSlowRewrites(int num);
786 
791  void Cleanup();
792 
797  void AddUserReference();
798 
800  GoogleString ToString(bool show_detached_contexts) const
801  LOCKS_EXCLUDED(rewrite_mutex());
802  GoogleString ToStringLockHeld(bool show_detached_contexts) const
803  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
804  void PrintState(bool show_detached_contexts);
805  void PrintStateToErrorLog(bool show_detached_contexts);
806 
809  void WaitForCompletion();
810 
817  void WaitForShutDown();
818 
822  void BoundedWaitFor(WaitMode mode, int64 timeout_ms)
823  LOCKS_EXCLUDED(rewrite_mutex());
824 
833  fully_rewrite_on_flush_ = x;
834  }
835 
837  bool fully_rewrite_on_flush() const {
838  return fully_rewrite_on_flush_;
839  }
840 
845  fast_blocking_rewrite_ = x;
846  }
847 
848  bool fast_blocking_rewrite() const {
849  return fast_blocking_rewrite_;
850  }
851 
854  void EnableBlockingRewrite(RequestHeaders* request_headers);
855 
862  void set_externally_managed(bool x) { externally_managed_ = x; }
863 
867  void DetachFetch();
868 
871  void DetachedFetchComplete();
872 
876  void FetchComplete();
877 
883  void DeleteRewriteContext(RewriteContext* rewrite_context);
884 
885  int rewrite_deadline_ms() { return options()->rewrite_deadline_ms(); }
886 
892  max_page_processing_delay_ms_ = x;
893  }
894  int max_page_processing_delay_ms() { return max_page_processing_delay_ms_; }
895 
897  void set_device_type(UserAgentMatcher::DeviceType x) { device_type_ = x; }
898  UserAgentMatcher::DeviceType device_type() const { return device_type_; }
899 
905  RewriteContext* RegisterForPartitionKey(const GoogleString& partition_key,
906  RewriteContext* candidate);
907 
913  const GoogleString& partition_key, RewriteContext* candidate);
914 
917  void RequestFlush() { flush_requested_ = true; }
918  bool flush_requested() const { return flush_requested_; }
919 
932 
936  void ExecuteFlushIfRequestedAsync(Function* callback);
937 
946  virtual void Flush();
947 
951  void FlushAsync(Function* done);
952 
/// Queues up a task to run on the (high-priority) rewrite thread.
954  void AddRewriteTask(Function* task);
955 
958  void AddLowPriorityRewriteTask(Function* task);
959 
960  QueuedWorkerPool::Sequence* html_worker() { return html_worker_; }
961  Sequence* rewrite_worker();
962  Scheduler::Sequence* scheduler_sequence() {
963  return scheduler_sequence_.get();
964  }
965 
966  QueuedWorkerPool::Sequence* low_priority_rewrite_worker() {
967  return low_priority_rewrite_worker_;
968  }
969 
974 
980  void SwitchToQueuedWorkerPool() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
981 
982  Scheduler* scheduler() { return scheduler_; }
983 
986  DomainRewriteFilter* domain_rewriter() { return domain_rewriter_.get(); }
987  UrlLeftTrimFilter* url_trim_filter() { return url_trim_filter_.get(); }
988 
996  CssResolutionStatus ResolveCssUrls(const GoogleUrl& input_css_base,
997  const StringPiece& output_css_base,
998  const StringPiece& contents,
999  Writer* writer,
1000  MessageHandler* handler);
1001 
1009  bool ShouldAbsolutifyUrl(const GoogleUrl& input_base,
1010  const GoogleUrl& output_base,
1011  bool* proxy_mode) const;
1012 
1021  AbstractPropertyPage* page,
1022  StringPiece property_name,
1023  StringPiece property_value);
1024 
1027  PropertyPage* property_page() const;
1028 
1034  return fallback_property_page_;
1035  }
1036 
1040 
1042  void set_property_page(PropertyPage* page);
1049 
1055 
1058  return critical_images_info_.get();
1059  }
1060 
1066  return critical_selector_info_.get();
1067  }
1068 
1073  critical_selector_info_.reset(info);
1074  }
1075 
1081  critical_images_info_.reset(critical_images_info);
1082  }
1083 
1087  return (options()->Enabled(RewriteOptions::kFlattenCssImports) ||
1088  (!options()->Forbidden(RewriteOptions::kFlattenCssImports) &&
1089  (options()->Enabled(RewriteOptions::kPrioritizeCriticalCss) ||
1090  options()->Enabled(RewriteOptions::kComputeCriticalCss))));
1091  }
1092 
1096  int num_inline_preview_images() const { return num_inline_preview_images_; }
1097 
1100 
1104 
1107 
1111 
1115 
1119 
1120  void set_is_lazyload_script_flushed(bool x) {
1121  is_lazyload_script_flushed_ = x;
1122  }
1123  bool is_lazyload_script_flushed() const {
1124  return is_lazyload_script_flushed_; }
1125 
/// This method is not thread-safe. Call it only from the html parser thread.
1127  FlushEarlyInfo* flush_early_info();
1128 
1132  return dependency_tracker_.get();
1133  }
1134 
/// Returns true when RewriteOptions::kDebug is enabled in options().
/// options() is dereferenced without a NULL check, so options must already
/// have been installed on this driver.
1137  bool DebugMode() const { return options()->Enabled(RewriteOptions::kDebug); }
1138 
1144  void InsertDebugComment(StringPiece unescaped_message, HtmlNode* node);
1145  void InsertDebugComments(
1146  const protobuf::RepeatedPtrField<GoogleString>& unescaped_messages,
1147  HtmlElement* element);
1148  void InsertUnauthorizedDomainDebugComment(StringPiece url,
1149  InputRole role,
1150  HtmlElement* element);
1151 
1154  const GoogleUrl& gurl, InputRole role);
1155 
1159 
1160  DomStatsFilter* dom_stats_filter() const {
1161  return dom_stats_filter_;
1162  }
1163 
1166  bool can_rewrite_resources() const { return can_rewrite_resources_; }
1167 
/// Determine whether this driver is nested inside another.
1169  bool is_nested() const { return is_nested_; }
1170 
1183  bool Write(const ResourceVector& inputs,
1184  const StringPiece& contents,
1185  const ContentType* type,
1186  StringPiece charset,
1187  OutputResource* output);
1188 
1189  void set_defer_instrumentation_script(bool x) {
1190  defer_instrumentation_script_ = x;
1191  }
1192  bool defer_instrumentation_script() const {
1193  return defer_instrumentation_script_;
1194  }
1195 
1199  ScopedMutex lock(rewrite_mutex());
1200  num_initiated_rewrites_ = x;
1201  }
/// Returns the current value of num_initiated_rewrites_. The member is
/// GUARDED_BY(rewrite_mutex()), so the read is done while holding that mutex
/// through a ScopedMutex for the duration of this call.
1202  int64 num_initiated_rewrites() const {
1203  ScopedMutex lock(rewrite_mutex());
1204  return num_initiated_rewrites_;
1205  }
1208  ScopedMutex lock(rewrite_mutex());
1209  num_detached_rewrites_ = x;
1210  }
/// Returns the current value of num_detached_rewrites_. The member is
/// GUARDED_BY(rewrite_mutex()), so the read is done while holding that mutex
/// through a ScopedMutex for the duration of this call.
1211  int64 num_detached_rewrites() const {
1212  ScopedMutex lock(rewrite_mutex());
1213  return num_detached_rewrites_;
1214  }
1215 
1216  void set_pagespeed_query_params(StringPiece x) {
1217  x.CopyToString(&pagespeed_query_params_);
1218  }
1219  StringPiece pagespeed_query_params() const {
1220  return pagespeed_query_params_;
1221  }
1222 
1223  void set_pagespeed_option_cookies(StringPiece x) {
1224  x.CopyToString(&pagespeed_option_cookies_);
1225  }
1226  StringPiece pagespeed_option_cookies() const {
1227  return pagespeed_option_cookies_;
1228  }
1229 
1232  const GoogleString& CacheFragment() const;
1233 
1240  bool SetOrClearPageSpeedOptionCookies(const GoogleUrl& gurl,
1241  ResponseHeaders* response_headers);
1242 
1245  bool Decode(StringPiece leaf, ResourceNamer* resource_namer) const;
1246 
1247  bool filters_added() const { return filters_added_; }
/// Returns true when html_writer_filter_ currently holds a non-null
/// HtmlWriterFilter.
1248  bool has_html_writer_filter() const {
1249  return html_writer_filter_.get() != nullptr;
1250  }
1251 
1255  void SetIsAmpDocument(bool is_amp);
1256  bool is_amp_document() const { return is_amp_; }
1257 
1258  const CspContext& content_security_policy() const { return csp_context_; }
1259  CspContext* mutable_content_security_policy() { return &csp_context_; }
1260  bool IsLoadPermittedByCsp(const GoogleUrl& url, InputRole role);
1261  bool IsLoadPermittedByCsp(const GoogleUrl& url, CspDirective role);
1262 
1263  protected:
1264  virtual void DetermineFiltersBehaviorImpl();
1265 
1266  private:
1267  friend class RewriteContext;
1268  friend class RewriteDriverTest;
1269  friend class RewriteTestBase;
1270  friend class ServerContextTest;
1271 
1272  typedef std::map<GoogleString, RewriteFilter*> StringFilterMap;
1273 
1278  void CheckForCompletionAsync(WaitMode wait_mode, int64 timeout_ms,
1279  Function* done)
1280  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1281 
1285  void TryCheckForCompletion(WaitMode wait_mode, int64 end_time_ms,
1286  Function* done)
1287  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1288 
1290  bool IsDone(WaitMode wait_mode, bool deadline_reached)
1291  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1292 
/// Returns true when wait_mode is kWaitForShutDown, or when
/// fully_rewrite_on_flush_ is set and fast_blocking_rewrite_ is not.
/// NOTE(review): presumably this tells the completion check whether
/// outstanding async events must also finish before the wait is considered
/// done -- confirm against the callers in rewrite_driver.cc.
1295  bool WaitForPendingAsyncEvents(WaitMode wait_mode) {
1296  return wait_mode == kWaitForShutDown ||
1297  (fully_rewrite_on_flush_ && !fast_blocking_rewrite_);
1298  }
1299 
1303  void FlushAsyncDone(int num_rewrites, Function* callback);
1304 
1309  int64 ComputeCurrentFlushWindowRewriteDelayMs();
1310 
1312  void QueueFlushAsyncDone(int num_rewrites, Function* callback);
1313 
1316  void QueueFinishParseAfterFlush(Function* user_callback);
1317  void FinishParseAfterFlush(Function* user_callback);
1318 
1319  bool RewritesComplete() const EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1320 
1323  void SetBaseUrlIfUnset(const StringPiece& new_base);
1324 
1327  void SetBaseUrlForFetch(const StringPiece& url);
1328 
1331  void SetDecodedUrlFromBase();
1332 
/// Returns the mutex used for the rewrite state, which is the scheduler's
/// mutex (scheduler_->mutex()). The LOCK_RETURNED annotation declares that
/// equivalence to thread-safety analysis, so members marked
/// GUARDED_BY(rewrite_mutex()) are understood to be protected by the
/// scheduler mutex. scheduler_ is dereferenced without a NULL check.
1334  AbstractMutex* rewrite_mutex() const LOCK_RETURNED(scheduler_->mutex()) {
1335  return scheduler_->mutex();
1336  }
1337 
1339  virtual void ParseTextInternal(const char* content, int size);
1340 
1342  bool ShouldSkipParsing();
1343 
1345  int SignatureLength() const;
1346 
1347  friend class ScanFilter;
1348 
1352  void RegisterRewriteFilter(RewriteFilter* filter);
1353 
1358  void EnableRewriteFilter(const char* id);
1359 
1365  ResourcePtr CreateInputResourceUnchecked(const GoogleUrl& gurl,
1366  bool is_authorized_domain);
1367 
1368  void AddPreRenderFilters();
1369  void AddPostRenderFilters();
1370 
1372  bool DecodeOutputResourceNameHelper(const GoogleUrl& url,
1373  const RewriteOptions* options_to_use,
1374  const UrlNamer* url_namer,
1375  ResourceNamer* name_out,
1376  OutputResourceKind* kind_out,
1377  RewriteFilter** filter_out,
1378  GoogleString* url_base,
1379  StringVector* urls) const;
1380 
1390  void WriteDomCohortIntoPropertyCache();
1391 
1393  CacheUrlAsyncFetcher* CreateCustomCacheFetcher(UrlAsyncFetcher* base_fetcher);
1394 
1401  void PossiblyPurgeCachedResponseAndReleaseDriver();
1402 
1404  void LogStats();
1405 
1421  bool PrepareShouldSignal() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1422  void SignalIfRequired(bool result_of_prepare_should_signal)
1423  EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex());
1424 
1427  void CleanupRequestThread();
1428 
1440  bool base_was_set_;
1441 
1446  bool refs_before_base_;
1447 
1449  bool other_base_problem_;
1450 
1452  GoogleString containing_charset_;
1453 
1456  void PopulateRequestContext();
1457 
1458  bool filters_added_;
1459  bool externally_managed_;
1460 
1469  enum RefCategory {
1470  kRefUser,
1471  kRefParsing,
1472 
1476  kRefPendingRewrites,
1477 
1481  kRefDetachedRewrites,
1482 
1490  kRefDeletingRewrites,
1491 
1493  kRefFetchUserFacing,
1494 
1496  kRefFetchBackground,
1497 
1502  kRefAsyncEvents,
1503 
1506  kRefRenderBlockingAsyncEvents,
1507 
1508  kNumRefCategories
1509  };
1510 
1511  friend class CategorizedRefcount<RewriteDriver, RefCategory>;
1512 
1514  CategorizedRefcount<RewriteDriver, RefCategory> ref_counts_;
1515 
1517  void LastRefRemoved();
1518  StringPiece RefCategoryName(RefCategory cat);
1519 
1522  void DropReference(RefCategory cat);
1523 
1526  bool release_driver_;
1527 
1531  WaitMode waiting_ GUARDED_BY(rewrite_mutex());
1532 
1534  bool waiting_deadline_reached_ GUARDED_BY(rewrite_mutex());
1535 
1540  bool fully_rewrite_on_flush_;
1541 
1544  bool fast_blocking_rewrite_;
1545 
1546  bool flush_requested_;
1547  bool flush_occurred_;
1548 
1551  bool is_lazyload_script_flushed_;
1552 
1556  bool write_property_cache_dom_cohort_;
1557 
1560  GoogleUrl base_url_;
1561 
1565  GoogleUrl decoded_base_url_;
1566 
1569  GoogleString fetch_url_;
1570 
1571  GoogleString user_agent_;
1572 
1573  LazyBool should_skip_parsing_;
1574 
1575  StringFilterMap resource_filter_map_;
1576 
1577  ResponseHeaders* response_headers_;
1578 
1581  scoped_ptr<const RequestHeaders> request_headers_;
1582 
1583  int status_code_;
1584 
1587  typedef std::vector<RewriteContext*> RewriteContextVector;
1588  RewriteContextVector rewrites_;
1589 
1592  int max_page_processing_delay_ms_;
1593 
1594  typedef std::set<RewriteContext*> RewriteContextSet;
1595 
1600  RewriteContextSet initiated_rewrites_ GUARDED_BY(rewrite_mutex());
1601 
1603  int64 num_initiated_rewrites_ GUARDED_BY(rewrite_mutex());
1604 
1612  int64 num_detached_rewrites_ GUARDED_BY(rewrite_mutex());
1613 
1621  RewriteContextSet detached_rewrites_ GUARDED_BY(rewrite_mutex());
1622 
1624  int possibly_quick_rewrites_ GUARDED_BY(rewrite_mutex());
1625 
1628  RewriteContextVector fetch_rewrites_;
1629 
1632  FileSystem* file_system_;
1633  ServerContext* server_context_;
1634  Scheduler* scheduler_;
1635  UrlAsyncFetcher* default_url_async_fetcher_;
1636 
1640  UrlAsyncFetcher* url_async_fetcher_;
1641 
1644  std::vector<UrlAsyncFetcher*> owned_url_async_fetchers_;
1645 
1646  DomStatsFilter* dom_stats_filter_;
1647  scoped_ptr<HtmlWriterFilter> html_writer_filter_;
1648 
1649  ScanFilter scan_filter_;
1650  scoped_ptr<DomainRewriteFilter> domain_rewriter_;
1651  scoped_ptr<UrlLeftTrimFilter> url_trim_filter_;
1652 
1655  typedef std::map<GoogleString, RewriteContext*> PrimaryRewriteContextMap;
1656  PrimaryRewriteContextMap primary_rewrite_context_map_;
1657 
1658  HtmlResourceSlotSet slots_;
1659  InlineResourceSlotSet inline_slots_;
1660  InlineAttributeSlotSet inline_attribute_slots_;
1661  SrcSetSlotCollectionSet srcset_collections_;
1662 
1663  scoped_ptr<RewriteOptions> options_;
1664 
1665  RewriteDriverPool* controlling_pool_;
1666 
1668  scoped_ptr<CacheUrlAsyncFetcher::AsyncOpHooks>
1669  cache_url_async_fetcher_async_op_hooks_;
1670 
1672  UrlSegmentEncoder default_encoder_;
1673 
1675  FilterList early_pre_render_filters_;
1677  FilterList pre_render_filters_;
1678 
1680  std::vector<ResourceUrlClaimant*> resource_claimants_;
1681 
1685  FilterVector filters_to_delete_;
1686 
1687  QueuedWorkerPool::Sequence* html_worker_;
1688  QueuedWorkerPool::Sequence* rewrite_worker_;
1689  QueuedWorkerPool::Sequence* low_priority_rewrite_worker_;
1690  scoped_ptr<Scheduler::Sequence> scheduler_sequence_;
1691 
1692  Writer* writer_;
1693 
1696  FallbackPropertyPage* fallback_property_page_;
1697 
1699  bool owns_property_page_;
1700 
1702  scoped_ptr<PropertyPage> origin_property_page_;
1703 
1705  UserAgentMatcher::DeviceType device_type_;
1706 
1709  scoped_ptr<CriticalImagesInfo> critical_images_info_;
1710  scoped_ptr<CriticalSelectorInfo> critical_selector_info_;
1711 
1713  bool xhtml_mimetype_computed_;
1714  XhtmlStatus xhtml_status_ : 8;
1715 
1718  int num_inline_preview_images_;
1719 
1721  int num_bytes_in_;
1722 
1723  DebugFilter* debug_filter_;
1724 
1725  scoped_ptr<FlushEarlyInfo> flush_early_info_;
1726  scoped_ptr<DependencyTracker> dependency_tracker_;
1727 
1728  bool can_rewrite_resources_;
1729  bool is_nested_;
1730 
1733  RequestContextPtr request_context_;
1734 
1736  int64 start_time_ms_;
1737 
1738  scoped_ptr<RequestProperties> request_properties_;
1739 
1743  static int initialized_count_;
1744 
1747  bool defer_instrumentation_script_;
1748 
1750  bool is_amp_;
1751 
1753  AtomicBool executing_rewrite_tasks_;
1754 
1756  DownstreamCachePurger downstream_cache_purger_;
1757 
1759  GoogleString pagespeed_query_params_;
1760 
1762  GoogleString pagespeed_option_cookies_;
1763 
1765  CspContext csp_context_;
1766 
1767 
1768 };
1769 
1773  public:
1774  virtual ~OptionsAwareHTTPCacheCallback();
1775  virtual bool IsCacheValid(const GoogleString& key,
1776  const ResponseHeaders& headers);
1777  virtual int64 OverrideCacheTtlMs(const GoogleString& key);
1778  virtual ResponseHeaders::VaryOption RespectVaryOnResources() const;
1779 
1783  static bool IsCacheValid(const GoogleString& key,
1784  const RewriteOptions& rewrite_options,
1785  const RequestContextPtr& request_ctx,
1786  const ResponseHeaders& headers);
1787 
1788  protected:
1792  const RewriteOptions* rewrite_options,
1793  const RequestContextPtr& request_ctx);
1794 
1795  private:
1796  const RewriteOptions* rewrite_options_;
1797 
1798 
1799 };
1800 
1801 }
1802 
1803 #endif
virtual ResponseHeaders::VaryOption RespectVaryOnResources() const
class GoogleUrl
Definition: google_url.h:58
OutputResourcePtr DecodeOutputResource(const GoogleUrl &url, RewriteFilter **filter) const
Definition: rewrite_driver.h:115
static const char kDomCohort[]
Definition: rewrite_driver.h:151
void EnableBlockingRewrite(RequestHeaders *request_headers)
void set_max_page_processing_delay_ms(int x)
Definition: rewrite_driver.h:891
InlineResourceSlotPtr GetInlineSlot(const ResourcePtr &resource, HtmlCharactersNode *char_node)
void PopulateResourceNamer(const StringPiece &filter_id, const StringPiece &name, ResourceNamer *full_name)
const ResponseHeaders * response_headers()
Definition: rewrite_driver.h:266
OutputResourcePtr CreateOutputResourceWithPath(const StringPiece &mapped_path, const StringPiece &unmapped_path, const StringPiece &base_url, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
DomainRewriteFilter * domain_rewriter()
Definition: rewrite_driver.h:986
Read/write API for HTTP request (RequestHeaders is a misnomer).
Definition: request_headers.h:32
Definition: critical_selector_finder.h:43
bool Write(const ResourceVector &inputs, const StringPiece &contents, const ContentType *type, StringPiece charset, OutputResource *output)
const GoogleUrl & google_url() const
Gets a parsed GoogleUrl& corresponding to url().
Definition: html_parse.h:385
bool DecodeUrlGivenOptions(const GoogleUrl &url, const RewriteOptions *options, const UrlNamer *url_namer, StringVector *decoded_urls) const
GoogleString GenerateUnauthorizedDomainDebugComment(const GoogleUrl &gurl, InputRole role)
Generates an unauthorized domain debug comment. Public for unit tests.
void AppendRewriteFilter(RewriteFilter *filter)
bool MayCacheExtendCss() const
void ClearRequestProperties()
Reinitializes request_properties_, clearing any cached values.
const GoogleUrl & base_url() const
Definition: rewrite_driver.h:686
void SwitchToQueuedWorkerPool() EXCLUSIVE_LOCKS_REQUIRED(rewrite_mutex())
bool FetchOutputResource(const OutputResourcePtr &output_resource, RewriteFilter *filter, AsyncFetch *async_fetch)
virtual bool IsCacheValid(const GoogleString &key, const ResponseHeaders &headers)
void AddOwnedPostRenderFilter(HtmlFilter *filter)
Adds a filter to the end of the post-render chain, taking ownership.
void set_options_for_pool(RewriteDriverPool *pool, RewriteOptions *options)
Definition: rewrite_driver.h:487
RewriteFilter * FindFilter(const StringPiece &id) const
Finds a filter with the given ID, or returns NULL if none found.
static const char kSubresourcesPropertyName[]
Flush Subresources Info associated with the HTML page.
Definition: rewrite_driver.h:165
void set_response_headers_ptr(ResponseHeaders *headers)
Definition: rewrite_driver.h:273
InlineAttributeSlotPtr GetInlineAttributeSlot(const ResourcePtr &resource, HtmlElement *element, HtmlElement::Attribute *attribute)
Base class for implementations of monitoring statistics.
Definition: statistics.h:342
void InfoAt(const RewriteContext *context, const char *msg,...) INSTAWEB_PRINTF_FORMAT(3, 4)
DependencyTracker * dependency_tracker() const
Definition: rewrite_driver.h:1131
static PropertyCache::CohortVector GetCohortList(const PropertyCache *pcache, const RewriteOptions *options, const ServerContext *server_context)
CssResolutionStatus
Status return-code for ResolveCssUrls.
Definition: rewrite_driver.h:103
CriticalSelectorInfo * critical_selector_info()
Definition: rewrite_driver.h:1065
FallbackPropertyPage * fallback_property_page() const
Definition: rewrite_driver.h:1033
StringPiece containing_charset()
Definition: rewrite_driver.h:722
PropertyPage * origin_property_page() const
ResourcePtr CreateInputResourceAbsoluteUncheckedForTestsOnly(const StringPiece &absolute_url)
FlushEarlyInfo * flush_early_info()
This method is not thread-safe. Call it only from the html parser thread.
void AddRewriteTask(Function *task)
Queues up a task to run on the (high-priority) rewrite thread.
static void InitStats(Statistics *statistics)
Initialize statistics for all filters that need it.
void RewriteComplete(RewriteContext *rewrite_context, RenderOp permit_render)
void SetSessionFetcher(UrlAsyncFetcher *f)
bool DebugMode() const
Definition: rewrite_driver.h:1137
Definition: url_left_trim_filter.h:47
ResourcePtr CreateInputResource(const GoogleUrl &input_url, InputRole role, bool *is_authorized)
bool FetchResource(const StringPiece &url, AsyncFetch *fetch)
void set_fully_rewrite_on_flush(bool x)
Definition: rewrite_driver.h:832
Definition: html_parse.h:88
bool is_nested() const
Determine whether this driver is nested inside another.
Definition: rewrite_driver.h:1169
void increment_num_inline_preview_images()
We expect this method to be called on the HTML parser thread.
bool LookupMetadataForOutputResource(StringPiece url, GoogleString *error_out, RewriteContext::CacheLookupResultCallback *callback)
void set_critical_images_info(CriticalImagesInfo *critical_images_info)
Definition: rewrite_driver.h:1080
Definition: html_element.h:42
void set_origin_property_page(PropertyPage *page)
Takes ownership of page.
Read/write API for HTTP response headers.
Definition: response_headers.h:37
void FlushAsync(Function *done)
Definition: log_record.h:59
void FinishParseAsync(Function *callback)
void DecrementAsyncEventsCount()
Decrements a reference count bumped up by IncrementAsyncEventsCount()
void AddOwnedEarlyPreRenderFilter(HtmlFilter *filter)
static void Initialize()
Initialize statics. Initialize/Terminate calls must be paired.
bool IsResourceUrlClaimed(const GoogleUrl &url) const
RequestTrace * trace_context()
void set_custom_options(RewriteOptions *options)
Takes ownership of 'options'.
Definition: rewrite_driver.h:481
CspDirective
Definition: csp_directive.h:37
virtual void DetermineFiltersBehaviorImpl()
Definition: property_cache.h:323
void PrintStateToErrorLog(bool show_detached_contexts)
For logs.
XhtmlStatus MimeTypeXhtmlStatus()
const char * url() const
Definition: html_parse.h:383
InputRole
Definition: rewrite_driver.h:611
WaitMode
Mode for BoundedWaitForCompletion.
Definition: rewrite_driver.h:110
void SetIsAmpDocument(bool is_amp)
bool MatchesBaseUrl(const GoogleUrl &input_url) const
bool FlattenCssImportsEnabled() const
Definition: rewrite_driver.h:1086
Callback2< const GoogleUrl &, bool * > ResourceUrlClaimant
Definition: resource.h:357
Definition: rewrite_driver_pool.h:34
Definition: function.h:47
StringPiece Spec() const
It is illegal to call this for invalid urls (check IsWebValid() first).
void SetRequestHeaders(const RequestHeaders &headers)
static GoogleString DeadlineExceededMessage(StringPiece filter_name)
Formats a "deadline exceeded" message for a given filter.
bool DecodeOutputResourceName(const GoogleUrl &url, const RewriteOptions *options_to_use, const UrlNamer *url_namer, ResourceNamer *name_out, OutputResourceKind *kind_out, RewriteFilter **filter_out) const
void FetchInPlaceResource(const GoogleUrl &gurl, bool proxy_mode, AsyncFetch *async_fetch)
static const char kStatusCodePropertyName[]
Status codes of previous responses.
Definition: rewrite_driver.h:167
void set_num_detached_rewrites(int64 x)
Sets the num_detached_rewrites_. This should only be called from test code.
Definition: rewrite_driver.h:1207
void DeleteRewriteContext(RewriteContext *rewrite_context)
OutputResourcePtr CreateOutputResourceWithMappedPath(const StringPiece &mapped_path, const StringPiece &unmapped_path, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
Definition: rewrite_driver.h:590
void set_fast_blocking_rewrite(bool x)
Definition: rewrite_driver.h:844
void set_unowned_fallback_property_page(FallbackPropertyPage *page)
Does not take ownership of the page.
ResponseHeaders * mutable_response_headers()
Definition: rewrite_driver.h:256
void set_other_base_problem()
Sets if we had other difficulty handling <base> tag.
Definition: rewrite_driver.h:716
CssResolutionStatus ResolveCssUrls(const GoogleUrl &input_css_base, const StringPiece &output_css_base, const StringPiece &contents, Writer *writer, MessageHandler *handler)
std::string GoogleString
PAGESPEED_KERNEL_BASE_STRING_H_.
Definition: string.h:24
XhtmlStatus
Definition: rewrite_driver.h:130
void set_fallback_property_page(FallbackPropertyPage *page)
Takes ownership of page.
void set_critical_selector_info(CriticalSelectorInfo *info)
Definition: rewrite_driver.h:1072
RewriteDriver * Clone()
bool fully_rewrite_on_flush() const
Returns whether this response has a blocking rewrite.
Definition: rewrite_driver.h:837
CacheUrlAsyncFetcher * CreateCacheOnlyFetcher()
Returns a cache fetcher that does not fall back to an actual fetcher.
void DeregisterForPartitionKey(const GoogleString &partition_key, RewriteContext *candidate)
static const char kLastRequestTimestamp[]
Definition: rewrite_driver.h:161
void AppendOwnedPreRenderFilter(HtmlFilter *filter)
Adds a filter to the end of the pre-render chain, taking ownership.
void DecrementRenderBlockingAsyncEventsCount()
Helper class for lexically scoped mutexing.
Definition: abstract_mutex.h:46
PropertyPage * property_page() const
bool MayRewriteUrl(const GoogleUrl &domain_url, const GoogleUrl &input_url, InlineAuthorizationPolicy inline_authorization_policy, IntendedFor intended_for, bool *is_authorized_domain) const
bool can_rewrite_resources() const
Definition: rewrite_driver.h:1166
bool refs_before_base() const
Returns refs_before_base.
Definition: rewrite_driver.h:706
LazyBool
Lazily-initialized boolean value.
Definition: basictypes.h:68
StringPiece fetch_url() const
The URL that was requested if FetchResource was called.
Definition: rewrite_driver.h:689
OutputResourcePtr CreateOutputResourceFromResource(const char *filter_id, const UrlSegmentEncoder *encoder, const ResourceContext *data, const ResourcePtr &input_resource, OutputResourceKind kind, GoogleString *failure_reason)
Definition: file_system.h:76
void BoundedWaitFor(WaitMode mode, int64 timeout_ms) LOCKS_EXCLUDED(rewrite_mutex())
void AddUnownedPostRenderFilter(HtmlFilter *filter)
Same, without taking ownership.
bool IsHttps() const
Quick way to tell if the document URL is https (i.e., was fetched via https).
Definition: rewrite_driver.h:698
Definition: html_node.h:43
GoogleString ToString(bool show_detached_contexts) const LOCKS_EXCLUDED(rewrite_mutex())
Debugging routines to print out data about the driver.
void ReportSlowRewrites(int num)
void PrependRewriteFilter(RewriteFilter *filter)
HtmlResourceSlotPtr GetSlot(const ResourcePtr &resource, HtmlElement *elt, HtmlElement::Attribute *attr)
Definition: rewrite_driver.h:100
const GoogleUrl & decoded_base_url() const
Definition: rewrite_driver.h:694
bool SetOrClearPageSpeedOptionCookies(const GoogleUrl &gurl, ResponseHeaders *response_headers)
void set_num_initiated_rewrites(int64 x)
Definition: rewrite_driver.h:1198
void set_property_page(PropertyPage *page)
Takes ownership of page.
void PrependOwnedPreRenderFilter(HtmlFilter *filter)
Adds a filter to the beginning of the pre-render chain, taking ownership.
Definition: server_context.h:99
virtual bool StartParseId(const StringPiece &url, const StringPiece &id, const ContentType &content_type)
Definition: content_type.h:31
RewriteDriverPool * controlling_pool()
Pool in which this driver can be recycled. May be NULL.
Definition: rewrite_driver.h:493
Definition: rewrite_context.h:155
void set_externally_managed(bool x)
Definition: rewrite_driver.h:862
OutputResourcePtr CreateOutputResourceWithUnmappedUrl(const GoogleUrl &unmapped_gurl, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
StringPiece AllExceptLeaf() const
void AppendUnownedPreRenderFilter(HtmlFilter *filter)
Same, without taking ownership.
bool InitiateRewrite(RewriteContext *rewrite_context) LOCKS_EXCLUDED(rewrite_mutex())
void PrintState(bool show_detached_contexts)
For debugging.
int num_inline_preview_images() const
Definition: rewrite_driver.h:1096
void SetWriter(Writer *writer)
bool ShouldAbsolutifyUrl(const GoogleUrl &input_base, const GoogleUrl &output_base, bool *proxy_mode) const
void AddResourceUrlClaimant(ResourceUrlClaimant *claimant)
bool GenerateOutputResourceNameAndUrl(const UrlSegmentEncoder *encoder, const ResourceContext *data, const ResourcePtr &input_resource, GoogleString *name, GoogleUrl *mapped_gurl, GoogleString *failure_reason)
Constructs name and URL for the specified input resource and encoder.
void AddLowPriorityRewriteTask(Function *task)
OptionsAwareHTTPCacheCallback(const RewriteOptions *rewrite_options, const RequestContextPtr &request_ctx)
void TracePrintf(const char *fmt,...)
InlineAuthorizationPolicy
See CreateInputResource.
Definition: rewrite_driver.h:137
Definition: url_segment_encoder.h:33
SrcSetSlotCollectionPtr GetSrcSetSlotCollection(CommonFilter *filter, HtmlElement *element, HtmlElement::Attribute *attr)
const GoogleString & CacheFragment() const
Wait for everything to complete (up to deadline).
Definition: rewrite_driver.h:112
static const char kBeaconCohort[]
The cohort for properties that are written by the beacon handler.
Definition: rewrite_driver.h:153
#define INSTAWEB_PRINTF_FORMAT(x, y)
< Not GCC
Definition: printf_format.h:34
Definition: message_handler.h:39
bool Decode(StringPiece leaf, ResourceNamer *resource_namer) const
Definition: http_cache.h:132
virtual int64 OverrideCacheTtlMs(const GoogleString &key)
DeviceType
Definition: user_agent_matcher.h:49
OutputResourcePtr CreateOutputResourceWithPath(const StringPiece &path, const StringPiece &filter_id, const StringPiece &name, OutputResourceKind kind, GoogleString *failure_reason)
Definition: rewrite_driver.h:601
Definition: output_resource.h:44
CacheUrlAsyncFetcher * CreateCacheFetcher()
void set_refs_before_base()
Definition: rewrite_driver.h:713
RewriteContext * RegisterForPartitionKey(const GoogleString &partition_key, RewriteContext *candidate)
Used internally. Do not pass in.
Definition: rewrite_driver.h:111
Definition: dependency_tracker.h:41
void ExecuteFlushIfRequestedAsync(Function *callback)
AbstractLogRecord * log_record()
Definition: domain_rewrite_filter.h:50
void SetServerContext(ServerContext *server_context)
static const char kDependenciesCohort[]
Definition: rewrite_driver.h:157
const RewriteOptions * options() const
Return the options used for this RewriteDriver.
Definition: rewrite_driver.h:496
void UpdatePropertyValueInDomCohort(AbstractPropertyPage *page, StringPiece property_name, StringPiece property_value)
Definition: rewrite_options.h:84
void set_device_type(UserAgentMatcher::DeviceType x)
Sets the device type chosen for the current property_page.
Definition: rewrite_driver.h:897
Definition: rewrite_driver.h:1772
Counts some basic statistics observed as HTML is parsed.
Definition: dom_stats_filter.h:34
static const char kParseSizeLimitExceeded[]
Tracks if we exceeded the maximum size limit of html which we should parse.
Definition: rewrite_driver.h:163
bool DecodeUrl(const GoogleUrl &url, StringVector *decoded_urls) const
Decodes the incoming pagespeed url to original url(s).
Definition: request_properties.h:37
void InsertDebugComment(StringPiece unescaped_message, HtmlNode *node)
CriticalImagesInfo * critical_images_info() const
Used by ImageRewriteFilter for identifying critical images.
Definition: rewrite_driver.h:1057
IntendedFor
See CreateInputResource.
Definition: rewrite_driver.h:143
Definition: url_async_fetcher.h:33
void IncrementRenderBlockingAsyncEventsCount()
Definition: resource_namer.h:32
Definition: critical_images_finder.h:52
void RequestFlush()
Definition: rewrite_driver.h:917
OutputResourceKind
Definition: output_resource_kind.h:26
Definition: fallback_property_page.h:38