start_post_scan(). * Operates independently of the batch queue — does not write to cmplz_wsc_batch_map. * * ## WSC batch scan (automated) * * Runs WSC per-post scans across all published posts of every scannable post type, * up to cmplz_wsc_batch_max_items (default 200), independent of cmplz_pages_list. * * Persistent state — option cmplz_wsc_batch_map: * queue int[] Post IDs waiting to be dispatched. * inflight array Post IDs dispatched, awaiting webhook { post_id => scan_id }. * done int[] Post IDs completed or skipped in the current cycle. * * Advancement is webhook-driven: each webhook delivery frees one concurrency slot * and schedules a single cmplz_wsc_batch_dispatch cron event (+5 s) that runs * batch_dispatch() for the next post. The cron heartbeat * (cmplz_every_five_minutes_hook) provides kick-start and stall recovery when * webhooks fail to arrive (registry TTL). * * ## WSC webhook routing (pro override) * * cmplz_wsc_scan_validate_request Validates both site-level and per-post scan IDs * via cmplz_wsc_registry, replacing the free * path-based validation for scan webhooks. * cmplz_wsc_scan_webhook_handled Routes per-post webhooks to the pro handler; * site-level webhooks fall through to free handler. * * ## Registry * * cmplz_wsc_registry (class-wsc-registry.php) is the TTL-backed store for active * scan IDs. It covers both site-level (cmplz_wsc_scan_id option) and per-post * (cmplz_wsc_scan_registry option) scans. Concurrency cap defaults to 1 — the WSC * API serialises scans per account (see CONCURRENCY_CAP). Rate limiting (15 calls / 5 min) is enforced * in this class via check_rate_limit(). * * Instantiated as COMPLIANZ::$wsc_scan_pro in the main plugin class. */ if ( ! 
class_exists( 'cmplz_wsc_scan_pro' ) ) {
	// phpcs:disable PEAR.NamingConventions.ValidClassName.StartWithCapital, PEAR.NamingConventions.ValidClassName.Invalid, Squiz.Commenting.ClassComment.Missing
	class cmplz_wsc_scan_pro {
		// phpcs:enable PEAR.NamingConventions.ValidClassName.StartWithCapital, PEAR.NamingConventions.ValidClassName.Invalid, Squiz.Commenting.ClassComment.Missing

		/**
		 * Maximum simultaneous per-post WSC scans in flight site-wide.
		 *
		 * The WSC API serialises scans per account: while one scan is running, any
		 * new request returns the same UUID regardless of the requested URL. Cap=1
		 * ensures batch_dispatch() never attempts a second API call before the first
		 * webhook fires, eliminating wasted duplicate requests.
		 *
		 * Can be overridden at runtime via the cmplz_wsc_concurrency_cap transient
		 * (set by handle_cb_response() from the CB API response) if the API ever
		 * supports concurrent scans per account. Read it through
		 * get_concurrency_cap() so every caller honours the override.
		 */
		private const CONCURRENCY_CAP = 1;

		/** Maximum WSC API calls allowed per rate-limit window. */
		private const RATE_LIMIT_MAX = 15;

		/** Seconds a post must wait before being re-scanned by the manual trigger (1 hour). */
		private const COOLDOWN_SECONDS = 3600;

		/** Seconds a post must wait before being re-scanned by the batch (30 days). */
		private const BATCH_COOLDOWN_SECONDS = 2592000;

		/** Default maximum post IDs fetched per batch_get_page_ids() call. */
		private const DEFAULT_MAX_ITEMS = 200;

		/** WordPress option key for the persistent batch map { queue, inflight, done }. */
		private const BATCH_MAP_OPTION = 'cmplz_wsc_batch_map';

		/** Post meta key storing the Unix timestamp of the last WSC scan completion. */
		private const META_WSC_SCANNED = '_cmplz_wsc_scanned_post';

		/** Transient key holding the WSC API rate-limit counter { count, since }. */
		private const RATE_LIMIT_KEY = 'cmplz_wsc_api_rate';

		/** Rate-limit window in seconds (5 minutes). */
		private const RATE_LIMIT_WINDOW = 300;

		/**
		 * Singleton instance.
		 *
		 * @var self
		 */
		private static $_this;

		/**
		 * Constructor.
		 *
		 * Enforces the singleton (a second instantiation ends in wp_die()) and
		 * registers every filter/action this class hooks into.
		 */
		public function __construct() {
			if ( isset( self::$_this ) ) {
				wp_die( esc_html( sprintf( '%s is a singleton class and you cannot create a second instance.', get_class( $this ) ) ) );
			}

			self::$_this = $this;

			// WSC webhook routing (pro overrides free path-based validation).
			add_filter( 'cmplz_wsc_scan_validate_request', array( $this, 'validate_request' ), 10, 3 );
			add_filter( 'cmplz_wsc_scan_webhook_handled', array( $this, 'webhook_handler' ), 10, 2 );

			// Local scanner pro extensions.
			add_filter( 'cmplz_cookiescan_post_types', array( $this, 'scan_post_types' ) );
			add_filter( 'cmplz_scan_batch_size', array( $this, 'scan_batch_size' ) );
			add_filter( 'cmplz_scan_fixed_pages', array( $this, 'fixed_pages_woocommerce' ) );
			add_filter( 'cmplz_scan_fixed_pages', array( $this, 'fixed_pages_edd' ) );

			// Show the Save button on the cookie-scan page — only premium has a writable
			// field there (wsc_scan_post_types). Free ships with save_buttons_required=false
			// because the multicheckbox is locked and the rest of the page is actions only.
			add_filter( 'cmplz_menu', array( $this, 'enable_cookie_scan_save_button' ) );

			// Scan column UI — pro state + button cascade.
			add_filter( 'cmplz_scan_column_wsc_state', array( $this, 'scan_column_wsc_state' ), 10, 2 );
			add_action( 'cmplz_render_scan_column_button', array( $this, 'render_scan_column_button' ), 10, 2 );

			// Batch lifecycle.
			add_action( 'cmplz_scan_reset', array( $this, 'batch_reset' ) );
			add_action( 'cmplz_every_five_minutes_hook', array( $this, 'batch_dispatch' ) );
			add_action( 'cmplz_wsc_batch_dispatch', array( $this, 'batch_dispatch' ) );
			add_action( 'cmplz_wsc_api_open_response', array( $this, 'handle_cb_response' ), 10, 2 );

			// WSC settings response — inject batch stats (pro-only key).
			add_filter( 'cmplz_wsc_actions_response', array( $this, 'add_batch_stats_to_response' ) );
		}

		/**
		 * Return singleton instance.
		 *
		 * @return self
		 */
		public static function this(): self {
			return self::$_this;
		}

		// ── WSC webhook routing ───────────────────────────────────────────────────

		/**
		 * Validate WSC webhook requests using the pro registry.
		 *
		 * Handles both site-level and per-post scan IDs.
		 * Returns [] (valid) to skip free default validation.
		 * Returns an error array to reject the request.
		 * Returns null to fall through to free default validation (checks path).
		 *
		 * @param array|null      $result  Existing filter value.
		 * @param WP_REST_Request $request Incoming REST request.
		 * @param string          $type    'scan' or 'checks'.
		 * @return array|null
		 */
		public function validate_request( $result, WP_REST_Request $request, string $type ): ?array {
			// The 'checks' webhook path is validated by the free handler — nothing to do here.
			if ( 'scan' !== $type ) {
				return null;
			}

			$header_error = cmplz_wsc_api::validate_scan_headers( $request );
			if ( ! empty( $header_error ) ) {
				return $header_error;
			}

			$body        = json_decode( $request->get_body() );
			$incoming_id = $body->data->id ?? '';

			// The body is untrusted: a non-string id (array/object) is rejected
			// before it can reach the registry lookup.
			// Registry covers both site-level (cmplz_wsc_scan_id option) and per-post
			// (cmplz_wsc_scan_registry option) scan IDs within their 2-hour TTL.
			if ( ! is_string( $incoming_id ) || ! $incoming_id || ! COMPLIANZ::$wsc_registry->is_valid( $incoming_id ) ) {
				return array(
					'code'    => 'invalid_wsc_scan',
					'message' => 'No active scan found.',
					'status'  => 400,
				);
			}

			// Return an empty array to signal "valid" and prevent the free handler
			// from running its own (path-based) validation on this request.
			return array();
		}

		/**
		 * Route WSC webhook to per-post handler when applicable.
		 *
		 * For per-post scans: stores cookies, timestamps the post meta, frees the
		 * registry slot, advances the batch queue (if this post was part of a batch),
		 * then short-circuits default handling with a 200 response.
		 *
		 * For site-level scans: returns null so the free handler runs unchanged.
		 *
		 * @param WP_REST_Response|null $handled Existing filter value.
		 * @param WP_REST_Request       $request Incoming REST request.
		 * @return WP_REST_Response|null
		 */
		public function webhook_handler( $handled, WP_REST_Request $request ): ?WP_REST_Response {
			$result      = json_decode( $request->get_body() );
			$incoming_id = $result->data->id ?? '';

			// Untrusted body: only a non-empty string can be a scan ID.
			if ( ! is_string( $incoming_id ) || ! $incoming_id ) {
				return null;
			}

			// Site-level scans have type 'site' — let the free handler process them.
			// Only intercept per-post scans initiated by start_post_scan().
			$type = COMPLIANZ::$wsc_registry->get_type( $incoming_id );
			if ( 'per_post' !== $type ) {
				return null;
			}

			// Re-fetch the registry entry; it may have expired (2-hour TTL) between
			// validation and this callback if the WSC API was slow to respond.
			$entry = COMPLIANZ::$wsc_registry->get_per_post_entry( $incoming_id );
			if ( ! $entry ) {
				return new WP_REST_Response( 'Scan expired.', 400 );
			}

			// Cast so the ID matches the int[] lists kept in the batch map.
			$post_id = (int) $entry['post_id'];

			// Merge discovered trackers into the shared cookie store.
			// wsc_scan_store_cookies() deduplicates against existing cookies.
			$cookies = $result->data->result->trackers ?? array();
			if ( ! empty( $cookies ) ) {
				COMPLIANZ::$wsc_scanner->wsc_scan_store_cookies( $cookies );
			}

			// Timestamp drives the cooldown check in start_post_scan()
			// and the WSC-done status read in cookie/class-scan.php column render.
			update_post_meta( $post_id, self::META_WSC_SCANNED, time() );

			// Remove from registry so the concurrency slot is freed for the next dispatch.
			COMPLIANZ::$wsc_registry->remove( $incoming_id );

			// If this post was part of the batch queue, move it from inflight → done
			// and schedule a dispatch to fill the freed slot.
			$map = $this->batch_get_map();
			if ( isset( $map['inflight'][ $post_id ] ) ) {
				unset( $map['inflight'][ $post_id ] );
				$map['done'][] = $post_id;
				$this->batch_save_map( $map );

				// Schedule a single cron event instead of calling batch_dispatch() directly.
				// Webhooks fire in an unauthenticated REST context where admin-only classes
				// (cmplz_wsc, cmplz_wsc_auth) are not loaded. Cron runs with
				// cmplz_admin_logged_in() = true, so all deps are available.
				// Delay 5 s: WSC API serialises scans per account — firing dispatch
				// immediately returns the same UUID (slot not freed yet) causing a stall.
				if ( ! wp_next_scheduled( 'cmplz_wsc_batch_dispatch' ) ) {
					wp_schedule_single_event( time() + 5, 'cmplz_wsc_batch_dispatch' );
				}
			}

			return new WP_REST_Response( 'Per-post cookies updated!', 200 );
		}

		// ── Local scanner pro extensions ──────────────────────────────────────────

		/**
		 * Extend the local scanner to the user-selected public post types (pro-only).
		 *
		 * Pro override of the free default (post + page). Applies to the local iframe
		 * scanner, the WSC batch query (batch_get_page_ids()) and the scan column
		 * registration — all consumers of get_scannable_post_types().
		 *
		 * Posts and pages are always included (free baseline — never narrowed).
		 * Custom post types are filtered by the wsc_scan_post_types multicheckbox:
		 *   - option never saved → all public CPTs (field default, preserves
		 *     pre-existing premium behaviour on upgrade)
		 *   - option saved       → only the selected CPTs
		 *
		 * @return array
		 */
		public function scan_post_types(): array {
			$universe = COMPLIANZ::$scan->get_public_scannable_post_types();
			$selected = cmplz_get_option( 'wsc_scan_post_types' );

			// Option never saved (fresh upgrade) → fall back to the full universe so
			// existing premium behaviour is preserved until the user makes a choice.
			if ( ! is_array( $selected ) ) {
				return array_values( $universe );
			}

			return array_values( array_intersect( $universe, $selected ) );
		}

		/**
		 * Increase the local iframe batch size for pro installs (pro-only).
		 *
		 * Pro override of the free default (5). Applies to the local scanner only —
		 * controls how many post IDs get_pages_list_single_run() loads per cycle.
		 * The WSC batch uses its own cap (cmplz_wsc_batch_max_items).
		 *
		 * @return int
		 */
		public function scan_batch_size(): int {
			return 20;
		}

		/**
		 * Enable the Save button on the cookie-scan settings page (pro-only).
		 *
		 * Free ships with save_buttons_required=false because the only writable
		 * field there (wsc_scan_post_types multicheckbox) is locked. Premium
		 * unlocks the multicheckbox, so the page needs a Save button.
		 *
		 * @param array $menu Menu structure from cmplz_menu().
		 * @return array
		 */
		public function enable_cookie_scan_save_button( array $menu ): array {
			$this->set_cookie_scan_save_buttons_required( $menu );
			return $menu;
		}

		/**
		 * Recursive walker — toggles save_buttons_required=true on the
		 * cookie-scan menu item wherever it sits in the nested menu tree.
		 *
		 * Stops at the first match (depth-first through each item's menu_items).
		 *
		 * @param array $items Menu items array (passed by reference).
		 * @return bool True when the item is found and updated.
		 */
		private function set_cookie_scan_save_buttons_required( array &$items ): bool {
			foreach ( $items as &$item ) {
				if ( isset( $item['id'] ) && 'cookie-scan' === $item['id'] ) {
					$item['save_buttons_required'] = true;
					return true;
				}

				if ( ! empty( $item['menu_items'] ) && is_array( $item['menu_items'] ) ) {
					if ( $this->set_cookie_scan_save_buttons_required( $item['menu_items'] ) ) {
						return true;
					}
				}
			}

			return false;
		}

		/**
		 * Add WooCommerce critical pages to the local scanner fixed-page list (pro-only).
		 *
		 * Pro extension of cmplz_scan_fixed_pages — the free version includes only
		 * 'home' (and optionally 'remote' and 'loginpage'). These WooCommerce pages
		 * are added to every local scan cycle regardless of batch rotation because
		 * they load third-party cookies (payment providers, cart scripts) that must
		 * always be detected.
		 *
		 * @param array $pages Existing fixed page IDs.
		 * @return array
		 */
		public function fixed_pages_woocommerce( array $pages ): array {
			if ( ! class_exists( 'WooCommerce' ) ) {
				return $pages;
			}

			// intval + array_filter drops unset options (0) so only real page IDs remain.
			$woo_ids = array_filter(
				array_map(
					'intval',
					array(
						get_option( 'woocommerce_shop_page_id' ),
						get_option( 'woocommerce_cart_page_id' ),
						get_option( 'woocommerce_checkout_page_id' ),
						get_option( 'woocommerce_myaccount_page_id' ),
					)
				)
			);

			return array_merge( $pages, array_values( $woo_ids ) );
		}

		/**
		 * Add Easy Digital Downloads critical pages to the local scanner fixed-page list (pro-only).
		 *
		 * Pro extension of cmplz_scan_fixed_pages — mirrors the WooCommerce equivalent
		 * for EDD stores. Checkout, success, failure and purchase history pages are
		 * included in every local scan cycle regardless of batch rotation.
		 *
		 * @param array $pages Existing fixed page IDs.
		 * @return array
		 */
		public function fixed_pages_edd( array $pages ): array {
			// Guard both the class and the helper function: some EDD builds load the
			// class without registering edd_get_option() (e.g. during plugin updates).
			if ( ! class_exists( 'Easy_Digital_Downloads' ) || ! function_exists( 'edd_get_option' ) ) {
				return $pages;
			}

			// intval + array_filter drops unset options (0) so only real page IDs remain.
			$edd_ids = array_filter(
				array_map(
					'intval',
					array(
						edd_get_option( 'purchase_page' ),
						edd_get_option( 'success_page' ),
						edd_get_option( 'failure_page' ),
						edd_get_option( 'purchase_history_page' ),
					)
				)
			);

			return array_merge( $pages, array_values( $edd_ids ) );
		}

		// ── Scan column UI ────────────────────────────────────────────────────────

		/**
		 * Populate WSC state for the scan column (pro-only).
		 *
		 * Adds wsc_scanned_at, wsc_done, wsc_inflight, in_wsc_cooldown and
		 * in_batch_queue to the state array. The batch queue is read from the
		 * option once per request (static cache) because this runs per table row.
		 *
		 * @param array $state   Default state array from free.
		 * @param int   $post_id Post ID.
		 * @return array
		 */
		public function scan_column_wsc_state( array $state, int $post_id ): array {
			$wsc_scanned_at = (int) get_post_meta( $post_id, self::META_WSC_SCANNED, true );

			static $batch_queue = null;
			if ( null === $batch_queue ) {
				$map         = get_option( self::BATCH_MAP_OPTION, array() );
				$batch_queue = isset( $map['queue'] ) && is_array( $map['queue'] ) ? $map['queue'] : array();
			}

			$state['wsc_scanned_at']  = $wsc_scanned_at;
			$state['wsc_done']        = $wsc_scanned_at > 0;
			$state['wsc_inflight']    = COMPLIANZ::$wsc_registry->has_active_per_post_for_post_id( $post_id );
			$state['in_wsc_cooldown'] = $wsc_scanned_at > 0 && ( time() - $wsc_scanned_at ) < $this->get_batch_cooldown();
			$state['in_batch_queue']  = in_array( $post_id, $batch_queue, true );

			return $state;
		}

		/**
		 * Render the WSC scan button for the post column (pro-only).
		 *
		 * Hooked to cmplz_render_scan_column_button fired by render_scan_post_column().
		 *
		 * Button cascade (evaluated top-to-bottom):
		 *   1. WSC not authenticated        → "Activate Website Scan" link → onboarding.
		 *   2. WSC authenticated + disabled → "Scan" disabled + tooltip.
		 *   3. WSC inflight                 → "Scanning…" disabled.
		 *   4. In batch queue               → "Scan" disabled + tooltip (queued).
		 *   5. In 30-day cooldown           → "Scan" disabled + tooltip (days remaining).
		 *   6. Ready                        → "Scan" active button.
		 *
		 * NOTE(review): in the reviewed copy the markup string literals below are
		 * empty, and $onboarding_url / $tip are computed but never output —
		 * presumably the HTML was stripped in transit. Confirm against the
		 * canonical file; the statements are left exactly as found.
		 *
		 * @param int   $post_id Post ID.
		 * @param array $state   WSC state array populated by scan_column_wsc_state().
		 */
		public function render_scan_column_button( int $post_id, array $state ): void {
			$wsc_authed = class_exists( 'cmplz_wsc_auth' ) && cmplz_wsc_auth::wsc_is_authenticated();
			if ( ! $wsc_authed ) {
				$onboarding_url = admin_url( 'admin.php?page=complianz&websitescan#dashboard' );
				echo '' . esc_html__( 'Activate Website Scan', 'complianz-gdpr' ) . '';
				return;
			}

			$wsc_enabled = isset( COMPLIANZ::$wsc_scanner ) && COMPLIANZ::$wsc_scanner->wsc_scan_enabled();
			if ( ! $wsc_enabled ) {
				echo '' . '';
				return;
			}

			if ( $state['wsc_inflight'] ) {
				echo '';
				return;
			}

			if ( ! empty( $state['in_batch_queue'] ) ) {
				echo '' . '';
				return;
			}

			if ( $state['in_wsc_cooldown'] ) {
				$wsc_scanned_at = $state['wsc_scanned_at'];
				$days_since     = (int) floor( ( time() - $wsc_scanned_at ) / DAY_IN_SECONDS );
				$days_remaining = (int) ceil( ( $this->get_batch_cooldown() - ( time() - $wsc_scanned_at ) ) / DAY_IN_SECONDS );

				if ( 0 === $days_since ) {
					$scanned_msg = __( 'Scanned today.', 'complianz-gdpr' );
				} else {
					$scanned_msg = sprintf(
						/* translators: %d: number of days since last scan */
						_n( 'Scanned %d day ago.', 'Scanned %d days ago.', $days_since, 'complianz-gdpr' ),
						$days_since
					);
				}

				$rescan_msg = sprintf(
					/* translators: %d: number of days until re-scan is available */
					_n( 'Re-scan available in %d day.', 'Re-scan available in %d days.', $days_remaining, 'complianz-gdpr' ),
					$days_remaining
				);

				$tip = $scanned_msg . ' ' . $rescan_msg;
				echo '' . '';
				return;
			}

			echo '';
		}

		// ── WSC settings response extension ──────────────────────────────────────

		/**
		 * Inject batch stats into the cmplz_wsc_actions_response filter value.
		 *
		 * Hooked to cmplz_wsc_actions_response fired by cmplz_wsc_settings::handle_wsc_actions().
		 * Adds batch_stats so the key only exists in premium responses.
		 *
		 * @param array $response Existing response array from handle_wsc_actions().
		 * @return array
		 */
		public function add_batch_stats_to_response( array $response ): array {
			$response['batch_stats'] = $this->get_batch_stats();
			return $response;
		}

		/**
		 * Return WSC per-post batch counts from the persistent batch map.
		 *
		 * The enabled flag mirrors the wsc_batch_scan_enabled opt-in so the React
		 * UI can distinguish "batch disabled" (zero counts are not meaningful)
		 * from "batch idle / completed" (zero counts mean nothing left to do).
		 *
		 * @return array{ enabled: bool, queue: int, inflight: int, done: int }
		 */
		public function get_batch_stats(): array {
			// batch_get_map() guarantees all three keys exist and are arrays.
			$map = $this->batch_get_map();

			return array(
				'enabled'  => (bool) cmplz_get_option( 'wsc_batch_scan_enabled' ),
				'queue'    => count( $map['queue'] ),
				'inflight' => count( $map['inflight'] ),
				'done'     => count( $map['done'] ),
			);
		}

		// ── WSC per-post scan (manual trigger) ────────────────────────────────────

		/**
		 * Fire a WSC per-post scan for a single URL (manual trigger).
		 *
		 * Used by the "Scan" button in the post list column via
		 * COMPLIANZ::$wsc_scan_pro->start_post_scan(). Operates independently of
		 * the batch queue — does not write to cmplz_wsc_batch_map.
		 *
		 * Guards (evaluated in order):
		 *   1. WSC enabled.
		 *   2. Per-post cooldown — skip if WSC scan completed within last hour.
		 *   3. No duplicate in-flight scan for same post.
		 *   4. Concurrency cap — max get_concurrency_cap() per-post scans in flight
		 *      site-wide (CONCURRENCY_CAP unless overridden remotely).
		 *   5. Post must have a valid permalink.
		 *   6. Global rate limit — max 15 WSC API calls per 5-minute window.
		 *      Checked after the permalink so a post that cannot be scanned does
		 *      not consume a rate-limit token.
		 *   7. WSC API must return a valid UUID not already in the registry
		 *      (duplicate UUID = WSC serialised a prior scan; reject to prevent stall).
		 *
		 * On success: registers scan in registry and clears META_WSC_SCANNED so the
		 * column shows "In progress" instead of the previous stale "Scanned" state.
		 *
		 * @param int $post_id WordPress post ID.
		 * @return string|null Null on success, error code string on failure
		 *                     ('wsc_disabled', 'in_cooldown', 'already_inflight',
		 *                     'cap_reached', 'no_permalink', 'rate_limited', 'api_error').
		 */
		public function start_post_scan( int $post_id ): ?string {
			if ( ! COMPLIANZ::$wsc_scanner->wsc_scan_enabled() ) {
				return 'wsc_disabled';
			}

			// META_WSC_SCANNED stores a Unix timestamp written by the webhook.
			// A cooldown prevents hammering the API on repeated manual triggers.
			$last_wsc = (int) get_post_meta( $post_id, self::META_WSC_SCANNED, true );
			if ( $last_wsc && ( time() - $last_wsc ) < self::COOLDOWN_SECONDS ) {
				return 'in_cooldown';
			}

			// Prevent launching a second scan for the same post while one is already
			// in flight — the registry TTL is 2 hours so duplicates could accumulate.
			if ( COMPLIANZ::$wsc_registry->has_active_per_post_for_post_id( $post_id ) ) {
				return 'already_inflight';
			}

			// Site-wide cap: never hold more per-post scans open than the effective cap.
			if ( count( COMPLIANZ::$wsc_registry->get_all_per_post() ) >= $this->get_concurrency_cap() ) {
				return 'cap_reached';
			}

			// Resolve the URL before touching the rate limiter: check_rate_limit()
			// increments its counter on success, and no API call is made without a URL.
			$permalink = get_permalink( $post_id );
			if ( ! $permalink ) {
				return 'no_permalink';
			}

			// Rate limiter: max 15 WSC API calls per 5-minute window.
			if ( ! $this->check_rate_limit() ) {
				return 'rate_limited';
			}

			// POST to scan.complianz.io — returns a scan_id string on success, falsy on failure.
			$scan_id = COMPLIANZ::$wsc_scanner->wsc_scan_start( $permalink );
			if ( ! $scan_id || ! wp_is_uuid( $scan_id ) ) {
				return 'api_error';
			}

			// The WSC API serialises scans: while one is running, any new request returns
			// the same UUID. If the returned scan_id already exists in the registry it
			// belongs to another scan — adding it would overwrite that entry and silently
			// drop the existing scan's webhook delivery.
			if ( COMPLIANZ::$wsc_registry->get_per_post_entry( $scan_id ) !== null ) {
				return 'api_error';
			}

			// Register the scan so the webhook handler can validate and route it.
			COMPLIANZ::$wsc_registry->add_per_post( $scan_id, $post_id, $permalink );

			// Clear the previous completion timestamp so the column status falls back
			// to "Queued" / "wsc_inflight" instead of showing the stale "Partial" state.
			delete_post_meta( $post_id, self::META_WSC_SCANNED );

			return null;
		}

		// ── WSC batch scan ────────────────────────────────────────────────────────

		/**
		 * Dispatch WSC per-post scans for pending posts in the batch queue.
		 *
		 * Flow:
		 *   1. Load the persistent batch map { queue, inflight, done }.
		 *
		 *   2. Recover stalled inflight entries: if a scan_id is no longer valid in the
		 *      registry (TTL expired without webhook), poll the WSC API. A completed
		 *      scan is processed and moved to done; anything else goes back to the
		 *      queue front so it is retried in this dispatch run.
		 *
		 *   3. If queue and inflight are both empty, fetch the next sub-cycle:
		 *      call batch_get_page_ids(done) to get the next max_items posts excluding
		 *      already-processed ones. When that returns nothing that is not already
		 *      in done (fixed pages are always re-included, so the result alone is
		 *      never empty on WooCommerce/EDD sites), all posts have been covered in
		 *      this full cycle — reset done and start fresh. This allows sites with
		 *      more than max_items posts to progress through all posts across
		 *      multiple sub-cycles.
		 *
		 *   4. Compute free slots = cap − count(inflight). Dequeue and dispatch posts until
		 *      slots are full or queue is empty. Per-post guards (in order):
		 *        a. Per-post WSC batch cooldown (< 30 days since last scan) → move to done, skip.
		 *        b. Missing permalink → move to done, skip.
		 *        c. Rate limit exhausted → put post back at queue front, break.
		 *        d. WSC API failure → move to done (transient; retried next cycle), skip.
		 *        e. UUID already in flight (batch map or registry) → re-queue, break.
		 *        f. Success → add to registry + inflight, clear _cmplz_wsc_scanned_post.
		 *
		 *   5. Persist updated map and return count of scans dispatched.
		 *
		 * Webhook-driven advancement: webhook_handler() schedules a cmplz_wsc_batch_dispatch
		 * single cron event (+5 s) after moving a completed batch post from inflight → done.
		 * The delay is required because webhooks fire in an unauthenticated REST context
		 * where admin-only classes are unavailable, and because the WSC API may not free
		 * the concurrency slot before an immediate dispatch would run.
		 *
		 * @return int Number of scans dispatched in this run.
		 */
		public function batch_dispatch(): int {
			if ( ! COMPLIANZ::$wsc_scanner->wsc_scan_enabled() ) {
				return 0;
			}

			// Background batch is opt-in (default off). Manual per-post scans (column
			// button) bypass this gate — they hit start_post_scan() directly.
			if ( ! cmplz_get_option( 'wsc_batch_scan_enabled' ) ) {
				return 0;
			}

			$map = $this->batch_get_map();

			// ── Step 2: recover stalled inflight entries ──────────────────────────
			// A registry entry expires after 2 hours (SCAN_TTL). If the webhook never
			// arrived (e.g. WSC API timeout), the slot is silently held forever.
			// Before requeuing, poll the WSC API: if the scan completed but the webhook
			// was lost, process the result directly and mark as done instead of re-scanning.
			foreach ( array_keys( $map['inflight'] ) as $post_id ) {
				$scan_id = $map['inflight'][ $post_id ];
				if ( COMPLIANZ::$wsc_registry->is_valid( $scan_id ) ) {
					continue;
				}

				$response = COMPLIANZ::$wsc_scanner->wsc_scan_retrieve_scan( $scan_id );
				$data     = ! is_wp_error( $response ) ? json_decode( wp_remote_retrieve_body( $response ) ) : null;

				unset( $map['inflight'][ $post_id ] );

				if ( $data && isset( $data->status ) && 'completed' === $data->status ) {
					$cookies = $data->result->trackers ?? array();
					if ( ! empty( $cookies ) ) {
						COMPLIANZ::$wsc_scanner->wsc_scan_store_cookies( $cookies );
					}
					update_post_meta( $post_id, self::META_WSC_SCANNED, time() );
					COMPLIANZ::$wsc_registry->remove( $scan_id );
					$map['done'][] = $post_id;
				} else {
					array_unshift( $map['queue'], (int) $post_id );
				}
			}

			// ── Step 3: initialize next sub-cycle if idle ────────────────────────
			// queue and inflight both empty means the current sub-cycle finished.
			// Fetch the next batch excluding already-done posts so large sites
			// (> max_items posts) progress through all posts across sub-cycles
			// without re-scanning the same set on every init.
			if ( empty( $map['queue'] ) && empty( $map['inflight'] ) ) {
				$map['queue'] = $this->batch_get_page_ids( $map['done'] );

				// batch_get_page_ids() always re-includes fixed pages, so an empty
				// result cannot be the only "cycle complete" signal: the cycle is
				// complete when the fetch contains nothing outside the done list.
				// Without this, done is never reset on shops and finished posts
				// stay excluded until a manual reset.
				if ( empty( array_diff( $map['queue'], $map['done'] ) ) ) {
					// Full cycle complete — reset done and re-init from scratch.
					$map['done']  = array();
					$map['queue'] = $this->batch_get_page_ids( array() );
				}

				if ( empty( $map['queue'] ) ) {
					$this->batch_save_map( $map );
					return 0;
				}
			}

			// ── Step 4: dispatch ──────────────────────────────────────────────────
			$slots_free = $this->get_concurrency_cap() - count( $map['inflight'] );
			$dispatched = 0;

			while ( $slots_free > 0 && ! empty( $map['queue'] ) ) {
				$post_id = array_shift( $map['queue'] );

				// (a) Cooldown: post scanned within 30 days — count as done, skip.
				$last_wsc = (int) get_post_meta( $post_id, self::META_WSC_SCANNED, true );
				if ( $last_wsc && ( time() - $last_wsc ) < self::BATCH_COOLDOWN_SECONDS ) {
					$map['done'][] = $post_id;
					continue;
				}

				// (b) No public URL — cannot scan; skip for this cycle.
				$permalink = get_permalink( $post_id );
				if ( ! $permalink ) {
					$map['done'][] = $post_id;
					continue;
				}

				// (c) Rate limit exhausted (15 calls / 5 min).
				// check_rate_limit() returns false WITHOUT incrementing when at max.
				if ( ! $this->check_rate_limit() ) {
					array_unshift( $map['queue'], $post_id );
					break;
				}

				// (d) WSC API failure — transient error; move to done and retry next cycle.
				$scan_id = COMPLIANZ::$wsc_scanner->wsc_scan_start( $permalink );
				if ( ! $scan_id || ! wp_is_uuid( $scan_id ) ) {
					$map['done'][] = $post_id;
					continue;
				}

				// (e) The WSC API serialises scans: while one is running, any new scan
				// request returns the same UUID. That running scan may belong to this
				// batch (inflight map) or to a manual start_post_scan() call (registry
				// only) — registering it again would overwrite the owner's registry
				// entry. Re-queue and stop; the next scheduled dispatch (webhook cron
				// event or the five-minute heartbeat) picks the post up again.
				if ( in_array( $scan_id, $map['inflight'], true ) || COMPLIANZ::$wsc_registry->get_per_post_entry( $scan_id ) ) {
					array_unshift( $map['queue'], $post_id );
					break;
				}

				// (f) Success: register in TTL-backed registry, mark as inflight,
				// clear completion timestamp so the column shows "Queued"/"wsc_inflight".
				COMPLIANZ::$wsc_registry->add_per_post( $scan_id, $post_id, $permalink );
				delete_post_meta( $post_id, self::META_WSC_SCANNED );
				$map['inflight'][ $post_id ] = $scan_id;
				--$slots_free;
				++$dispatched;
			}

			$this->batch_save_map( $map );
			return $dispatched;
		}

		/**
		 * Effective concurrency cap for per-post scans.
		 *
		 * Uses the cmplz_wsc_concurrency_cap transient (written by
		 * handle_cb_response()) when it holds a positive integer, otherwise
		 * CONCURRENCY_CAP.
		 *
		 * @return int Cap, always >= 1.
		 */
		private function get_concurrency_cap(): int {
			$remote_cap = (int) get_transient( 'cmplz_wsc_concurrency_cap' );
			return $remote_cap > 0 ? $remote_cap : self::CONCURRENCY_CAP;
		}

		/**
		 * Load and normalize the persistent batch map from the database.
		 *
		 * The map has three lists:
		 *   queue    — post IDs waiting to be dispatched to the WSC API.
		 *   inflight — post IDs dispatched but webhook not yet received: { post_id => scan_id }.
		 *   done     — post IDs completed (webhook received) or skipped in this cycle.
		 *
		 * Every list is guaranteed to be an array even when the stored option is
		 * missing, malformed, or has a non-array value under one of the keys, so
		 * callers can pass the lists straight to array functions.
		 *
		 * @return array{ queue: int[], inflight: array, done: int[] }
		 */
		private function batch_get_map(): array {
			$raw = get_option( self::BATCH_MAP_OPTION, array() );
			$map = array_merge(
				array(
					'queue'    => array(),
					'inflight' => array(),
					'done'     => array(),
				),
				is_array( $raw ) ? $raw : array()
			);

			foreach ( array( 'queue', 'inflight', 'done' ) as $key ) {
				if ( ! is_array( $map[ $key ] ) ) {
					$map[ $key ] = array();
				}
			}

			return $map;
		}

		/**
		 * Persist the batch map to the database.
		 *
		 * The done list is de-duplicated first: fixed pages are re-fetched on every
		 * sub-cycle and would otherwise be appended again each time they are skipped.
		 * The option is stored with autoload disabled.
		 *
		 * @param array $map Normalized batch map.
		 */
		private function batch_save_map( array $map ): void {
			if ( isset( $map['done'] ) && is_array( $map['done'] ) ) {
				$map['done'] = array_values( array_unique( $map['done'] ) );
			}

			update_option( self::BATCH_MAP_OPTION, $map, false );
		}

		/**
		 * Return post IDs eligible for WSC batch scanning.
		 *
		 * Builds an independent list from all published posts of every scannable post
		 * type — not from cmplz_pages_list, which only covers the current local batch.
		 *
		 * $exclude receives the current cycle's done list so that large sites
		 * (> max_items posts) progress through ALL posts across sub-cycles rather
		 * than re-scanning the same max_items set on every init. When called with
		 * an empty exclude list, a full fresh cycle starts from scratch.
		 *
		 * Two separate queries to avoid a complex OR meta_query JOIN on wp_postmeta:
		 *   1. Posts never scanned by WSC (NOT EXISTS) — highest priority, filled first.
		 *   2. Posts with the oldest _cmplz_wsc_scanned_post timestamp (ASC) — fill remainder.
		 * The two queries together are capped at cmplz_wsc_batch_max_items (default 200).
		 *
		 * Fixed pages (WooCommerce, EDD) registered via the cmplz_scan_fixed_pages filter
		 * are prepended on top of that cap and removed from the $exclude list, so they
		 * are returned on every call regardless of done state.
		 *
		 * @param int[] $exclude Post IDs already processed in the current cycle.
		 * @return int[]
		 */
		private function batch_get_page_ids( array $exclude = array() ): array {
			$max_items  = (int) apply_filters( 'cmplz_wsc_batch_max_items', self::DEFAULT_MAX_ITEMS );
			$post_types = COMPLIANZ::$scan->get_scannable_post_types();

			if ( empty( $post_types ) ) {
				return array();
			}

			// Extract integer-only fixed pages (WooCommerce, EDD) from the shared filter.
			// String slots ('home', 'remote', 'loginpage') are discarded — they have no
			// permalink and would be silently skipped by the dispatch guard anyway.
			// Fixed pages bypass $exclude so they are dispatched on every sub-cycle.
			$fixed_ids = array_values(
				array_filter(
					array_map( 'intval', apply_filters( 'cmplz_scan_fixed_pages', array() ) ),
					static fn( int $id ) => $id > 0
				)
			);

			// Posts to exclude from queries: done list minus fixed pages.
			// Fixed pages are always re-included regardless of done state.
			// array( 0 ) stands in for "exclude nothing" when the list is empty.
			$diff          = array_values( array_diff( $exclude, $fixed_ids ) );
			$query_exclude = ! empty( $diff ) ? $diff : array( 0 );

			$base_args = array(
				'post_type'     => $post_types,
				'post_status'   => 'publish',
				'fields'        => 'ids',
				'no_found_rows' => true,
				'post__not_in'  => $query_exclude,
			);

			// Query 1: posts never scanned by WSC — simple NOT EXISTS, no JOIN on value.
			$never_scanned = array_map(
				'intval',
				get_posts(
					array_merge(
						$base_args,
						array(
							'posts_per_page' => $max_items,
							'meta_query'     => array(
								array(
									'key'     => self::META_WSC_SCANNED,
									'compare' => 'NOT EXISTS',
								),
							),
						)
					)
				)
			);

			if ( count( $never_scanned ) >= $max_items ) {
				return array_values( array_unique( array_merge( $fixed_ids, $never_scanned ) ) );
			}

			// Query 2: posts with meta, ordered oldest-first. Only runs when Query 1
			// doesn't fill the cap, so the JOIN cost is paid only when needed.
			$remaining  = $max_items - count( $never_scanned );
			$merged     = array_merge( $query_exclude, $never_scanned );
			$q2_exclude = ! empty( $merged ) ? $merged : array( 0 );

			$oldest_scanned = array_map(
				'intval',
				get_posts(
					array_merge(
						$base_args,
						array(
							'posts_per_page' => $remaining,
							'post__not_in'   => $q2_exclude,
							'meta_key'       => self::META_WSC_SCANNED,
							'meta_type'      => 'NUMERIC',
							'orderby'        => 'meta_value_num',
							'order'          => 'ASC',
						)
					)
				)
			);

			return array_values( array_unique( array_merge( $fixed_ids, $never_scanned, $oldest_scanned ) ) );
		}

		// ── Batch lifecycle ───────────────────────────────────────────────────────

		/**
		 * Clear the persistent batch map (UI reset / restart).
		 *
		 * Called by cmplz_scan::reset_scan() on manual reset or restart from the UI.
		 * Posts scanned within the last 30 days are still protected by the
		 * BATCH_COOLDOWN_SECONDS guard in batch_dispatch() and will be skipped
		 * when the batch re-initialises. Only posts scanned > 30 days ago (or never)
		 * will be re-dispatched.
		 *
		 * Not called on automatic monthly resets — those do not pass through reset_scan().
		 */
		public function batch_reset(): void {
			delete_option( self::BATCH_MAP_OPTION );
		}

		/**
		 * Expose the batch cooldown duration for use in column UI rendering.
		 *
		 * @return int Cooldown in seconds.
		 */
		public function get_batch_cooldown(): int {
			return self::BATCH_COOLDOWN_SECONDS;
		}

		// ── WSC API rate limiter ──────────────────────────────────────────────────

		/**
		 * Store WSC CB response throttle parameters as transients.
		 *
		 * Hooked on cmplz_wsc_api_open_response (free code escape hatch).
		 * Reads scanner_concurrency_cap, scanner_rate_limit_max and
		 * scanner_rate_limit_window from the CB response body and caches them so
		 * batch_dispatch(), start_post_scan() and check_rate_limit() can adapt
		 * without an extra HTTP call. The cap and max are cached for 10 minutes;
		 * the window is cached for the longer of 10 minutes and the window itself.
		 *
		 * @param object $body    Decoded CB response body.
		 * @param string $service Service key passed to wsc_api_open().
		 */
		public function handle_cb_response( object $body, string $service ): void {
			if ( 'scanner' !== $service ) {
				return;
			}

			if ( isset( $body->scanner_concurrency_cap ) ) {
				set_transient( 'cmplz_wsc_concurrency_cap', (int) $body->scanner_concurrency_cap, 10 * MINUTE_IN_SECONDS );
			}

			if ( isset( $body->scanner_rate_limit_max ) ) {
				set_transient( 'cmplz_wsc_rate_limit_max', (int) $body->scanner_rate_limit_max, 10 * MINUTE_IN_SECONDS );
			}

			if ( isset( $body->scanner_rate_limit_window ) ) {
				$window = (int) $body->scanner_rate_limit_window;
				set_transient( 'cmplz_wsc_rate_limit_window', $window, max( 10 * MINUTE_IN_SECONDS, $window ) );
			}
		}

		/**
		 * Fixed-window rate limiter for WSC API calls.
		 *
		 * Allows max RATE_LIMIT_MAX per-post scan API calls per RATE_LIMIT_WINDOW seconds.
		 * The counter { count, since } lives in a transient and restarts from zero
		 * once the window that began at `since` has elapsed. Both the cap
		 * (scanner_rate_limit_max) and the window (scanner_rate_limit_window)
		 * can be overridden remotely via the CB endpoint.
		 *
		 * Returns false WITHOUT incrementing the counter when the limit is reached,
		 * so callers can safely use the return value as a break signal. Returns
		 * true AFTER incrementing, so only call it immediately before an API call.
		 *
		 * @return bool True if the call is allowed.
		 */
		private function check_rate_limit(): bool {
			$now    = time();
			$cached = get_transient( self::RATE_LIMIT_KEY );

			// Anything other than a well-formed { count, since } array is treated
			// as "no counter yet" rather than trusted.
			if ( is_array( $cached ) && isset( $cached['count'], $cached['since'] ) ) {
				$data = array(
					'count' => (int) $cached['count'],
					'since' => (int) $cached['since'],
				);
			} else {
				$data = array(
					'count' => 0,
					'since' => $now,
				);
			}

			$remote_window = (int) get_transient( 'cmplz_wsc_rate_limit_window' );
			$window        = $remote_window > 0 ? $remote_window : self::RATE_LIMIT_WINDOW;

			// Window elapsed — start a new one.
			if ( $now - $data['since'] >= $window ) {
				$data = array(
					'count' => 0,
					'since' => $now,
				);
			}

			$remote_max = (int) get_transient( 'cmplz_wsc_rate_limit_max' );
			$max        = $remote_max > 0 ? $remote_max : self::RATE_LIMIT_MAX;

			if ( $data['count'] >= $max ) {
				return false;
			}

			++$data['count'];
			// TTL is twice the window so the counter outlives the window it tracks.
			set_transient( self::RATE_LIMIT_KEY, $data, $window * 2 );
			return true;
		}
	}
}