summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'CheckUser/src/CompareService.php')
-rw-r--r--CheckUser/src/CompareService.php220
1 files changed, 220 insertions, 0 deletions
diff --git a/CheckUser/src/CompareService.php b/CheckUser/src/CompareService.php
new file mode 100644
index 00000000..f1ab687e
--- /dev/null
+++ b/CheckUser/src/CompareService.php
@@ -0,0 +1,220 @@
+<?php
+
+namespace MediaWiki\CheckUser;
+
+use Wikimedia\Rdbms\ILoadBalancer;
+use Wikimedia\Rdbms\Subquery;
+
+class CompareService extends ChangeService {
+ /** @var int */
+ private $limit;
+
+ /**
+ * @param ILoadBalancer $loadBalancer
+ * @param UserManager $userManager
+ * @param int $limit Maximum number of rows to access (T245499)
+ */
+ public function __construct(
+ ILoadBalancer $loadBalancer,
+ UserManager $userManager,
+ $limit = 100000
+ ) {
+ parent::__construct( $loadBalancer, $userManager );
+ $this->limit = $limit;
+ }
+
+ /**
+ * Get edits made from an ip
+ *
+ * @param string $ipHex
+ * @param string|null $excludeUser
+ * @return int
+ */
+ public function getTotalEditsFromIp(
+ string $ipHex,
+ string $excludeUser = null
+ ) : int {
+ $db = $this->loadBalancer->getConnectionRef( DB_REPLICA );
+ $conds = [
+ 'cuc_ip_hex' => $ipHex,
+ 'cuc_type' => [ RC_EDIT, RC_NEW ],
+ ];
+
+ if ( $excludeUser ) {
+ $conds[] = 'cuc_user_text != ' . $db->addQuotes( $excludeUser );
+ }
+
+ return $db->selectRowCount( 'cu_changes', '*', $conds, __METHOD__ );
+ }
+
+ /**
+ * Get the compare query info
+ *
+ * @param string[] $targets
+ * @param string[] $excludeTargets
+ * @param string $start
+ * @return array
+ */
+ public function getQueryInfo( array $targets, array $excludeTargets, string $start ): array {
+ $db = $this->loadBalancer->getConnectionRef( DB_REPLICA );
+
+ if ( $targets === [] ) {
+ throw new \LogicException( 'Cannot get query info when $targets is empty.' );
+ }
+ $limit = (int)( $this->limit / count( $targets ) );
+
+ $sqlText = [];
+ foreach ( $targets as $target ) {
+ $info = $this->getQueryInfoForSingleTarget( $target, $excludeTargets, $start, $limit );
+ if ( $info !== null ) {
+ if ( !$db->unionSupportsOrderAndLimit() ) {
+ unset( $info['options']['ORDER BY'], $info['options']['LIMIT'] );
+ }
+
+ $sqlText[] = $db->selectSQLText(
+ $info['tables'],
+ $info['fields'],
+ $info['conds'],
+ __METHOD__,
+ $info['options']
+ );
+ }
+ }
+
+ $derivedTable = $db->unionQueries( $sqlText, $db::UNION_DISTINCT );
+
+ return [
+ 'tables' => [ 'a' => new Subquery( $derivedTable ) ],
+ 'fields' => [
+ 'cuc_user' => 'a.cuc_user',
+ 'cuc_user_text' => 'a.cuc_user_text',
+ 'cuc_ip' => 'a.cuc_ip',
+ 'cuc_ip_hex' => 'a.cuc_ip_hex',
+ 'cuc_agent' => 'a.cuc_agent',
+ 'first_edit' => 'MIN(a.cuc_timestamp)',
+ 'last_edit' => 'MAX(a.cuc_timestamp)',
+ 'total_edits' => 'count(*)',
+ ],
+ 'options' => [
+ 'GROUP BY' => [
+ 'cuc_user_text',
+ 'cuc_ip_hex',
+ 'cuc_agent',
+ ],
+ ],
+ ];
+ }
+
+ /**
+ * Get the query info for a single target.
+ *
+ * For the main investigation, this becomes a subquery that contributes to a derived
+ * table, used by getQueryInfo.
+ *
+ * For a limit check, this query is used to check whether the number of results for
+ * the target exceed the limit-per-target in getQueryInfo.
+ *
+ * @param string $target
+ * @param string[] $excludeTargets
+ * @param string $start
+ * @param int $limitPerTarget
+ * @param bool $limitCheck
+ * @return array|null Return null for invalid target
+ */
+ public function getQueryInfoForSingleTarget(
+ string $target,
+ array $excludeTargets,
+ string $start,
+ int $limitPerTarget,
+ $limitCheck = false
+ ) : ?array {
+ if ( $limitCheck ) {
+ $orderBy = null;
+ $offset = $limitPerTarget;
+ $limit = 1;
+ } else {
+ $orderBy = 'cuc_timestamp DESC';
+ $offset = null;
+ $limit = $limitPerTarget;
+ }
+
+ $conds = $this->buildTargetConds( $target );
+ if ( $conds === [] ) {
+ return null;
+ }
+
+ $conds = array_merge(
+ $conds,
+ $this->buildExcludeTargetsConds( $excludeTargets ),
+ $this->buildStartConds( $start )
+ );
+
+ $conds['cuc_type'] = [ RC_EDIT, RC_NEW ];
+
+ return [
+ 'tables' => 'cu_changes',
+ 'fields' => [
+ 'cuc_id',
+ 'cuc_user',
+ 'cuc_user_text',
+ 'cuc_ip',
+ 'cuc_ip_hex',
+ 'cuc_agent',
+ 'cuc_timestamp',
+ ],
+ 'conds' => $conds,
+ 'options' => [
+ 'ORDER BY' => $orderBy,
+ 'LIMIT' => $limit,
+ 'OFFSET' => $offset,
+ ],
+ ];
+ }
+
+ /**
+ * Check if we have incomplete data for any of the targets.
+ *
+ * @param string[] $targets
+ * @param string[] $excludeTargets
+ * @param string $start
+ * @return string[]
+ */
+ public function getTargetsOverLimit(
+ array $targets,
+ array $excludeTargets,
+ string $start
+ ) : array {
+ if ( $targets === [] ) {
+ return $targets;
+ }
+
+ $db = $this->loadBalancer->getConnectionRef( DB_REPLICA );
+
+ // If the database does not support order and limit on a UNION
+ // then none of the targets can be over the limit.
+ if ( !$db->unionSupportsOrderAndLimit() ) {
+ return [];
+ }
+
+ $targetsOverLimit = [];
+ $offset = (int)( $this->limit / count( $targets ) );
+
+ foreach ( $targets as $target ) {
+ $info = $this->getQueryInfoForSingleTarget( $target, $excludeTargets, $start, $offset, true );
+ if ( $info !== null ) {
+ $limitCheck = $db->select(
+ $info['tables'],
+ $info['fields'],
+ $info['conds'],
+ __METHOD__,
+ $info['options']
+ );
+ if ( $limitCheck->numRows() > 0 ) {
+ $targetsOverLimit[] = $target;
+ }
+ }
+ }
+
+ return $targetsOverLimit;
+ }
+}