summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'MLEB/Translate/ttmserver/ElasticSearchTTMServer.php')
-rw-r--r--MLEB/Translate/ttmserver/ElasticSearchTTMServer.php109
1 files changed, 60 insertions, 49 deletions
diff --git a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
index ff4c7b79..87d5e582 100644
--- a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
+++ b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php
@@ -8,6 +8,17 @@
* @ingroup TTMServer
*/
+use Elastica\Aggregation\Terms;
+use Elastica\Client;
+use Elastica\Document;
+use Elastica\Exception\ExceptionInterface;
+use Elastica\Query;
+use Elastica\Query\BoolQuery;
+use Elastica\Query\FunctionScore;
+use Elastica\Query\MatchQuery;
+use Elastica\Query\Term;
+use Elastica\Type\Mapping;
+use MediaWiki\Extension\Translate\TranslatorInterface\TranslationHelperException;
use MediaWiki\Logger\LoggerFactory;
/**
@@ -45,7 +56,7 @@ class ElasticSearchTTMServer
*/
protected const FROZEN_TYPE = 'frozen';
- /** @var \Elastica\Client */
+ /** @var Client */
protected $client;
/**
* Reference to the maintenance script to relay logging output.
@@ -76,7 +87,7 @@ class ElasticSearchTTMServer
if ( !$this->useWikimediaExtraPlugin() ) {
// ElasticTTM is currently not compatible with elasticsearch 2.x/5.x
// It needs FuzzyLikeThis ported via the wmf extra plugin
- throw new \RuntimeException( 'The wikimedia extra plugin is mandatory.' );
+ throw new RuntimeException( 'The wikimedia extra plugin is mandatory.' );
}
/* Two query system:
* 1) Find all strings in source language that match text
@@ -90,7 +101,7 @@ class ElasticSearchTTMServer
$fuzzyQuery->setLikeText( $text );
$fuzzyQuery->addFields( [ 'content' ] );
- $boostQuery = new \Elastica\Query\FunctionScore();
+ $boostQuery = new FunctionScore();
$boostQuery->addFunction(
'levenshtein_distance_score',
[
@@ -98,20 +109,20 @@ class ElasticSearchTTMServer
'field' => 'content'
]
);
- $boostQuery->setBoostMode( \Elastica\Query\FunctionScore::BOOST_MODE_REPLACE );
+ $boostQuery->setBoostMode( FunctionScore::BOOST_MODE_REPLACE );
// Wrap the fuzzy query so it can be used as a filter.
// This is slightly faster, as ES can throw away the scores by this query.
- $bool = new \Elastica\Query\BoolQuery();
+ $bool = new BoolQuery();
$bool->addFilter( $fuzzyQuery );
$bool->addMust( $boostQuery );
- $languageFilter = new \Elastica\Query\Term();
+ $languageFilter = new Term();
$languageFilter->setTerm( 'language', $sourceLanguage );
$bool->addFilter( $languageFilter );
// The whole query
- $query = new \Elastica\Query();
+ $query = new Query();
$query->setQuery( $bool );
// The interface usually displays three best candidates. These might
@@ -184,10 +195,10 @@ class ElasticSearchTTMServer
// Skip second query if first query found nothing. Keeping only one return
// statement in this method to avoid forgetting to reset connection timeout
if ( $terms !== [] ) {
- $idQuery = new \Elastica\Query\Terms();
+ $idQuery = new Query\Terms();
$idQuery->setTerms( '_id', $terms );
- $query = new \Elastica\Query( $idQuery );
+ $query = new Query( $idQuery );
$query->setSize( 25 );
$query->setParam( '_source', [ 'wiki', 'uri', 'content', 'localid' ] );
$resultset = $this->getType()->search( $query );
@@ -210,7 +221,7 @@ class ElasticSearchTTMServer
}
// Ensure results are in quality order
- uasort( $suggestions, function ( $a, $b ) {
+ uasort( $suggestions, static function ( $a, $b ) {
if ( $a['quality'] === $b['quality'] ) {
return 0;
}
@@ -231,8 +242,8 @@ class ElasticSearchTTMServer
*
* @param MessageHandle $handle
* @param ?string $targetText
- * @throws \RuntimeException
* @return bool
+ * @throws RuntimeException
*/
public function update( MessageHandle $handle, $targetText ) {
if ( !$handle->isValid() || $handle->getCode() === '' ) {
@@ -254,11 +265,11 @@ class ElasticSearchTTMServer
// Do not delete definitions, because the translations are attached to that
if ( $handle->getCode() !== $sourceLanguage ) {
$localid = $handle->getTitleForBase()->getPrefixedText();
- $this->deleteByQuery( $this->getType(), Elastica\Query::create(
- ( new \Elastica\Query\BoolQuery() )
- ->addFilter( new Elastica\Query\Term( [ 'wiki' => wfWikiID() ] ) )
- ->addFilter( new Elastica\Query\Term( [ 'language' => $handle->getCode() ] ) )
- ->addFilter( new Elastica\Query\Term( [ 'localid' => $localid ] ) ) ) );
+ $this->deleteByQuery( $this->getType(), Query::create(
+ ( new BoolQuery() )
+ ->addFilter( new Term( [ 'wiki' => wfWikiID() ] ) )
+ ->addFilter( new Term( [ 'language' => $handle->getCode() ] ) )
+ ->addFilter( new Term( [ 'localid' => $localid ] ) ) ) );
}
// If translation was made fuzzy, we do not need to add anything
@@ -274,7 +285,7 @@ class ElasticSearchTTMServer
function () use ( $doc ) {
$this->getType()->addDocument( $doc );
},
- function ( $e, $errors ) use ( $fname ) {
+ static function ( $e, $errors ) use ( $fname ) {
$c = get_class( $e );
$msg = $e->getMessage();
error_log( $fname . ": update failed ($c: $msg); retrying." );
@@ -289,7 +300,7 @@ class ElasticSearchTTMServer
* @param MessageHandle $handle
* @param string $text
* @param int $revId
- * @return \Elastica\Document
+ * @return Document
*/
protected function createDocument( MessageHandle $handle, $text, $revId ) {
$language = $handle->getCode();
@@ -307,7 +318,7 @@ class ElasticSearchTTMServer
'group' => $handle->getGroupIds(),
];
- return new \Elastica\Document( $globalid, $data );
+ return new Document( $globalid, $data );
}
/**
@@ -352,7 +363,7 @@ class ElasticSearchTTMServer
/**
* Begin the bootstrap process.
*
- * @throws \RuntimeException
+ * @throws RuntimeException
*/
public function beginBootstrap() {
$type = $this->getType();
@@ -366,10 +377,10 @@ class ElasticSearchTTMServer
$settings = $type->getIndex()->getSettings();
$settings->setRefreshInterval( '-1' );
- $this->deleteByQuery( $this->getType(), \Elastica\Query::create(
- ( new Elastica\Query\Term() )->setTerm( 'wiki', wfWikiID() ) ) );
+ $this->deleteByQuery( $this->getType(), Query::create(
+ ( new Term() )->setTerm( 'wiki', wfWikiID() ) ) );
- $mapping = new \Elastica\Type\Mapping();
+ $mapping = new Mapping();
$mapping->setType( $type );
$mapping->setProperties( [
'wiki' => [ 'type' => 'keyword' ],
@@ -424,7 +435,7 @@ class ElasticSearchTTMServer
public function batchInsertTranslations( array $batch ) {
$docs = [];
foreach ( $batch as $data ) {
- list( $handle, $sourceLanguage, $text ) = $data;
+ [ $handle, $sourceLanguage, $text ] = $data;
$revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID();
$docs[] = $this->createDocument( $handle, $text, $revId );
}
@@ -456,9 +467,9 @@ class ElasticSearchTTMServer
public function getClient() {
if ( !$this->client ) {
if ( isset( $this->config['config'] ) ) {
- $this->client = new \Elastica\Client( $this->config['config'] );
+ $this->client = new Client( $this->config['config'] );
} else {
- $this->client = new \Elastica\Client();
+ $this->client = new Client();
}
}
return $this->client;
@@ -500,7 +511,7 @@ class ElasticSearchTTMServer
$path = "_cluster/health/$indexName";
$response = $this->getClient()->request( $path );
if ( $response->hasError() ) {
- throw new \Exception( "Error while fetching index health status: " . $response->getError() );
+ throw new Exception( "Error while fetching index health status: " . $response->getError() );
}
return $response->getData();
}
@@ -531,7 +542,7 @@ class ElasticSearchTTMServer
}
$this->logOutput( "\tIndex is $status retrying..." );
sleep( 5 );
- } catch ( \Exception $e ) {
+ } catch ( Exception $e ) {
$this->logOutput( "Error while waiting for green ({$e->getMessage()}), retrying..." );
}
}
@@ -600,14 +611,14 @@ class ElasticSearchTTMServer
// Allow searching either by message content or message id (page name
// without language subpage) with exact match only.
- $searchQuery = new \Elastica\Query\BoolQuery();
+ $searchQuery = new BoolQuery();
foreach ( $fields as $analyzer => $words ) {
foreach ( $words as $word ) {
- $boolQuery = new \Elastica\Query\BoolQuery();
- $contentQuery = new \Elastica\Query\Match();
+ $boolQuery = new BoolQuery();
+ $contentQuery = new MatchQuery();
$contentQuery->setFieldQuery( $analyzer, $word );
$boolQuery->addShould( $contentQuery );
- $messageQuery = new \Elastica\Query\Term();
+ $messageQuery = new Term();
$messageQuery->setTerm( 'localid', $word );
$boolQuery->addShould( $messageQuery );
@@ -631,8 +642,8 @@ class ElasticSearchTTMServer
$handle = new MessageHandle( $title );
if ( $handle->isValid() && $handle->getCode() !== '' ) {
$localid = $handle->getTitleForBase()->getPrefixedText();
- $boolQuery = new \Elastica\Query\BoolQuery();
- $messageId = new \Elastica\Query\Term();
+ $boolQuery = new BoolQuery();
+ $messageId = new Term();
$messageId->setTerm( 'localid', $localid );
$boolQuery->addMust( $messageId );
$searchQuery->addShould( $boolQuery );
@@ -651,17 +662,17 @@ class ElasticSearchTTMServer
* @return \Elastica\Search
*/
public function createSearch( $queryString, $opts, $highlight ) {
- $query = new \Elastica\Query();
+ $query = new Query();
- list( $searchQuery, $highlights ) = $this->parseQueryString( $queryString, $opts );
+ [ $searchQuery, $highlights ] = $this->parseQueryString( $queryString, $opts );
$query->setQuery( $searchQuery );
- $language = new \Elastica\Aggregation\Terms( 'language' );
+ $language = new Terms( 'language' );
$language->setField( 'language' );
$language->setSize( 500 );
$query->addAggregation( $language );
- $group = new \Elastica\Aggregation\Terms( 'group' );
+ $group = new Terms( 'group' );
$group->setField( 'group' );
// Would like to prioritize the top level groups and not show subgroups
// if the top group has only few hits, but that doesn't seem to be possible.
@@ -675,18 +686,18 @@ class ElasticSearchTTMServer
// multiple must clauses are executed by converting each filter into a bit
// field then anding them together. The latter is normally faster if either
// of the subfilters are reused. May not make a difference in this context.
- $filters = new \Elastica\Query\BoolQuery();
+ $filters = new BoolQuery();
$language = $opts['language'];
if ( $language !== '' ) {
- $languageFilter = new \Elastica\Query\Term();
+ $languageFilter = new Term();
$languageFilter->setTerm( 'language', $language );
$filters->addFilter( $languageFilter );
}
$group = $opts['group'];
if ( $group !== '' ) {
- $groupFilter = new \Elastica\Query\Term();
+ $groupFilter = new Term();
$groupFilter->setTerm( 'group', $group );
$filters->addFilter( $groupFilter );
}
@@ -699,7 +710,7 @@ class ElasticSearchTTMServer
$query->setPostFilter( $filters );
}
- list( $pre, $post ) = $highlight;
+ [ $pre, $post ] = $highlight;
$query->setHighlight( [
// The value must be an object
'pre_tags' => [ $pre ],
@@ -723,7 +734,7 @@ class ElasticSearchTTMServer
try {
return $search->search();
- } catch ( \Elastica\Exception\ExceptionInterface $e ) {
+ } catch ( ExceptionInterface $e ) {
throw new TTMServerException( $e->getMessage() );
}
}
@@ -786,19 +797,19 @@ class ElasticSearchTTMServer
* in 2.x and returned in 5.x.
*
* @param \Elastica\Type $type the source index
- * @param \Elastica\Query $query
- * @throws \RuntimeException
+ * @param Query $query
+ * @throws RuntimeException
*/
- private function deleteByQuery( \Elastica\Type $type, \Elastica\Query $query ) {
+ private function deleteByQuery( \Elastica\Type $type, Query $query ) {
try {
MWElasticUtils::deleteByQuery( $type->getIndex(), $query, /* $allowConflicts = */ true );
- } catch ( \Exception $e ) {
+ } catch ( Exception $e ) {
LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->error(
'Problem encountered during deletion.',
[ 'exception' => $e ]
);
- throw new \RuntimeException( "Problem encountered during deletion.\n" . $e );
+ throw new RuntimeException( "Problem encountered during deletion.\n" . $e );
}
}
@@ -806,7 +817,7 @@ class ElasticSearchTTMServer
public function isFrozen() {
try {
return MWElasticUtils::isFrozen( $this->getClient() );
- } catch ( \Exception $e ) {
+ } catch ( Exception $e ) {
LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->warning(
'Problem encountered while checking the frozen index.',
[ 'exception' => $e ]