This is the codeAbility Sharing Platform! Learn more about the codeAbility Sharing Platform.

Skip to content
Snippets Groups Projects

Resolve "Relevance Ranking in Metadata"

Merged Philipp Gritsch requested to merge 439-relevance-ranking-in-metadata into development
Viewing commit b8865a87
Show latest version
7 files
+ 139
- 392
Compare changes
  • Side-by-side
  • Inline
Files
7
@@ -7,19 +7,11 @@ import at.ac.uibk.gitsearch.repository.jpa.StatisticsRepository;
import at.ac.uibk.gitsearch.service.dto.AutoCompleteEntry;
import at.ac.uibk.gitsearch.service.dto.StatisticsDTO;
import co.elastic.clients.elasticsearch.ElasticsearchClient;
import co.elastic.clients.elasticsearch._types.ScriptSortType;
import co.elastic.clients.elasticsearch._types.SortOptions;
import co.elastic.clients.elasticsearch._types.SortOptionsBuilders;
import co.elastic.clients.elasticsearch._types.SortOrder;
import co.elastic.clients.elasticsearch._types.mapping.FieldType;
import co.elastic.clients.elasticsearch._types.query_dsl.BoolQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.ExistsQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.MatchPhrasePrefixQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.MatchQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.MultiMatchQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.PrefixQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.RangeQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.TermQuery;
import co.elastic.clients.elasticsearch._types.query_dsl.TextQueryType;
import co.elastic.clients.elasticsearch._types.query_dsl.*;
import co.elastic.clients.elasticsearch.core.search.Hit;
import co.elastic.clients.json.JsonData;
import co.elastic.clients.transport.rest_client.RestClientTransport;
@@ -33,22 +25,8 @@ import java.net.ConnectException;
import java.text.ParseException;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.*;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -60,10 +38,12 @@ import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.annotation.PostConstruct;
import javax.ws.rs.NotFoundException;
import liquibase.repackaged.org.apache.commons.text.StringSubstitutor;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codeability.sharing.plugins.api.search.SearchInputDTO;
import org.codeability.sharing.plugins.api.search.SearchOrdering;
import org.codeability.sharing.plugins.api.search.SearchResultDTO;
import org.codeability.sharing.plugins.api.search.SearchResultsDTO;
import org.codeability.sharing.plugins.api.search.UserProvidedMetadataDTO.Person;
@@ -573,7 +553,7 @@ public class MetaDataRepository {
search
.index(SearchRepositoryConstants.INDEX_METADATA)
.query(q -> q.bool(query))
.sort(getDefaultSortStrategy())
.sort(getSortStrategy(searchInputDTO.getOrdering()))
.from(from)
.size(pageSize),
SearchResultDTO.class
@@ -593,39 +573,46 @@ public class MetaDataRepository {
}
}
private List<SortOptions> getDefaultSortStrategy() {
/** todo try this
* GET /my_index/_search
* {
* "query": {
* "match_all": {}
* },
* "sort": {
* "_script": {
* "type": "number",
* "script": {
* "lang": "painless",
* "source": "doc['fieldA'].value * params.factorA + doc['fieldB'].value * params.factorB",
* "params": {
* "factorA": 1.0,
* "factorB": 100.0
* }
* },
* "order": "desc"
* }
* }
* }
*/
return List.of(
getSortOptionForFieldAndOrder(SearchRepositoryConstants.SEARCHSTATISTICS_BADGEREWARDED, SortOrder.Desc, FieldType.Boolean),
getSortOptionForFieldAndOrder(SearchRepositoryConstants.SEARCHSTATISTICS_DOWNLOADS, SortOrder.Desc, FieldType.Integer),
getSortOptionForFieldAndOrder(SearchRepositoryConstants.SEARCHSTATISTICS_VIEWS, SortOrder.Desc, FieldType.Integer)
private List<SortOptions> getSortStrategy(SearchOrdering searchOrdering) {
String sortingScriptTemplate =
"double boost = 0;" +
"if (doc.containsKey('${badgeRewarded}') && doc['${badgeRewarded}'].size() > 0) {" +
" boost = boost + (doc['${badgeRewarded}'].value ? 1.0 : 0.0) * params.factorBadge;" +
"}" +
"if (doc.containsKey('${downloads}') && doc['${downloads}'].size() > 0) {" +
" boost = boost + doc['${downloads}'].value * params.factorDownloads;" +
"}" +
"if (doc.containsKey('${views}') && doc['${views}'].size() > 0) {" +
" boost = boost + doc['${views}'].value * params.factorViews;" +
"}" +
"return boost;";
Map<String, String> templateValueMapping = Map.of(
"badgeRewarded",
SearchRepositoryConstants.SEARCHSTATISTICS_BADGEREWARDED,
"downloads",
SearchRepositoryConstants.SEARCHSTATISTICS_DOWNLOADS,
"views",
SearchRepositoryConstants.SEARCHSTATISTICS_VIEWS
);
var scriptSort = SortOptionsBuilders.script(b ->
b
.type(ScriptSortType.Number)
.order(SortOrder.Desc)
.script(sb ->
sb.inline(isb ->
isb
.lang("painless")
.source(StringSubstitutor.replace(sortingScriptTemplate, templateValueMapping))
.params("factorBadge", JsonData.of(searchOrdering.getFactorBadge()))
.params("factorDownloads", JsonData.of(searchOrdering.getFactorDownloads()))
.params("factorViews", JsonData.of(searchOrdering.getFactorViews()))
)
)
);
}
private SortOptions getSortOptionForFieldAndOrder(String field, SortOrder order, FieldType fieldType) {
return SortOptionsBuilders.field(sb -> sb.field(field).unmappedType(fieldType).order(order).missing("_last"));
return List.of(SortOptionsBuilders.score(s -> s.order(SortOrder.Desc)), scriptSort);
}
/**
@@ -819,12 +806,14 @@ public class MetaDataRepository {
*/
private void addAuthorizationQueryWithJavaApi(Optional<User> user, BoolQuery.Builder queryBuilder) {
// Authorization restrictions
// the boost value of 0 tells elastic to exclude these restrictions from the scoring process
final TermQuery.Builder simplePublicQuery = new TermQuery.Builder()
.value("public")
.field(SearchRepositoryConstants.PROJECT_VISIBILITY)
.boost(0.0f);
final ExistsQuery.Builder publicVisibilityQuery = new ExistsQuery.Builder()
.field(SearchRepositoryConstants.METADATA_PUBLICVISIBILITY);
.field(SearchRepositoryConstants.METADATA_PUBLICVISIBILITY)
.boost(0.0f);
final BoolQuery.Builder publicQuery = new BoolQuery.Builder()
.should(q -> q.term(simplePublicQuery.build()))
.should(q -> q.exists(publicVisibilityQuery.build()));