diff --git a/Dockerfile b/Dockerfile index 1890f73..a82d53c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,17 +1,14 @@ FROM maven:3.8.6-openjdk-18-slim -RUN mkdir app-repo - +RUN mkdir /app-repo ADD . /app-repo/ - WORKDIR /app-repo RUN mvn clean - RUN mvn package spring-boot:repackage EXPOSE 3003 -ENTRYPOINT ["java"] +ENV SPRING_PROFILES_ACTIVE=docker -CMD ["-jar", "./target/repo-0.0.1-SNAPSHOT.jar"] +ENTRYPOINT ["java", "-jar", "./target/repo-0.0.1-SNAPSHOT.jar"] diff --git a/src/main/java/upc/edu/gessi/repo/AppConfig.java b/src/main/java/upc/edu/gessi/repo/AppConfig.java new file mode 100644 index 0000000..ba55521 --- /dev/null +++ b/src/main/java/upc/edu/gessi/repo/AppConfig.java @@ -0,0 +1,14 @@ +package upc.edu.gessi.repo; + +import org.springframework.boot.web.client.RestTemplateBuilder; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.client.RestTemplate; + +@Configuration +public class AppConfig { + @Bean + public RestTemplate restTemplate(RestTemplateBuilder builder) { + return builder.build(); + } +} diff --git a/src/main/java/upc/edu/gessi/repo/controller/InductiveKnowledgeAPI.java b/src/main/java/upc/edu/gessi/repo/controller/InductiveKnowledgeAPI.java index 6ce32fd..093ce3c 100644 --- a/src/main/java/upc/edu/gessi/repo/controller/InductiveKnowledgeAPI.java +++ b/src/main/java/upc/edu/gessi/repo/controller/InductiveKnowledgeAPI.java @@ -15,31 +15,33 @@ public interface InductiveKnowledgeAPI extends BaseAPI { @GetMapping("/last-review") int getLastReview(); - @PostMapping("/derived-NL-Features") + @PostMapping("/derived-nl-features") int derivedNLFeatures(@RequestParam(value = "documentType") DocumentType documentType, - @RequestParam(value = "batch-size") Integer batchSize, - @RequestParam(value = "from") Integer from); + @RequestParam(value = "batch-size", defaultValue = "0") Integer batchSize, + @RequestParam(value = "from", defaultValue = "0") Integer 
from, + @RequestParam(value = "feature-model", defaultValue = "transfeatex") String featureModel); - @PostMapping("/compute-Feature-Similarity") + @PostMapping("/compute-feature-similarity") void computeFeatureSimilarity(@RequestParam(defaultValue = "0.5", name = "threshold") double synonymThreshold); - @DeleteMapping("/Feature-Similarities") + + @DeleteMapping("/feature-similarities") void deleteFeatureSimilarities(); - @PostMapping("/compute-Similarity") + @PostMapping("/compute-similarity") void computeSimilarity(@RequestParam(defaultValue = "JACCARD") SimilarityAlgorithm algorithm); - @GetMapping("/find-Similar-Apps") + @GetMapping("/find-similar-apps") Map> getTopKSimilarApps(@RequestBody List apps, @RequestParam Integer k, @RequestParam DocumentType documentType); - @GetMapping("/find-Apps-By-Feature") + @GetMapping("/find-apps-by-feature") Map> findAppsByFeature(@RequestBody List features, @RequestParam Integer k, @RequestParam DocumentType documentType); - @GetMapping("/find-Apps-By-Features") + @GetMapping("/find-apps-by-features") List findAppsByFeatures(@RequestBody List features, @RequestParam Integer k, @RequestParam DocumentType documentType); diff --git a/src/main/java/upc/edu/gessi/repo/controller/impl/InductiveKnowledgeController.java b/src/main/java/upc/edu/gessi/repo/controller/impl/InductiveKnowledgeController.java index 89cf6ae..0388efc 100644 --- a/src/main/java/upc/edu/gessi/repo/controller/impl/InductiveKnowledgeController.java +++ b/src/main/java/upc/edu/gessi/repo/controller/impl/InductiveKnowledgeController.java @@ -46,16 +46,16 @@ public int getLastReview() { } @Override - @PostMapping("/derived-NL-Features") - public int derivedNLFeatures(@RequestParam(value = "documentType", defaultValue = "DESCRIPTION") DocumentType documentType, - @RequestParam(value = "batch-size", defaultValue = "10") Integer batchSize, - @RequestParam(value = "from", defaultValue = "0") Integer from) { + public int derivedNLFeatures(final DocumentType 
documentType, + final Integer batchSize, + final Integer from, + final String featureModel) { logger.info("Generating derived deductive knowledge from natural language documents"); logger.info("Document type: " + documentType); if (documentType.equals(DocumentType.REVIEWS)) { logger.info("Deducting features from reviews..."); try { - return ((FeatureService) useService(FeatureService.class)).extractFeaturesFromReviews(batchSize, from); + return ((FeatureService) useService(FeatureService.class)).extractFeaturesFromReviews(batchSize, from, featureModel); } catch (Exception e) { return dbConnection.getCount(); } diff --git a/src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocument.java b/src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocumentDTO.java similarity index 79% rename from src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocument.java rename to src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocumentDTO.java index 1b54555..2ac64c4 100644 --- a/src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocument.java +++ b/src/main/java/upc/edu/gessi/repo/dto/AnalyzedDocumentDTO.java @@ -3,18 +3,18 @@ import java.io.Serializable; import java.util.List; -public class AnalyzedDocument implements Serializable { +public class AnalyzedDocumentDTO implements Serializable { private String id; private String text; private List features; - public AnalyzedDocument(String id, List features) { + public AnalyzedDocumentDTO(String id, List features) { this.id = id; this.features = features; } - public AnalyzedDocument(String id, String text) { + public AnalyzedDocumentDTO(String id, String text) { this.id = id; this.text = text; } diff --git a/src/main/java/upc/edu/gessi/repo/dto/Review/HUBResponseDTO.java b/src/main/java/upc/edu/gessi/repo/dto/Review/HUBResponseDTO.java new file mode 100644 index 0000000..c4984d0 --- /dev/null +++ b/src/main/java/upc/edu/gessi/repo/dto/Review/HUBResponseDTO.java @@ -0,0 +1,15 @@ +package upc.edu.gessi.repo.dto.Review; + +import java.util.List; + +public class 
HUBResponseDTO { + private List analyzed_reviews; + + public List getAnalyzed_reviews() { + return analyzed_reviews; + } + + public void setAnalyzed_reviews(List analyzed_reviews) { + this.analyzed_reviews = analyzed_reviews; + } +} diff --git a/src/main/java/upc/edu/gessi/repo/repository/ReviewRepository.java b/src/main/java/upc/edu/gessi/repo/repository/ReviewRepository.java index 4d6d0a8..1fcebe1 100644 --- a/src/main/java/upc/edu/gessi/repo/repository/ReviewRepository.java +++ b/src/main/java/upc/edu/gessi/repo/repository/ReviewRepository.java @@ -22,4 +22,10 @@ void addSentenceToReview(String reviewId, List getReviews(String nodeId); SentenceDTO getSentenceDTO(TupleQueryResult result); + + List findBatched(final int limit, final int offset); + + List findAllSimplified(); + + Integer getCount(); } diff --git a/src/main/java/upc/edu/gessi/repo/repository/impl/ReviewRepositoryImpl.java b/src/main/java/upc/edu/gessi/repo/repository/impl/ReviewRepositoryImpl.java index 263f20c..d5b743a 100644 --- a/src/main/java/upc/edu/gessi/repo/repository/impl/ReviewRepositoryImpl.java +++ b/src/main/java/upc/edu/gessi/repo/repository/impl/ReviewRepositoryImpl.java @@ -1,5 +1,6 @@ package upc.edu.gessi.repo.repository.impl; +import io.swagger.models.auth.In; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; @@ -130,7 +131,7 @@ public List findListed(List reviewIds) throws NoReviewsFoundE } List reviewDTOs = new ArrayList<>(); while (reviewsResult.hasNext()) { - ReviewDTO reviewDTO = getReviewDTO(reviewsResult); + ReviewDTO reviewDTO = getReviewDTO(reviewsResult.next()); reviewDTOs.add(reviewDTO); } return reviewDTOs; @@ -142,7 +143,10 @@ public IRI insert(ReviewDTO dto) { if (dto.getId() != null) { IRI reviewIRI = factory.createIRI(schemaIRI.getReviewIRI() + "/" + dto.getId()); statements.add(factory.createStatement(reviewIRI, schemaIRI.getTypeIRI(), schemaIRI.getReviewIRI())); - IRI applicationIRI = 
factory.createIRI(schemaIRI.getAppIRI() + "/" + dto.getPackageName()); + IRI applicationIRI = null; + if (dto.getPackageName() != null) { + applicationIRI = factory.createIRI(schemaIRI.getAppIRI() + "/" + dto.getPackageName()); + } if (applicationIRI != null) { statements.add(factory.createStatement(applicationIRI, schemaIRI.getReviewsIRI(), reviewIRI)); } @@ -280,6 +284,51 @@ public SentenceDTO getSentenceDTO(final TupleQueryResult result) { return sentenceDTO; } + @Override + public List findBatched(int limit, int offset) { + String query = reviewQueryBuilder.findAllQueryWithLimitOffset(limit, offset); + TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); + List reviewDTOList = new ArrayList<>(); + while (result.hasNext()) { + reviewDTOList.add(getReviewDTO(result.next())); + } + return reviewDTOList; + } + + @Override + public List findAllSimplified() { + String query = reviewQueryBuilder.findAllSimplifiedQuery(); + TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); + List reviewDTOList = new ArrayList<>(); + while (result.hasNext()) { + reviewDTOList.add(getReviewDTO(result.next())); + } + return reviewDTOList; + } + + + @Override + public Integer getCount() { + String query = reviewQueryBuilder.getCountQuery(); + TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); + + try { + if (result.hasNext()) { + BindingSet bindingSet = result.next(); + String countStr = bindingSet.getValue("count").stringValue(); + return Integer.parseInt(countStr); + } else { + return 0; + } + } catch (Exception e) { + e.printStackTrace(); + return null; + } finally { + result.close(); + } + } + + private void commitChanges(final List statements) { RepositoryConnection repoConnection = repository.getConnection(); repoConnection.add(statements); @@ -288,16 +337,25 @@ private void commitChanges(final List statements) { - private ReviewDTO getReviewDTO(final TupleQueryResult 
result) { + private ReviewDTO getReviewDTO(final BindingSet bindings) { ReviewDTO ReviewDTO = new ReviewDTO(); - BindingSet bindings = result.next(); if (existsShortReviewBinding(bindings)) { - String idValue = bindings.getBinding("id").getValue().stringValue(); - String textValue = bindings.getBinding("text").getValue().stringValue(); - String appValue = bindings.getBinding("app_identifier").getValue().stringValue(); - ReviewDTO.setId(idValue); - ReviewDTO.setReviewText(textValue); - ReviewDTO.setApplicationId(appValue); + if (bindings.getBinding("id") != null && bindings.getBinding("id").getValue() != null) { + String idValue = bindings.getBinding("id").getValue().stringValue(); + ReviewDTO.setId(idValue); + } + + if (bindings.getBinding("text") != null && bindings.getBinding("text").getValue() != null) { + String textValue = bindings.getBinding("text").getValue().stringValue(); + ReviewDTO.setReviewText(textValue); + } + + if (bindings.getBinding("app_identifier") != null && bindings.getBinding("app_identifier").getValue() != null) { + String appValue = bindings.getBinding("app_identifier").getValue().stringValue(); + ReviewDTO.setApplicationId(appValue); + + } + if (bindings.getBinding("date") != null && bindings.getBinding("date").getValue() != null) { String dateString = bindings.getBinding("date").getValue().stringValue(); try { @@ -329,8 +387,6 @@ private boolean existsShortReviewBinding(BindingSet bindings) { return bindings.getBinding("id") != null && bindings.getBinding("id").getValue() != null && bindings.getBinding("text") != null - && bindings.getBinding("text").getValue() != null - && bindings.getBinding("app_identifier") != null - && bindings.getBinding("app_identifier").getValue() != null; + && bindings.getBinding("text").getValue() != null; } } diff --git a/src/main/java/upc/edu/gessi/repo/service/FeatureService.java b/src/main/java/upc/edu/gessi/repo/service/FeatureService.java index 60d7c8d..e2fecc7 100644 --- 
a/src/main/java/upc/edu/gessi/repo/service/FeatureService.java +++ b/src/main/java/upc/edu/gessi/repo/service/FeatureService.java @@ -6,5 +6,5 @@ public interface FeatureService extends CrudService { void extractFeaturesByDocument(DocumentType documentType, int batchSize); - int extractFeaturesFromReviews(int batchSize, int from); + int extractFeaturesFromReviews(int batchSize, int from, String featureModel); } diff --git a/src/main/java/upc/edu/gessi/repo/service/ReviewService.java b/src/main/java/upc/edu/gessi/repo/service/ReviewService.java index 83a205c..a514331 100644 --- a/src/main/java/upc/edu/gessi/repo/service/ReviewService.java +++ b/src/main/java/upc/edu/gessi/repo/service/ReviewService.java @@ -2,5 +2,12 @@ import upc.edu.gessi.repo.dto.Review.ReviewDTO; +import java.util.List; + public interface ReviewService extends CrudService { + List getBatched(final int batch, final int offset); + + List getAllSimplified(); + + Integer getReviewCount(); } diff --git a/src/main/java/upc/edu/gessi/repo/service/impl/FeatureServiceImpl.java b/src/main/java/upc/edu/gessi/repo/service/impl/FeatureServiceImpl.java index 2848e06..d115bc8 100644 --- a/src/main/java/upc/edu/gessi/repo/service/impl/FeatureServiceImpl.java +++ b/src/main/java/upc/edu/gessi/repo/service/impl/FeatureServiceImpl.java @@ -1,5 +1,7 @@ package upc.edu.gessi.repo.service.impl; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.ValueFactory; @@ -16,6 +18,7 @@ import org.springframework.stereotype.Service; import upc.edu.gessi.repo.dto.*; import upc.edu.gessi.repo.dto.MobileApplication.MobileApplicationFullDataDTO; +import upc.edu.gessi.repo.dto.Review.ReviewDTO; import upc.edu.gessi.repo.dto.graph.*; import upc.edu.gessi.repo.exception.NoObjectFoundException; import upc.edu.gessi.repo.exception.ObjectNotFoundException; @@ -26,8 
+29,10 @@ import upc.edu.gessi.repo.util.SchemaIRI; import upc.edu.gessi.repo.util.Utils; +import java.io.File; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; @Service @Lazy @@ -75,137 +80,93 @@ public FeatureServiceImpl( repositoryFactory = repoFact; } - private void runFeatureExtractionBatch(List analyzedDocuments, List source, int count, IRI appIRI) { - List features = nlFeatureServiceImpl.getNLFeatures(analyzedDocuments); - List statements = new ArrayList<>(); - - for (int i = 0; i < features.size(); ++i) { - MobileApplicationFullDataDTO completeApplicationDataDTO = new MobileApplicationFullDataDTO(); - List featureString = features.get(i).getFeatures(); - List featureList = new ArrayList<>(); - for (String fs : featureString) { - featureList.add(new Feature(appIRI.toString(), fs)); - } - completeApplicationDataDTO.setFeatures( - featureList - .stream() - .map(Feature::getName) - .toList()); - try { - applicationService - .addFeatures( - completeApplicationDataDTO, - source.get(i), - statements); - } catch (Exception e) { - logger.error("There was some problem inserting features for app " + appIRI.toString() + ". Please try again later."); - } + @Override + public void extractFeaturesByDocument(DocumentType documentType, int batchSize) { + String predicateQueue = null; + switch(documentType) { + case SUMMARY -> predicateQueue = "abstract"; + case CHANGELOG -> predicateQueue = "releaseNotes"; + default -> predicateQueue = "description"; } - commitChanges(statements); - logger.info(count + " documents already processed. Keep going..."); + String predicate1 = "https://schema.org/" + predicateQueue; + String predicate2 = "https://schema.org/text"; + String query = "SELECT ?subject ?object ?text WHERE { ?subject <" + predicate1 + "> ?object . 
?object <"+ predicate2 +"> ?text}"; + executeFeatureQuery(repository.getConnection(), query, batchSize, 0); } - private List getFeatures(String nodeId) { - List features = new ArrayList<>(); - - String query = "PREFIX schema: \n" + - "\n" + - "select ?feature ?name where {\n" + - " <"+ nodeId +"> schema:feature ?keywords .\n" + - " ?feature schema:name ?name\n" + - "} "; - - TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); - while (result.hasNext()) { - BindingSet bindings = result.next(); + @Override + public int extractFeaturesFromReviews(int batchSize, int from, String featureModel) { + List reviewDTOList = new ArrayList<>(); + if (batchSize == 0 && from == 0) { + int reviewCount = reviewServiceImpl.getReviewCount(); + int batchSizeAux = 1000; + int fromAux = 0; + int totalReviewsProcessed = 0; + try { + while (totalReviewsProcessed <= reviewCount) { + List reviewAuxList = reviewServiceImpl.getBatched(batchSizeAux, fromAux); + if (reviewAuxList.isEmpty()) { + break; + } else { + reviewDTOList.addAll(reviewAuxList); + } + fromAux += batchSizeAux; + totalReviewsProcessed += reviewAuxList.size(); + logger.info("Retrieved {} reviews, total reviews processed: {}", reviewAuxList.size(), totalReviewsProcessed); + } - IRI feature = (IRI) bindings.getValue("feature"); - String name = bindings.getValue("name").stringValue(); + } catch (Exception e) { + Utils.serializeReviews(reviewDTOList, logger); + } + } else { + reviewDTOList = reviewServiceImpl.getBatched(batchSize, from); + logger.info("Retrieved {} reviews starting from offset {}", reviewDTOList.size(), from); - GraphFeature graphFeature = new GraphFeature(feature.toString(), name); - features.add(graphFeature); } + List analyzedReviewsDTOList = nlFeatureServiceImpl.getHUBFeatures(reviewDTOList, featureModel); + reviewServiceImpl.create(analyzedReviewsDTOList); + logger.info("Processed and created {} analyzed reviews", analyzedReviewsDTOList.size()); - return features; + 
return 0; } - private void commitChanges(final List statements) { - RepositoryConnection repoConnection = repository.getConnection(); - repoConnection.add(statements); - repoConnection.close(); - } - private int executeFeatureQuery(RepositoryConnection repoConnection, String query, int batchSize, int from) { - Integer count; - TupleQueryResult result = Utils.runSparqlSelectQuery(repoConnection, query); - - List analyzedDocuments = new ArrayList<>(); - List source = new ArrayList<>(); - - count = 1; - while (result.hasNext()) { - BindingSet bindings = result.next(); - if (count >= from) { - try { - IRI appIRI = (IRI) bindings.getValue("subject"); - IRI documentIRI = (IRI) bindings.getValue("object"); - String text = bindings.getValue("text").stringValue(); - analyzedDocuments.add(new AnalyzedDocument(documentIRI.toString(), text)); - if (documentIRI.toString().contains(schemaIRI.getReviewIRI().toString())) { - String reviewSource = schemaIRI.getDigitalDocumentIRI().toString() - + appIRI.toString().replace(schemaIRI.getAppIRI().toString(), "") - + "-" + DocumentType.REVIEWS; - documentIRI = factory.createIRI(reviewSource); - } - - source.add(documentIRI); - - if (count % batchSize == 0) { - runFeatureExtractionBatch(analyzedDocuments, source, count, appIRI); - - analyzedDocuments = new ArrayList<>(); - source = new ArrayList<>(); - } - } catch (Exception e) { - return count; - } - } - ++count; - } + @Override + public List create(List dtos) { + return null; + } - // Run last batch - if (count % batchSize != 1) - runFeatureExtractionBatch(analyzedDocuments, source, count, schemaIRI.getAppIRI()); + @Override + public Feature get(String id) throws ObjectNotFoundException { + return null; + } - return -1; + @Override + public List getListed(List ids) throws NoObjectFoundException { + return null; + } + @Override + public List getAllPaginated(Integer page, Integer size) throws NoObjectFoundException { + return null; } @Override - public void 
extractFeaturesByDocument(DocumentType documentType, int batchSize) { - String predicateQueue = null; - switch(documentType) { - case SUMMARY -> predicateQueue = "abstract"; - case CHANGELOG -> predicateQueue = "releaseNotes"; - default -> predicateQueue = "description"; - } - String predicate1 = "https://schema.org/" + predicateQueue; - String predicate2 = "https://schema.org/text"; - String query = "SELECT ?subject ?object ?text WHERE { ?subject <" + predicate1 + "> ?object . ?object <"+ predicate2 +"> ?text}"; - executeFeatureQuery(repository.getConnection(), query, batchSize, 0); + public List getAll() { + return null; } @Override - public int extractFeaturesFromReviews(int batchSize, int from) { - String query = "SELECT ?subject ?object ?text WHERE {?subject ?object . " + - "?object ?text}"; + public void update(Feature entity) { - return executeFeatureQuery(repository.getConnection(), query, batchSize, from); } + @Override + public void delete(String id) { + } public List getAllFeatures() { String query = featureQueryBuilder.findAllFeaturesQuery(); TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); @@ -325,39 +286,121 @@ public void getAppsWithFeatures() { //return new Graph(nodes, edges); } - @Override - public List create(List dtos) { - return null; - } + private void runFeatureExtractionBatch(List analyzedDocumentDTOS, List source, int count, IRI appIRI) { + List features = nlFeatureServiceImpl.getNLFeatures(analyzedDocumentDTOS); + List statements = new ArrayList<>(); - @Override - public Feature get(String id) throws ObjectNotFoundException { - return null; + for (int i = 0; i < features.size(); ++i) { + MobileApplicationFullDataDTO completeApplicationDataDTO = new MobileApplicationFullDataDTO(); + List featureString = features.get(i).getFeatures(); + List featureList = new ArrayList<>(); + for (String fs : featureString) { + featureList.add(new Feature(appIRI.toString(), fs)); + } + 
completeApplicationDataDTO.setFeatures( + featureList + .stream() + .map(Feature::getName) + .toList()); + try { + applicationService + .addFeatures( + completeApplicationDataDTO, + source.get(i), + statements); + } catch (Exception e) { + logger.error("There was some problem inserting features for app " + appIRI.toString() + ". Please try again later."); + } + } + commitChanges(statements); + logger.info(count + " documents already processed. Keep going..."); } + private List getFeatures(String nodeId) { + List features = new ArrayList<>(); - @Override - public List getListed(List ids) throws NoObjectFoundException { - return null; - } + String query = "PREFIX schema: \n" + + "\n" + + "select ?feature ?name where {\n" + + " <"+ nodeId +"> schema:feature ?keywords .\n" + + " ?feature schema:name ?name\n" + + "} "; - @Override - public List getAllPaginated(Integer page, Integer size) throws NoObjectFoundException { - return null; + TupleQueryResult result = Utils.runSparqlSelectQuery(repository.getConnection(), query); + + while (result.hasNext()) { + BindingSet bindings = result.next(); + + IRI feature = (IRI) bindings.getValue("feature"); + String name = bindings.getValue("name").stringValue(); + + GraphFeature graphFeature = new GraphFeature(feature.toString(), name); + features.add(graphFeature); + } + + return features; + } + private void commitChanges(final List statements) { + RepositoryConnection repoConnection = repository.getConnection(); + repoConnection.add(statements); + repoConnection.close(); } - @Override - public List getAll() { - return null; + private int executeReviewFeatureQuery() { + return 0; } - @Override - public void update(Feature entity) { + private int executeFeatureQuery(RepositoryConnection repoConnection, String query, int batchSize, int from) { + Integer count; + TupleQueryResult result = Utils.runSparqlSelectQuery(repoConnection, query); - } + List analyzedDocumentDTOS = new ArrayList<>(); + List source = new ArrayList<>(); + + 
count = 1; + + while (result.hasNext()) { + BindingSet bindings = result.next(); + if (count >= from) { + try { + + IRI appIRI = (IRI) bindings.getValue("subject"); + IRI documentIRI = (IRI) bindings.getValue("object"); + String text = bindings.getValue("text").stringValue(); + + analyzedDocumentDTOS.add(new AnalyzedDocumentDTO(documentIRI.toString(), text)); + + if (documentIRI.toString().contains(schemaIRI.getReviewIRI().toString())) { + String reviewSource = schemaIRI.getDigitalDocumentIRI().toString() + + appIRI.toString().replace(schemaIRI.getAppIRI().toString(), "") + + "-" + DocumentType.REVIEWS; + documentIRI = factory.createIRI(reviewSource); + } + + source.add(documentIRI); + + if (count % batchSize == 0) { + runFeatureExtractionBatch(analyzedDocumentDTOS, source, count, appIRI); + + analyzedDocumentDTOS = new ArrayList<>(); + source = new ArrayList<>(); + } + } catch (Exception e) { + return count; + } + + } + ++count; + } + + // Run last batch + if (count % batchSize != 1) + runFeatureExtractionBatch(analyzedDocumentDTOS, source, count, schemaIRI.getAppIRI()); + + return -1; - @Override - public void delete(String id) { } + + private Object useRepository(Class clazz) { return repositoryFactory.createRepository(clazz); } diff --git a/src/main/java/upc/edu/gessi/repo/service/impl/NLFeatureServiceImpl.java b/src/main/java/upc/edu/gessi/repo/service/impl/NLFeatureServiceImpl.java index 4f593e9..c8d5523 100644 --- a/src/main/java/upc/edu/gessi/repo/service/impl/NLFeatureServiceImpl.java +++ b/src/main/java/upc/edu/gessi/repo/service/impl/NLFeatureServiceImpl.java @@ -1,5 +1,8 @@ package upc.edu.gessi.repo.service.impl; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import jakarta.json.JsonObject; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpPost; import org.apache.http.entity.StringEntity; @@ -10,35 +13,53 @@ import org.json.JSONObject; import 
org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Lazy; +import org.springframework.core.ParameterizedTypeReference; +import org.springframework.http.*; import org.springframework.stereotype.Service; -import upc.edu.gessi.repo.dto.AnalyzedDocument; +import org.springframework.web.client.HttpServerErrorException; +import org.springframework.web.client.RestTemplate; +import upc.edu.gessi.repo.dto.AnalyzedDocumentDTO; +import upc.edu.gessi.repo.dto.Review.HUBResponseDTO; +import upc.edu.gessi.repo.dto.Review.ReviewDTO; import upc.edu.gessi.repo.service.NLFeatureService; import upc.edu.gessi.repo.util.Utils; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.List; + @Service @Lazy public class NLFeatureServiceImpl implements NLFeatureService { private Logger logger = LoggerFactory.getLogger(NLFeatureServiceImpl.class); + private final RestTemplate restTemplate; + + @Autowired + public NLFeatureServiceImpl(final RestTemplate restTemplate) { + this.restTemplate = restTemplate; + } @Value("${transfeatex.url}") private String nlFeatureExtractionEndpoint; - public List getNLFeatures(List documents) { + @Value("${hub.url}") + private String hubFeatureAnalysisEndpoint; + + public List getNLFeatures(List documents) { CloseableHttpClient httpClient = HttpClientBuilder.create().build(); - List analyzedDocuments = new ArrayList<>(); + List analyzedDocumentDTOS = new ArrayList<>(); try { HttpPost request = new HttpPost(nlFeatureExtractionEndpoint); request.addHeader("Content-Type", "application/json"); JSONArray array = new JSONArray(); - for (AnalyzedDocument doc : documents) { + for (AnalyzedDocumentDTO doc : documents) { doc.setText(Utils.escape(doc.getText())); JSONObject obj = new JSONObject(); obj.put("id", doc.getId()); @@ -63,9 +84,9 @@ public List 
getNLFeatures(List documents) { features.add(featureJSONArray.getString(j)); } - AnalyzedDocument analyzedDoc = - new AnalyzedDocument(document.getString("id"), features); - analyzedDocuments.add(analyzedDoc); + AnalyzedDocumentDTO analyzedDoc = + new AnalyzedDocumentDTO(document.getString("id"), features); + analyzedDocumentDTOS.add(analyzedDoc); } } catch (Exception ex) { @@ -76,8 +97,46 @@ public List getNLFeatures(List documents) { } catch (IOException e) { e.printStackTrace(); } - return analyzedDocuments; + return analyzedDocumentDTOS; } } + + public List getHUBFeatures(List reviews, String featureModel) { + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + + HttpEntity> requestBody = new HttpEntity<>(reviews, headers); + + String url = hubFeatureAnalysisEndpoint + "?feature_model=" + featureModel; + int retryLimit = 3; + int retryCount = 0; + RestTemplate restTemplate = new RestTemplate(); + + while (retryCount < retryLimit) { + try { + HUBResponseDTO responseDTO = restTemplate.postForObject( + url, requestBody, HUBResponseDTO.class); + return responseDTO.getAnalyzed_reviews(); + } catch (HttpServerErrorException e) { + if (e.getStatusCode().is5xxServerError()) { + retryCount++; + logger.error("Received {} response, retrying {}/{}.", e.getStatusCode(), retryCount, retryLimit); + if (retryCount == retryLimit) { + Utils.serializeReviews(reviews, logger); + logger.error("Max retries reached. 
Serialized the reviews to reviewsDTOList.json"); + } + } else { + throw e; + } + } catch (Exception e) { + logger.error("An unexpected error occurred: {}", e.getMessage(), e); + Utils.serializeReviews(reviews, logger); + throw e; + } + } + throw new RuntimeException("Failed to get a valid response from HUB after " + retryLimit + " attempts."); + } + + } diff --git a/src/main/java/upc/edu/gessi/repo/service/impl/ReviewServiceImpl.java b/src/main/java/upc/edu/gessi/repo/service/impl/ReviewServiceImpl.java index 84dbf2b..00876a8 100644 --- a/src/main/java/upc/edu/gessi/repo/service/impl/ReviewServiceImpl.java +++ b/src/main/java/upc/edu/gessi/repo/service/impl/ReviewServiceImpl.java @@ -92,5 +92,17 @@ private Object useRepository(Class clazz) { return repositoryFactory.createRepository(clazz); } + @Override + public List getBatched(int batch, int offset) { + return ((ReviewRepository) useRepository(ReviewRepository.class)).findBatched(batch, offset); + } + @Override + public List getAllSimplified() { + return ((ReviewRepository) useRepository(ReviewRepository.class)).findAllSimplified(); + } + @Override + public Integer getReviewCount() { + return ((ReviewRepository) useRepository(ReviewRepository.class)).getCount(); + } } diff --git a/src/main/java/upc/edu/gessi/repo/util/ReviewQueryBuilder.java b/src/main/java/upc/edu/gessi/repo/util/ReviewQueryBuilder.java index 4dcb12a..dd19309 100644 --- a/src/main/java/upc/edu/gessi/repo/util/ReviewQueryBuilder.java +++ b/src/main/java/upc/edu/gessi/repo/util/ReviewQueryBuilder.java @@ -19,6 +19,35 @@ public String findAllQuery() { queryBuilder.append("}\n"); return queryBuilder.toString(); } + public String findAllQueryWithLimitOffset(int limit, int offset) { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("PREFIX sc: \n"); + queryBuilder.append("PREFIX rdf: \n"); + queryBuilder.append("SELECT ?id ?text\n"); + queryBuilder.append("WHERE {\n"); + queryBuilder.append(" ?subject rdf:type sc:Review ;\n"); 
+ queryBuilder.append(" sc:reviewBody ?text ;\n"); + queryBuilder.append(" sc:datePublished ?date;\n"); + queryBuilder.append(" sc:identifier ?id .\n"); + queryBuilder.append("}\n"); + queryBuilder.append("ORDER BY DESC (?date)\n"); + queryBuilder.append("LIMIT ").append(limit).append("\n"); + queryBuilder.append("OFFSET ").append(offset).append("\n"); + return queryBuilder.toString(); + } + + public String findAllSimplifiedQuery() { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("PREFIX sc: \n"); + queryBuilder.append("PREFIX rdf: \n"); + queryBuilder.append("SELECT ?id ?text\n"); + queryBuilder.append("WHERE {\n"); + queryBuilder.append(" ?subject rdf:type sc:Review ;\n"); + queryBuilder.append(" sc:reviewBody ?text ;\n"); + queryBuilder.append(" sc:identifier ?id .\n"); + queryBuilder.append("}\n"); + return queryBuilder.toString(); + } public String findReviewsByIds(final List ids) { StringBuilder queryBuilder = new StringBuilder(); @@ -202,4 +231,16 @@ public String deleteByIDQuery(final String reviewId) { queryBuilder.append("}"); return queryBuilder.toString(); } + public String getCountQuery() { + StringBuilder queryBuilder = new StringBuilder(); + queryBuilder.append("PREFIX sc: \n"); + queryBuilder.append("PREFIX rdf: \n"); + queryBuilder.append("SELECT (COUNT(?subject) AS ?count)\n"); + queryBuilder.append("WHERE {\n"); + queryBuilder.append(" ?subject rdf:type sc:Review .\n"); + queryBuilder.append("}"); + return queryBuilder.toString(); + } + + } diff --git a/src/main/java/upc/edu/gessi/repo/util/Utils.java b/src/main/java/upc/edu/gessi/repo/util/Utils.java index d5c5742..b807b4a 100644 --- a/src/main/java/upc/edu/gessi/repo/util/Utils.java +++ b/src/main/java/upc/edu/gessi/repo/util/Utils.java @@ -2,12 +2,16 @@ import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; 
import com.google.gson.Gson; import org.eclipse.rdf4j.query.TupleQuery; import org.eclipse.rdf4j.query.TupleQueryResult; import org.eclipse.rdf4j.query.Update; import org.eclipse.rdf4j.repository.RepositoryConnection; import org.json.JSONObject; +import org.slf4j.Logger; +import upc.edu.gessi.repo.dto.Review.ReviewDTO; import upc.edu.gessi.repo.dto.graph.Graph; import upc.edu.gessi.repo.dto.graph.GraphEdge; import upc.edu.gessi.repo.dto.graph.GraphNode; @@ -61,6 +65,17 @@ public static void saveJSONFile(Graph apps, String fileName) { } } + /** Writes the given reviews as indented JSON to the relative path src/main/resources/reviewsDTOList.json, which java.io.File resolves against the current working directory. Best effort: any exception raised while writing is logged through the supplied logger and is not rethrown, so callers get no failure signal. @param reviews the list handed to ObjectMapper.writeValue. @param logger receives the error entry when writing fails. */ public static void serializeReviews(List reviews, Logger logger) { + try { + ObjectMapper objectMapper = new ObjectMapper(); + /* INDENT_OUTPUT makes the written file pretty-printed. */ objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + File jsonFile = new File("src/main/resources/reviewsDTOList.json"); + objectMapper.writeValue(jsonFile, reviews); + } catch (Exception jsonException) { + /* Deliberately swallowed after logging; the exception is passed as the last argument so the stack trace is recorded. */ logger.error("Failed to serialize reviewsDTOList: {}", jsonException.getMessage(), jsonException); + } + } + /* jsonGenerator.writeStartObject(); // { jsonGenerator.writeStringField("name", "India"); diff --git a/src/main/resources/application-docker.properties b/src/main/resources/application-docker.properties new file mode 100644 index 0000000..850bdac --- /dev/null +++ b/src/main/resources/application-docker.properties @@ -0,0 +1,4 @@ +transfeatex.url=http://docker.host.internal:3004/extract-features +hub.url=http://docker.host.internal:3002/analyze/kg +inductive-knowledge-service.url=http://docker.host.internal:5001 +db.url=http://docker.host.internal:7200/repositories/MApp-KG_dev diff --git a/src/main/resources/application-localhost.properties b/src/main/resources/application-localhost.properties new file mode 100644 index 0000000..54cec0c --- /dev/null +++ b/src/main/resources/application-localhost.properties @@ -0,0 +1,4 @@ +transfeatex.url=http://localhost:3004/extract-features +hub.url=http://localhost:3002/analyze/kg +inductive-knowledge-service.url=http://localhost:5001 
+db.url=http://gessi-chatbots.essi.upc.edu:7200/repositories/MApp-KG_dev diff --git a/src/main/resources/application.properties b/src/main/resources/application.properties index 705a69a..4465c82 100644 --- a/src/main/resources/application.properties +++ b/src/main/resources/application.properties @@ -1,20 +1,8 @@ -transfeatex.url=http://localhost:3004/extract-features -inductive-knowledge-service.url=http://localhost:5001 -#db.url=http://gessi-chatbots.essi.upc.edu:7200/repositories/app-data-repo -#db.url=http://localhost:7200/repositories/app-data-repository -#db.url=http://gessi-chatbots.essi.upc.edu:7200/repositories/MApp-KG -db.url=http://gessi-chatbots.essi.upc.edu:7200/repositories/MApp-KG_dev -# db.username=app_data_repository -# db.password=****************** scanner-service.url=http://localhost:5500/export-data max-days-reviews=365 spring.mvc.pathmatch.matching-strategy=ant_path_matcher rml.path=rml/extended.rml.ttl server.port=3003 -#spring.jpa.generate-ddl=true logging.level.web = INFO -db.username=${DB_USERNAME} -db.password=${DB_PASSWORD} - -##re_miner -#T^W#MP.wNk(7CsX# +db.username=XXXX +db.password=XXXX \ No newline at end of file diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml index 8b5fad3..603d4f9 100644 --- a/src/main/resources/logback.xml +++ b/src/main/resources/logback.xml @@ -1,12 +1,12 @@ - error.log + review_service.log %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{35} - %msg%n - +