From 7275906b2a66266551a0e0259e5e60ff9a61fdfd Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Mon, 12 Sep 2022 14:49:40 -0300 Subject: [PATCH] Cria classe para extrair do XML os materiais suplementares e cria ArticleAssets.replace_names (#322) * Cria classe para manipular material suplementar * Cria teste para a classe que manipula material suplementar * Cria atributo em ArticleAssets para substituir valores de xlink:href * Cria testes para ArticleAssets.replace_names * Remove item duplicado --- packtools/sps/models/article_assets.py | 49 +++++++ tests/sps/test_article_assets.py | 172 ++++++++++++++++++++++++- 2 files changed, 220 insertions(+), 1 deletion(-) diff --git a/packtools/sps/models/article_assets.py b/packtools/sps/models/article_assets.py index 38bffac00..c8aedbb1f 100644 --- a/packtools/sps/models/article_assets.py +++ b/packtools/sps/models/article_assets.py @@ -1,3 +1,7 @@ +class AssetReplacementError(Exception): + ... + + class ArticleAssets: ASSET_TAGS = ( 'graphic', @@ -32,6 +36,26 @@ def article_assets(self): return _assets + def replace_names(self, from_to): + """ + Replace names + + Parameters + ---------- + from_to : dict + + Returns + ------- + str list : not found names to replace + """ + not_found = [] + for asset in self.article_assets: + try: + asset.name = from_to[asset.name] + except KeyError as e: + not_found.append(asset.name) + return not_found + class Asset: def __init__(self, node, parent_map): @@ -42,6 +66,10 @@ def __init__(self, node, parent_map): def name(self): return self.node.attrib["{http://www.w3.org/1999/xlink}href"] + @name.setter + def name(self, value): + self.node.set("{http://www.w3.org/1999/xlink}href", value) + @property def id(self): current_node = self.node @@ -73,3 +101,24 @@ def type(self): return 'optimised' else: return 'original' + + +class SupplementaryMaterials: + + def __init__(self, xmltree): + self.xmltree = xmltree + self._assets = ArticleAssets(xmltree) + + @property + def items(self): + return [item + for item in self._assets.article_assets + if item.node.tag in ('supplementary-material', + 'inline-supplementary-material') + ] + + @property + def data(self): + return [{"id": item.id, "name": item.name, } + for item in self.items + ] diff --git a/tests/sps/test_article_assets.py b/tests/sps/test_article_assets.py index 97944f03d..72c1aea11 100644 --- a/tests/sps/test_article_assets.py +++ b/tests/sps/test_article_assets.py @@ -2,7 +2,10 @@ from packtools.sps.utils import xml_utils -from packtools.sps.models.article_assets import ArticleAssets +from packtools.sps.models.article_assets import ( + ArticleAssets, + SupplementaryMaterials, +) def generate_xmltree(snippet): @@ -661,3 +664,170 @@ def test_article_assets_optimised_png_as_original(self): obtained = obtain_asset_dict(ArticleAssets(xmltree).article_assets) self.assertDictEqual(expected, obtained) + + def test_replace_names_not_found(self): + snippet = """ + + + + + Caption Figura PT + + +

Nota da tabela em pt

+
+
+ + + + Caption Figura EN + + + + + + +

Figure 1 Identification of Senna Senna Mill. (Fabaceae) species collected in different locations in northwestern Ceará State. * Exotic, ** Endemic to Brazil. Source: Herbário Francisco José de Abreu Matos (HUVA).

+
+
+
+ """ + xmltree = generate_xmltree(snippet) + + from_to = { + "original.png": "novo_original.png", + "miniatura.jpg": "novo_miniatura.jpg", + "figura2.jpg": "novo_figura2.jpg", + } + article_assets = ArticleAssets(xmltree) + not_found = article_assets.replace_names(from_to) + self.assertEqual(not_found, []) + + updated = article_assets.article_assets + self.assertEqual(updated[0].name, 'novo_original.png') + self.assertEqual(updated[1].name, 'novo_miniatura.jpg') + self.assertEqual(updated[2].name, 'novo_figura2.jpg') + + def test_replace_names(self): + snippet = """ + + + + + Caption Figura PT + + +

Nota da tabela em pt

+
+
+ + + + Caption Figura EN + + + + + + +

Figure 1 Identification of Senna Senna Mill. (Fabaceae) species collected in different locations in northwestern Ceará State. * Exotic, ** Endemic to Brazil. Source: Herbário Francisco José de Abreu Matos (HUVA).

+
+
+
+ """ + xmltree = generate_xmltree(snippet) + + from_to = { + "original.png": "novo_original.png", + "miniatura.jpg": "novo_miniatura.jpg", + "figura02.jpg": "novo_figura2.jpg", + } + + article_assets = ArticleAssets(xmltree) + not_found = article_assets.replace_names(from_to) + + updated = article_assets.article_assets + self.assertEqual(updated[0].name, 'novo_original.png') + self.assertEqual(updated[1].name, 'novo_miniatura.jpg') + self.assertEqual(not_found, ["figura2.jpg"]) + + +class SupplementaryMaterialsTest(TestCase): + def _get_xmltree(self, xml): + return xml_utils.get_xml_tree(xml) + + def test_inline_supplementary_material(self): + + data = ( + """
+ + + +

+ Supplementary Information +

+

Supplementary information (chromatograms from chiral GC analysis) is available free of charge at http://jbcs.sbq.org.br as PDF file.

+
+
+ + """ + ) + xmltree = xml_utils.get_xml_tree(data) + + expected = [ + (None, 'https://minio.scielo.br/documentstore/1678-4790/LgRcS7ZYYQ5wSDKw8wKytSp/818bf2b94169513756c9f4734c24d9bc774a3795.pdf'), + ] + + for i, item in enumerate(SupplementaryMaterials(xmltree).items): + with self.subTest(i): + self.assertEqual(item.id, expected[i][0]) + self.assertEqual(item.name, expected[i][1]) + + def test_supplementary_material(self): + + data = ( + """
+ + + +Material suplementar +

O seguinte material suplementar está disponível online:

+

+ + + + +A relação entre energia e frequência de um +<italic>quantum</italic> +obtida por Einstein em 1905 + + + +

+
+ +

+ + + +Equivalência entre o Princípio de Maupertuis (mecânica do ponto material) e de Fermat (Ótica) no contexto não-relativístico. + + +

+
+
+ + """ + ) + xmltree = xml_utils.get_xml_tree(data) + + expected = [ + ('suppl01', 'c834.pdf'), + ('suppl02', '0b97.pdf'), + ] + + for i, item in enumerate(SupplementaryMaterials(xmltree).items): + with self.subTest(i): + self.assertEqual(item.id, expected[i][0]) + self.assertEqual(item.name, expected[i][1]) +