Skip to content

Commit

Permalink
Cria classe para extrair do XML os materiais suplementares e cria Art…
Browse files Browse the repository at this point in the history
…icleAssets.replace_names (#322)

* Cria classe para manipular material suplementar

* Cria teste para a classe que manipula material suplementar

* Cria atributo em ArticleAssets para substituir valores de xlink:href

* Cria testes para ArticleAssets.replace_names

* Remove item duplicado
  • Loading branch information
robertatakenaka authored Sep 12, 2022
1 parent f355521 commit 7275906
Show file tree
Hide file tree
Showing 2 changed files with 220 additions and 1 deletion.
49 changes: 49 additions & 0 deletions packtools/sps/models/article_assets.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
class AssetReplacementError(Exception):
    """Error type for asset replacement.

    NOTE(review): nothing in the visible code raises it — confirm intended use.
    """


class ArticleAssets:
ASSET_TAGS = (
'graphic',
Expand Down Expand Up @@ -32,6 +36,26 @@ def article_assets(self):

return _assets

def replace_names(self, from_to):
    """
    Replace the name of every asset that has an entry in ``from_to``.

    Parameters
    ----------
    from_to : dict
        Maps a current asset name to the name that must replace it.

    Returns
    -------
    str list : names of the assets in ``self.article_assets`` that have no
        entry in ``from_to`` and were therefore left unchanged
    """
    not_found = []
    for asset in self.article_assets:
        # Read the name once, outside the ``try``, so the only KeyError
        # handled below is the one raised by the lookup in ``from_to``.
        current_name = asset.name
        try:
            new_name = from_to[current_name]
        except KeyError:
            not_found.append(current_name)
        else:
            asset.name = new_name
    return not_found


class Asset:
def __init__(self, node, parent_map):
Expand All @@ -42,6 +66,10 @@ def __init__(self, node, parent_map):
def name(self):
return self.node.attrib["{http://www.w3.org/1999/xlink}href"]

@name.setter
def name(self, value):
    """Store *value* in the node's ``xlink:href`` attribute."""
    href_attribute = "{http://www.w3.org/1999/xlink}href"
    self.node.set(href_attribute, value)

@property
def id(self):
current_node = self.node
Expand Down Expand Up @@ -73,3 +101,24 @@ def type(self):
return 'optimised'
else:
return 'original'


class SupplementaryMaterials:
    """Select the supplementary-material assets found by ``ArticleAssets``."""

    def __init__(self, xmltree):
        """Keep *xmltree* and wrap it in an ``ArticleAssets`` instance."""
        self.xmltree = xmltree
        self._assets = ArticleAssets(xmltree)

    @property
    def items(self):
        """Return the assets whose node tag is ``supplementary-material``
        or ``inline-supplementary-material``, in document-asset order."""
        wanted_tags = (
            'supplementary-material',
            'inline-supplementary-material',
        )
        return [
            asset
            for asset in self._assets.article_assets
            if asset.node.tag in wanted_tags
        ]

    @property
    def data(self):
        """Return one ``{"id": ..., "name": ...}`` dict per selected item."""
        return [dict(id=asset.id, name=asset.name) for asset in self.items]
172 changes: 171 additions & 1 deletion tests/sps/test_article_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

from packtools.sps.utils import xml_utils

from packtools.sps.models.article_assets import ArticleAssets
from packtools.sps.models.article_assets import (
ArticleAssets,
SupplementaryMaterials,
)


def generate_xmltree(snippet):
Expand Down Expand Up @@ -661,3 +664,170 @@ def test_article_assets_optimised_png_as_original(self):
obtained = obtain_asset_dict(ArticleAssets(xmltree).article_assets)

self.assertDictEqual(expected, obtained)

# Calls ArticleAssets.replace_names with a mapping that covers both graphics
# of the snippet ("original.png", "miniatura.jpg") plus one extra key
# ("figura2.jpg") that has no matching xlink:href in the snippet.
# NOTE(review): the method name says "not_found", yet the assertion expects an
# empty not_found list — the names of the two replace_names tests may be
# swapped; confirm.
def test_replace_names_not_found(self):
snippet = """
<fig-group id="f01">
<fig xml:lang="pt">
<label>Figura 1</label>
<caption>
<title>Caption Figura PT</title>
</caption>
<attrib>
<p>Nota da tabela em pt</p>
</attrib>
</fig>
<fig xml:lang="en">
<label>Figure 1</label>
<caption>
<title>Caption Figura EN</title>
</caption>
<alternatives>
<graphic xlink:href="original.png" />
<graphic xlink:href="miniatura.jpg" specific-use="scielo-web" content-type="scielo-20x20" />
</alternatives>
<attrib>
<p><xref ref-type="fig" rid="f01">Figure 1</xref> Identification of <italic>Senna Senna</italic> Mill. (Fabaceae) species collected in different locations in northwestern Ceará State. <sup>*</sup> Exotic, <sup>**</sup> Endemic to Brazil. Source: Herbário Francisco José de Abreu Matos (HUVA).</p>
</attrib>
</fig>
</fig-group>
"""
xmltree = generate_xmltree(snippet)

# Two keys match graphics in the snippet; "figura2.jpg" matches none of them.
from_to = {
"original.png": "novo_original.png",
"miniatura.jpg": "novo_miniatura.jpg",
"figura2.jpg": "novo_figura2.jpg",
}
article_assets = ArticleAssets(xmltree)
not_found = article_assets.replace_names(from_to)
self.assertEqual(not_found, [])

# Re-read the assets to check that the new names were written to the tree.
updated = article_assets.article_assets
self.assertEqual(updated[0].name, 'novo_original.png')
self.assertEqual(updated[1].name, 'novo_miniatura.jpg')
# NOTE(review): the snippet holds only two xlink:href graphics, so index 2
# presumably raises IndexError unless generate_xmltree adds another asset —
# verify against that helper.
self.assertEqual(updated[2].name, 'novo_figura2.jpg')

# Calls ArticleAssets.replace_names on a snippet with two graphics and checks
# both the renamed xlink:href values and the returned not_found list.
# NOTE(review): the mapping key is "figura02.jpg" while the expected not_found
# entry is "figura2.jpg"; neither string is an xlink:href in the snippet.
# replace_names, as written next to this test, collects asset names that are
# missing from from_to, so it presumably returns [] here — confirm the
# intended contract and fix either the expectation or the implementation.
def test_replace_names(self):
snippet = """
<fig-group id="f01">
<fig xml:lang="pt">
<label>Figura 1</label>
<caption>
<title>Caption Figura PT</title>
</caption>
<attrib>
<p>Nota da tabela em pt</p>
</attrib>
</fig>
<fig xml:lang="en">
<label>Figure 1</label>
<caption>
<title>Caption Figura EN</title>
</caption>
<alternatives>
<graphic xlink:href="original.png" />
<graphic xlink:href="miniatura.jpg" specific-use="scielo-web" content-type="scielo-20x20" />
</alternatives>
<attrib>
<p><xref ref-type="fig" rid="f01">Figure 1</xref> Identification of <italic>Senna Senna</italic> Mill. (Fabaceae) species collected in different locations in northwestern Ceará State. <sup>*</sup> Exotic, <sup>**</sup> Endemic to Brazil. Source: Herbário Francisco José de Abreu Matos (HUVA).</p>
</attrib>
</fig>
</fig-group>
"""
xmltree = generate_xmltree(snippet)

# Two keys match graphics in the snippet; "figura02.jpg" matches none of them.
from_to = {
"original.png": "novo_original.png",
"miniatura.jpg": "novo_miniatura.jpg",
"figura02.jpg": "novo_figura2.jpg",
}

article_assets = ArticleAssets(xmltree)
not_found = article_assets.replace_names(from_to)

# Re-read the assets to check that the new names were written to the tree.
updated = article_assets.article_assets
self.assertEqual(updated[0].name, 'novo_original.png')
self.assertEqual(updated[1].name, 'novo_miniatura.jpg')
self.assertEqual(not_found, ["figura2.jpg"])


class SupplementaryMaterialsTest(TestCase):
    """Check which assets ``SupplementaryMaterials.items`` selects."""

    def _get_xmltree(self, xml):
        """Parse *xml* with ``xml_utils.get_xml_tree`` and return the result."""
        return xml_utils.get_xml_tree(xml)

    def _assert_items(self, xmltree, expected):
        """Assert that the items of *xmltree* match *expected*.

        *expected* is a list of ``(id, name)`` tuples, in item order.
        """
        items = SupplementaryMaterials(xmltree).items
        # Compare the sizes first: looping over ``items`` alone would let the
        # test pass without a single assertion when fewer items are found.
        self.assertEqual(len(expected), len(items))
        for i, (item, (expected_id, expected_name)) in enumerate(
            zip(items, expected)
        ):
            with self.subTest(i):
                self.assertEqual(item.id, expected_id)
                self.assertEqual(item.name, expected_name)

    def test_inline_supplementary_material(self):
        """An inline-supplementary-material is listed with its href and a None id."""
        data = (
            """<article xmlns:xlink="http://www.w3.org/1999/xlink" >
<back>
<fn-group>
<fn fn-type="supplementary-material">
<p>
<bold>Supplementary Information</bold>
</p>
<p>Supplementary information (chromatograms from chiral GC analysis) is available free of charge at <ext-link ext-link-type="uri" xlink:href="http://jbcs.sbq.org.br">http://jbcs.sbq.org.br</ext-link> as <inline-supplementary-material xlink:href="https://minio.scielo.br/documentstore/1678-4790/LgRcS7ZYYQ5wSDKw8wKytSp/818bf2b94169513756c9f4734c24d9bc774a3795.pdf" mimetype="application" mime-subtype="pdf">PDF</inline-supplementary-material> file.</p>
</fn>
</fn-group></back></article>
"""
        )
        xmltree = self._get_xmltree(data)

        expected = [
            (None, 'https://minio.scielo.br/documentstore/1678-4790/LgRcS7ZYYQ5wSDKw8wKytSp/818bf2b94169513756c9f4734c24d9bc774a3795.pdf'),
        ]

        self._assert_items(xmltree, expected)

    def test_supplementary_material(self):
        """Each supplementary-material is listed with its own id and href."""
        data = (
            """<article xmlns:xlink="http://www.w3.org/1999/xlink" >
<back>
<app-group>
<app id="app01">
<title>Material suplementar</title>
<p>O seguinte material suplementar está disponível online:</p>
<p>
<supplementary-material id="suppl01" mime-subtype="pdf" mimetype="application" xlink:href="c834.pdf">
<label>Apêndice A –</label>
<caption>
<title>
A relação entre energia e frequência de um
<italic>quantum</italic>
obtida por Einstein em 1905
</title>
</caption>
</supplementary-material>
</p>
</app>
<app id="app02">
<p>
<supplementary-material id="suppl02" mime-subtype="pdf" mimetype="application" xlink:href="0b97.pdf">
<label>Apêndice B –</label>
<caption>
<title>Equivalência entre o Princípio de Maupertuis (mecânica do ponto material) e de Fermat (Ótica) no contexto não-relativístico.</title>
</caption>
</supplementary-material>
</p>
</app>
</app-group></back></article>
"""
        )
        xmltree = self._get_xmltree(data)

        expected = [
            ('suppl01', 'c834.pdf'),
            ('suppl02', '0b97.pdf'),
        ]

        self._assert_items(xmltree, expected)

0 comments on commit 7275906

Please sign in to comment.