Skip to content

Commit

Permalink
getting ready for Glottolog 5.0; closes #75
Browse files: browse the repository at this point in the history
  • Loading branch information
xrotwang committed Mar 7, 2024
1 parent 5851119 commit bf73ae4
Show file tree
Hide file tree
Showing 20 changed files with 200 additions and 241 deletions.
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ classifiers =
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
License :: OSI Approved :: Apache Software License
Expand Down Expand Up @@ -123,7 +124,7 @@ max-line-length = 100
exclude = .tox

[tox:tox]
envlist = py38, py39, py310, py311
envlist = py38, py39, py310, py311, py312
isolated_build = true
skip_missing_interpreter = true

Expand Down
2 changes: 1 addition & 1 deletion src/pyglottolog/admin_commands/elpubbib.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@


def run(args): # pragma: no cover
download(args.repos.bibfiles['elpub.bib'], args.log)
download(args.repos.bibfiles['elpub.bib'], args.log, args.repos)
19 changes: 0 additions & 19 deletions src/pyglottolog/admin_commands/grambankbib.py

This file was deleted.

69 changes: 0 additions & 69 deletions src/pyglottolog/admin_commands/langscibib.py

This file was deleted.

8 changes: 0 additions & 8 deletions src/pyglottolog/admin_commands/ldhbib.py

This file was deleted.

2 changes: 2 additions & 0 deletions src/pyglottolog/admin_commands/updatelinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def run(args):
langs = list(args.repos.languoids())
updated = set()
for cls in LinkProvider.__subclasses__():
if getattr(cls, '__inactive__', False):
continue
name = cls.__name__.lower()
if (not getattr(args, 'provider', None)) or (name in args.provider):
args.log.info('updating {0} links ...'.format(name))
Expand Down
32 changes: 16 additions & 16 deletions src/pyglottolog/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
import re
import typing
import pathlib
import functools
import contextlib
import collections

import pycldf.util
from csvw import TableGroup, Column
from clldutils.path import walk, git_describe
from clldutils.misc import lazyproperty
from clldutils.apilib import API
from clldutils.jsonlib import load
import clldutils.iso_639_3
Expand Down Expand Up @@ -110,56 +110,56 @@ def _cfg(self, name, cls=None):
return config.Config.from_ini(
self.path('config', name + '.ini'), object_class=cls or config.Generic)

@lazyproperty
@functools.cached_property
def aes_status(self) -> typing.Dict[str, config.AES]:
"""
:rtype: mapping with :class:`config.AES` values.
"""
return self._cfg('aes_status', cls=config.AES)

@lazyproperty
@functools.cached_property
def aes_sources(self) -> typing.Dict[str, config.AESSource]:
"""
:rtype: mapping with :class:`config.AESSource` values
"""
return self._cfg('aes_sources', cls=config.AESSource)

@lazyproperty
@functools.cached_property
def document_types(self) -> typing.Dict[str, config.DocumentType]:
"""
:rtype: mapping with :class:`config.DocumentType` values
"""
return self._cfg('document_types', cls=config.DocumentType)

@lazyproperty
@functools.cached_property
def med_types(self) -> typing.Dict[str, config.MEDType]:
"""
:rtype: mapping with :class:`config.MEDType` values
"""
return self._cfg('med_types', cls=config.MEDType)

@lazyproperty
@functools.cached_property
def macroareas(self) -> typing.Dict[str, config.Macroarea]:
"""
:rtype: mapping with :class:`config.Macroarea` values
"""
return self._cfg('macroareas', cls=config.Macroarea)

@lazyproperty
@functools.cached_property
def language_types(self) -> typing.Dict[str, config.LanguageType]:
"""
:rtype: mapping with :class:`config.LanguageType` values
"""
return self._cfg('language_types', cls=config.LanguageType)

@lazyproperty
@functools.cached_property
def languoid_levels(self) -> typing.Dict[str, config.LanguoidLevel]:
"""
:rtype: mapping with :class:`config.LanguoidLevel` values
"""
return self._cfg('languoid_levels', cls=config.LanguoidLevel)

@lazyproperty
@functools.cached_property
def editors(self) -> typing.Dict[str, config.Generic]:
"""
Metadata about editors of Glottolog
Expand All @@ -168,7 +168,7 @@ def editors(self) -> typing.Dict[str, config.Generic]:
"""
return self._cfg('editors')

@lazyproperty
@functools.cached_property
def publication(self) -> typing.Dict[str, config.Generic]:
"""
Metadata about the Glottolog publication
Expand All @@ -177,7 +177,7 @@ def publication(self) -> typing.Dict[str, config.Generic]:
"""
return self._cfg('publication')

@lazyproperty
@functools.cached_property
def iso(self) -> clldutils.iso_639_3.ISO:
"""
:return: `clldutils.iso_639_3.ISO` instance, fed with the data of the latest \
Expand All @@ -192,7 +192,7 @@ def ftsindex(self) -> pathlib.Path:
"""
return self.build_path('whoosh')

@lazyproperty
@functools.cached_property
def _tree_dirs(self):
return list(walk(self.tree, mode='dirs'))

Expand Down Expand Up @@ -327,7 +327,7 @@ def newick_tree(
trees.append('{0};'.format(ns))
return '\n'.join(trees)

@lazyproperty
@functools.cached_property
def bibfiles(self) -> references.BibFiles:
"""
Access reference data by BibFile.
Expand Down Expand Up @@ -355,13 +355,13 @@ def refs_by_languoid(self, *bibfiles, **kw):
res[lang.id].append(entry)
return res, all_

@lazyproperty
@functools.cached_property
def hhtypes(self):
# Note: The file `hhtype.ini` does not exist anymore. This is fixed in HHTypes, when
# calling `config.get_ini`. Only used when compiling monster.bib.
return references.HHTypes(self.references_path('hhtype.ini'))

@lazyproperty
@functools.cached_property
def triggers(self):
res = {'inlg': [], 'lgcode': []}
for lang in self.languoids():
Expand All @@ -372,7 +372,7 @@ def triggers(self):
for text in lang.cfg.getlist('triggers', type_)])
return res

@lazyproperty
@functools.cached_property
def macroarea_map(self):
res = {}
for lang in self.languoids():
Expand Down
15 changes: 12 additions & 3 deletions src/pyglottolog/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import attr
from clldutils.misc import nfilter
from clldutils.inifile import INI
from clldutils.jsonlib import load

__all__ = [
'AES', 'AESSource', 'Macroarea', 'DocumentType', 'LanguageType', 'LanguoidLevel',
Expand All @@ -16,15 +17,17 @@ class ConfigObject(object):
Factory to turn INI file sections into instances of `@attr.s` classes.
"""
@classmethod
def from_section(cls, cfg, section):
def from_section(cls, cfg, section, fname):
try:
fields = set(f.name for f in attr.fields(cls))
except attr.exceptions.NotAnAttrsClassError:
fields = None

kw = {'name' if 'id' in cfg[section] else 'id': section}
kw.update(cfg[section].items())
return cls(**{k: v for k, v in kw.items() if fields is None or k in fields})
res = cls(**{k: v for k, v in kw.items() if fields is None or k in fields})
res._fname = fname
return res


class Generic(ConfigObject):
Expand Down Expand Up @@ -95,6 +98,12 @@ class Macroarea(ConfigObject):
#: Glottolog reference ID linking to further information
reference_id = attr.ib()

@property
def geojson(self):
    """
    GeoJSON data for the macroarea, if available.

    The data is looked up in the file `macroareas/voronoi/<name>.geojson` in the directory
    of the INI file the object was read from, with `<name>` being the macroarea name in
    lower case and with spaces replaced by underscores.

    :return: The content of the file as returned by `clldutils.jsonlib.load`, or `None` if \
    the object was not created from an INI file or no matching file exists.
    """
    # `_fname` is only attached by `ConfigObject.from_section`; instances created directly
    # do not carry it, so there is nowhere to look for the data.
    ini_path = getattr(self, '_fname', None)
    if ini_path is None:
        return None
    stem = self.name.lower().replace(' ', '_')
    path = ini_path.parent / 'macroareas' / 'voronoi' / '{}.geojson'.format(stem)
    return load(path) if path.exists() else None


@attr.s
class DocumentType(ConfigObject):
Expand Down Expand Up @@ -178,7 +187,7 @@ def from_ini(cls, fname, object_class):
ini = get_ini(fname)
d = collections.OrderedDict()
for sec in ini.sections():
obj = object_class.from_section(ini, sec)
obj = object_class.from_section(ini, sec, fname)
d[obj.id] = obj
res = cls(**d)
res.__defaults__ = ini['DEFAULT']
Expand Down
4 changes: 3 additions & 1 deletion src/pyglottolog/iso.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def iter(cls, max_year=None, cache_dir=None, log=None):
print('no crs for {}, page {}'.format(year, page))
else:
for i, cr in enumerate(tables[0]):
yield cls(**{k.replace(' ', '_'): v for k, v in cr.items()})
d = {k.replace(' ', '_'): v for k, v in cr.items()}
if 'Effective_Date' in d:
yield cls(**{k.replace(' ', '_'): v for k, v in cr.items()})
if i < 99:
break
page += 1 # pragma: no cover
Expand Down
3 changes: 2 additions & 1 deletion src/pyglottolog/languoids/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,9 @@ class Reference(object):
key = attr.ib() #:
pages = attr.ib(default=None) #:
trigger = attr.ib(default=None)
endtag = attr.ib(default='**')
pattern = re.compile(
r"\*\*(?P<key>[a-z0-9\-_]+:[a-zA-Z.?\-;*'/()\[\]!_:0-9\u2014]+?)\*\*"
r"\*\*(?P<key>[a-z0-9\-_]+:[a-zA-Z.?\-;*'/()\[\]!_:0-9\u2014]+?)(?P<endtag>\*\*|\(\*\*\))"
r"(:(?P<pages>[0-9\-f]+))?"
r'(<trigger "(?P<trigger>[^\"]+)">)?')
old_pattern = re.compile(r'[^\[]+\[(?P<pages>[^\]]*)\]\s*\([0-9]+\s+(?P<key>[^\)]+)\)')
Expand Down
1 change: 1 addition & 0 deletions src/pyglottolog/links/elcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def format_date() -> str:


class ElCat(LinkProvider):
__inactive__ = True
status_map = {}

def __init__(self, repos=None):
Expand Down
16 changes: 15 additions & 1 deletion src/pyglottolog/links/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def read_grouped_cldf_languages(doi): # pragma: no cover
rec = cldfzenodo.Record.from_doi(doi)
rec = cldfzenodo.Record.from_concept_doi(doi)
with TemporaryDirectory() as tmp:
ds = rec.download_dataset(tmp)
langs = sorted(
Expand Down Expand Up @@ -77,3 +77,17 @@ class WALS(LinkProvider): # pragma: no cover
__doi__ = '10.5281/zenodo.3606197'
__url_template__ = 'https://' + __domain__ + '/languoid/lect/wals_code_{0[id]}'
__label_template__ = '{0[name]}'


class Grambank(LinkProvider):  # pragma: no cover
    # Link provider configuration for Grambank.
    # NOTE(review): how `LinkProvider` consumes these attributes is not visible here;
    # presumably `__doi__` identifies the Zenodo record the languages are read from - confirm.
    __doi__ = '10.5281/zenodo.7740139'
    __label_template__ = '{0[name]}'
    __domain__ = "grambank.clld.org"
    # Formatted with a single item; its `id` entry fills the last path segment.
    __url_template__ = 'https://' + __domain__ + '/languages/{0[id]}'


class Lexibank(LinkProvider):  # pragma: no cover
    # Link provider configuration for Lexibank.
    # NOTE(review): how `LinkProvider` consumes these attributes is not visible here;
    # presumably `__doi__` identifies the Zenodo record the languages are read from - confirm.
    __doi__ = '10.5281/zenodo.5227817'
    __label_template__ = '{0[name]}'
    __domain__ = "lexibank.clld.org"
    # Formatted with a single item; its `id` entry fills the last path segment.
    __url_template__ = 'https://' + __domain__ + '/languages/{0[id]}'
Loading

0 comments on commit bf73ae4

Please sign in to comment.