Skip to content

Commit acb5cdb

Browse files
author
yongzhuo
committed
setup of pypi
1 parent 0240a44 commit acb5cdb

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+76
-40
lines changed

README.md

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# !/usr/bin/python
22
# -*- coding: utf-8 -*-
3-
# @time : 2021/7/27 19:28
3+
# @time : 2021/9/27 23:18
44
# @author : Mo
55
# @function:
File renamed without changes.

pytorch_nlu/output/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# !/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# @time : 2021/9/27 23:32
4+
# @author : Mo
5+
# @function:

pytorch_sequencelabeling/README.md pytorch_nlu/pytorch_sequencelabeling/README.md

+1-1

pytorch_sequencelabeling/slConfig.py pytorch_nlu/pytorch_sequencelabeling/slConfig.py

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
# @function: config of sequence-labeling, 超参数/类
66

77

8+
import os
9+
os.environ["USE_TORCH"] = "1"
810
from transformers import BertTokenizer, RobertaTokenizer, AlbertTokenizer, XLNetTokenizer, ElectraTokenizer, XLMTokenizer, AutoTokenizer
911
from transformers import BertConfig, RobertaConfig, AlbertConfig, XLNetConfig, ElectraConfig, XLMConfig, AutoConfig
1012
from transformers import BertModel, RobertaModel, AlbertModel, XLNetModel, ElectraModel, XLMModel, AutoModel

pytorch_sequencelabeling/slPredict.py pytorch_nlu/pytorch_sequencelabeling/slPredict.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import os
1111
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
1212
sys.path.append(path_root)
13-
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
13+
from tcConfig import model_config
14+
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
1415
from slConfig import _SL_MODEL_SOFTMAX, _SL_MODEL_GRID, _SL_MODEL_SPAN, _SL_MODEL_CRF
1516
from slConfig import _SL_DATA_CONLL, _SL_DATA_SPAN
1617
from slTools import get_logger, load_json

pytorch_sequencelabeling/slRun.py pytorch_nlu/pytorch_sequencelabeling/slRun.py

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
1313
sys.path.append(path_root)
1414

15+
from tcConfig import model_config
16+
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
1517
from slConfig import _SL_MODEL_SOFTMAX, _SL_MODEL_GRID, _SL_MODEL_SPAN, _SL_MODEL_CRF
1618
from slConfig import _SL_DATA_CONLL, _SL_DATA_SPAN
1719
from slTools import get_logger

pytorch_textclassification/README.md pytorch_nlu/pytorch_textclassification/README.md

+1-1

pytorch_textclassification/tcConfig.py pytorch_nlu/pytorch_textclassification/tcConfig.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
# @author : Mo
55
# @function: config of transformers and graph-model
66

7-
7+
import os
8+
os.environ["USE_TORCH"] = "1"
89
from transformers import BertTokenizer, RobertaTokenizer, AlbertTokenizer, XLNetTokenizer, ElectraTokenizer, XLMTokenizer, AutoTokenizer
910
from transformers import BertConfig, RobertaConfig, AlbertConfig, XLNetConfig, ElectraConfig, XLMConfig, AutoConfig
1011
from transformers import BertModel, RobertaModel, AlbertModel, XLNetModel, ElectraModel, XLMModel, AutoModel
@@ -36,7 +37,7 @@
3637

3738
# model算法超参数
3839
model_config = {
39-
"CUDA_VISIBLE_DEVICES": "1", # 环境, GPU-CPU, "-1"/"0"/"1"/"2"...
40+
"CUDA_VISIBLE_DEVICES": "0", # 环境, GPU-CPU, "-1"/"0"/"1"/"2"...
4041
"output_hidden_states": None, # [6,11] # 输出层, 即取第几层transformer的隐藏输出, list
4142
"pretrained_model_name_or_path": "", # 预训练模型地址
4243
"model_save_path": "save_path", # 训练模型保存-训练完毕模型目录

pytorch_textclassification/tcLayer.py pytorch_nlu/pytorch_textclassification/tcLayer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -188,14 +188,14 @@ def __init__(self, reduction="mean", inf=1e12):
188188

189189
def forward(self, logits, labels):
190190
logits = (1 - 2 * labels) * logits # <3, 4>
191-
logits_neg = logits - labels * self.inf # <3, 4>
192-
logits_pos = logits - (1 - labels) * self.inf # <3, 4>
191+
logits_neg = logits - labels * self.inf # <3, 4>, 减去选中多标签的index
192+
logits_pos = logits - (1 - labels) * self.inf # <3, 4>, 减去其他不需要的多标签Index
193193
zeros = torch.zeros_like(logits[..., :1]) # <3, 1>
194194
logits_neg = torch.cat([logits_neg, zeros], dim=-1) # <3, 5>
195195
logits_pos = torch.cat([logits_pos, zeros], dim=-1) # <3, 5>
196196
neg_loss = torch.logsumexp(logits_neg, dim=-1) # <3, >
197197
pos_loss = torch.logsumexp(logits_pos, dim=-1) # <3, >
198-
loss = neg_loss + pos_loss
198+
loss = neg_loss + pos_loss # pos比零大, neg比零小
199199
if "mean" == self.reduction:
200200
loss = loss.mean()
201201
else:

pytorch_textclassification/tcPredict.py pytorch_nlu/pytorch_textclassification/tcPredict.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import os
1111
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
1212
sys.path.append(path_root)
13-
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
13+
from tcConfig import model_config
14+
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
1415
from tcTools import get_logger, load_json
1516
from tcOffice import Office
1617
from tcData import Corpus
@@ -60,7 +61,8 @@ def predict(self, texts, logits_type="sigmoid"):
6061

6162

6263
if __name__ == "__main__":
63-
64+
# BERT-base = 8109M
65+
# path_config = "../output/text_classification/model_BERT/tc.config"
6466
path_config = "../output/text_classification/model_ERNIE/tc.config"
6567
tcp = TextClassificationPredict(path_config)
6668
texts = [{"text": "平乐县,古称昭州,隶属于广西壮族自治区桂林市,位于广西东北部,桂林市东南部,东临钟山县,南接昭平,西北毗邻阳朔,北连恭城,总面积1919.34平方公里。"},

pytorch_textclassification/tcRun.py pytorch_nlu/pytorch_textclassification/tcRun.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import os
1111
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "."))
1212
sys.path.append(path_root)
13+
from tcConfig import model_config
14+
os.environ["CUDA_VISIBLE_DEVICES"] = model_config.get("CUDA_VISIBLE_DEVICES", "0")
1315
from tcConfig import _TC_MULTI_CLASS, _TC_MULTI_LABEL
1416
from tcTools import get_logger
1517
from tcOffice import Office
@@ -123,11 +125,10 @@ def eval(self):
123125
save_steps = 320 # 存储步数
124126
ee = 0
125127
# 训练-验证语料地址, 可以只输入训练地址
126-
# path_corpus = path_root + "/corpus/text_classification/school"
127-
path_corpus = path_root + "/corpus/text_classification/org_tnews"
128-
129-
path_train = os.path.join(path_corpus, "train.json")
130-
path_dev = os.path.join(path_corpus, "dev.json")
128+
path_corpus = path_root + "/corpus/text_classification/org_multi-label_school"
129+
# path_corpus = path_root + "/corpus/text_classification/org_tnews"
130+
path_train = os.path.join(path_corpus, "train.json.augment")
131+
path_dev = os.path.join(path_corpus, "dev.json.augment")
131132
model_config["evaluate_steps"] = evaluate_steps # 评估步数
132133
model_config["save_steps"] = save_steps # 存储步数
133134
model_config["path_train"] = path_train
@@ -145,17 +146,17 @@ def eval(self):
145146
"ROBERTA": pretrained_model_dir + "/chinese_roberta_wwm_ext_pytorch",
146147
"ALBERT": pretrained_model_dir + "/albert_base_v1",
147148
"XLNET": pretrained_model_dir + "/chinese_xlnet_mid_pytorch",
148-
"ERNIE": pretrained_model_dir + "/ERNIE_stable-1.0.1-pytorch",
149-
# "ERNIE": pretrained_model_dir + "/ernie-tiny",
149+
# "ERNIE": pretrained_model_dir + "/ERNIE_stable-1.0.1-pytorch",
150+
"ERNIE": pretrained_model_dir + "/ernie-tiny",
150151
"BERT": pretrained_model_dir + "/bert-base-chinese",
151152
}
152-
idx = 0 # 选择的预训练模型类型---model_type
153+
idx = 1 # 选择的预训练模型类型---model_type
153154
model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path[model_type[idx]]
154155
# model_config["model_save_path"] = "../output/text_classification/model_{}".format(model_type[idx] + "_" + str(get_current_time()))
155156
model_config["model_save_path"] = "../output/text_classification/model_{}".format(model_type[idx])
156157
model_config["model_type"] = model_type[idx]
157158

158-
os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config["CUDA_VISIBLE_DEVICES"])
159+
# os.environ["CUDA_VISIBLE_DEVICES"] = str(model_config["CUDA_VISIBLE_DEVICES"])
159160

160161
# main
161162
lc = TextClassification(model_config)

pytorch_nlu/version.py

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# !/usr/bin/python
2+
# -*- coding: utf-8 -*-
3+
# @time : 2020/12/21 22:24
4+
# @author : Mo
5+
# @function: version of Pytorch-NLU
6+
7+
8+
__version__ = "0.0.1"

setup.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# @function :setup of Pytorch-NLU
66

77

8+
from pytorch_nlu.version import __version__
89
from setuptools import find_packages, setup
910
import codecs
1011

@@ -23,23 +24,32 @@
2324
install_requires = list(map(lambda x: x.strip(), reader.readlines()))
2425

2526
setup(name=NAME,
26-
version="0.0.1",
27+
version=__version__,
2728
description=DESCRIPTION,
2829
long_description=long_description,
2930
long_description_content_type="text/markdown",
3031
author=AUTHOR,
3132
author_email=EMAIL,
3233
url=URL,
33-
packages=find_packages(exclude=("test")),
34+
packages=find_packages(),
3435
install_requires=install_requires,
35-
include_package_data=True,
36+
package_data={"pytorch_nlu": ["*.*", "corpus/*",
37+
"pytorch_textclassification/*",
38+
"pytorch_sequencelabeling/*",
39+
"corpus/text_classification/*",
40+
"corpus/sequence_labeling/*",
41+
"corpus/text_classification/school/*",
42+
"corpus/text_classification/tnews/*",
43+
"corpus/sequence_labeling/ner_china_people_daily_1998_conll/*",
44+
"corpus/sequence_labeling/ner_china_people_daily_1998_span/*",]},
3645
license=LICENSE,
37-
classifiers=["License :: OSI Approved :: MIT License",
46+
classifiers=["License :: OSI Approved :: Apache Software License",
3847
"Programming Language :: Python :: 3.4",
3948
"Programming Language :: Python :: 3.5",
4049
"Programming Language :: Python :: 3.6",
4150
"Programming Language :: Python :: 3.7",
4251
"Programming Language :: Python :: 3.8",
52+
"Programming Language :: Python :: 3.9",
4353
"Programming Language :: Python :: Implementation :: CPython",
4454
"Programming Language :: Python :: Implementation :: PyPy"],)
4555

test/sl/tet_sl_base_crf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_crf_ernie.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_data_conll.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_data_span.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_grid.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_predict.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_softmax.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
14+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1515
sys.path.append(path_sys)
1616
print(path_root)
1717
print(path_sys)

test/sl/tet_sl_base_span.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import sys
1313
import os
1414
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
15-
path_sys = os.path.join(path_root, "pytorch_sequencelabeling")
15+
path_sys = os.path.join(path_root, "pytorch_nlu", "pytorch_sequencelabeling")
1616
sys.path.append(path_sys)
1717
print(path_root)
1818
print(path_sys)

test/tc/tet_tc_base_multi_class.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# 分类下的引入, pytorch_textclassification
1717
from tcTools import get_current_time

test/tc/tet_tc_base_multi_label.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# 分类下的引入, pytorch_textclassification
1717
from tcTools import get_current_time

test/tc/tet_tc_base_multi_label_focalloss.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# 分类下的引入, pytorch_textclassification
1717
from tcTools import get_current_time

test/tc/tet_tc_base_multi_label_isadv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# 分类下的引入, pytorch_textclassification
1717
from tcTools import get_current_time

test/tc/tet_tc_base_predict_multiclass.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
1717
from tcPredict import TextClassificationPredict

test/tc/tet_tc_base_predict_multilabel.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import sys
1212
import os
1313
path_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
14-
sys.path.append(os.path.join(path_root, "pytorch_textclassification"))
14+
sys.path.append(os.path.join(path_root, "pytorch_nlu", "pytorch_textclassification"))
1515
print(path_root)
1616
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
1717
from tcPredict import TextClassificationPredict

0 commit comments

Comments
 (0)