-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8abd65d
Showing
30 changed files
with
963 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
__pycache__/ | ||
.ipynb_checkpoints/ | ||
outputs/ | ||
checkpoints/* | ||
timing/* | ||
transcription_prepared/tmpfolder* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
## Simple Antifraud | ||
**** | ||
This is a simplified voice antifraud system created as part of a bachelor's thesis at [Moscow Polytechnic University](https://mospolytech.ru/). The system is based on a pre-trained DeepSpeech model, a Naive Bayes classifier and a TF-IDF vectorizer. | ||
|
||
This project was built to illustrate the impact of adversarial attacks on this type of system, so it should not be used in production. Even if you think that DeepSpeech is protected well enough, the classifier itself is vulnerable to [Bayesian poisoning](https://en.wikipedia.org/wiki/Bayesian_poisoning). | ||
|
||
This is a kind of [Damn-Vulnerable Service](https://github.com/vavkamil/awesome-vulnerable-apps), so you can get a flag if you abuse it properly. | ||
|
||
### Project structure | ||
|
||
- `checkpoints` contains the .ckpt files of pretrained DeepSpeech models. Pretrained models can be found [here](releases). | ||
- `training` includes a notebook with data preparation and fitting for the NB classifier and the vectorizer. | ||
- `pickles` is the folder used to store the fitted classifier and vectorizer. | ||
|
||
### Installation | ||
|
||
Install deepspeech.pytorch: | ||
|
||
``` | ||
git clone https://github.com/SeanNaren/deepspeech.pytorch | ||
cd deepspeech.pytorch | ||
pip install -r requirements.txt | ||
pip install -e . | ||
``` | ||
|
||
Clone this repository and run the following command inside it to install the remaining dependencies: | ||
``` | ||
pip install -r requirements.txt | ||
``` | ||
|
||
### Mitigations | ||
|
||
The robustness of the original [LibriSpeech model]() can be increased using adversarial retraining with Gaussian data augmentation. An example model can be found in [Releases](releases). You can also try other controls, described [here](https://www.enisa.europa.eu/publications/securing-machine-learning-algorithms). | ||
|
||
To retrain a model with new data, the [original training script](https://github.com/SeanNaren/deepspeech.pytorch/blob/master/deepspeech_pytorch/training.py) can be used. Simply replace | ||
``` | ||
model = DeepSpeech( | ||
labels=labels, | ||
model_cfg=cfg.model, | ||
optim_cfg=cfg.optim, | ||
precision=cfg.trainer.precision, | ||
spect_cfg=cfg.data.spect | ||
) | ||
``` | ||
with | ||
|
||
``` | ||
model = DeepSpeech.load_from_checkpoint( | ||
cfg.checkpoint.filepath, | ||
freeze=True, | ||
learning_rate=0.0001 | ||
) | ||
``` | ||
so you can retrain it like `python3 train.py checkpoint.filepath=/path/to/file.ckpt`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import time | ||
from os import path | ||
from typing import Callable, List | ||
import torch | ||
import torchaudio | ||
import numpy as np | ||
import art.estimators.speech_recognition as asr | ||
from art.attacks.evasion import ImperceptibleASRPyTorch | ||
|
||
import warnings | ||
warnings.filterwarnings(action='ignore') | ||
|
||
def save_to_txt(dest_dir, filename, content):
    """Append ``content`` to the file ``filename`` inside ``dest_dir``.

    The file is opened in append mode, so repeated calls accumulate text
    and a missing file is created on first use.
    """
    target = path.join(dest_dir, filename)
    with open(target, 'a') as handle:
        handle.write(content)
|
||
def time_to_file(
    file_maker: Callable,
    dest_dir: str = 'timing',
):
    """Build a decorator that times a call and reports the duration.

    The decorated function must accept ``(model, filepath, transcription)``.
    After it returns, ``file_maker`` is called with ``dest_dir``, the base
    name of ``filepath`` truncated at its first dot, and a one-line message
    holding the elapsed seconds.

    Args:
        file_maker: Callable taking ``(dest_dir, filename, content)``.
        dest_dir: Directory name forwarded unchanged to ``file_maker``.

    Returns:
        A decorator; the wrapped function returns whatever the original
        function returned.
    """
    # Imported locally because functools is not among the module imports.
    from functools import wraps

    def decorator(function):
        @wraps(function)  # keep the wrapped function's name and docstring
        def wrapper(model, filepath, transcription):
            # perf_counter is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments during a long run.
            start_time = time.perf_counter()
            result = function(model, filepath, transcription)
            elapsed = time.perf_counter() - start_time
            sample = path.split(filepath)[-1]
            file_maker(
                dest_dir,
                sample.split('.')[0],
                f'Time elapsed: {elapsed}s for {sample}\n'
            )
            return result
        return wrapper
    return decorator
|
||
@time_to_file(save_to_txt)
def make_adversarial(model, filepath, labels):
    """Generate an adversarial version of the audio stored at ``filepath``.

    An ``ImperceptibleASRPyTorch`` attack is built around ``model`` and its
    ``generate`` method is run on the loaded audio with ``labels`` as the
    second argument. The call is timed by the ``time_to_file`` decorator,
    which hands the measurement to ``save_to_txt``.

    Returns:
        Whatever ``ImperceptibleASRPyTorch.generate`` returns.
    """
    attack = ImperceptibleASRPyTorch(model)
    return attack.generate(load_np_audio(filepath), labels)
|
||
def create_args(sample_list, transcription):
    """Pair each sample file name with the transcription of its recording.

    The recording id is the part of the sample name before the first ``_``.
    Samples whose recording id is missing from ``transcription`` are left
    out of the result entirely, so every remaining sample stays aligned
    with its own transcription.

    Args:
        sample_list: Iterable of sample file names.
        transcription: Mapping from recording id to transcription.

    Returns:
        A ``zip`` iterator of ``(sample, transcription)`` pairs.
    """
    kept_samples = []
    texts = []
    for sample in sample_list:
        rec = sample.split('_')[0]
        if rec in transcription:
            # Keep both lists in lockstep; dropping only the transcription
            # would shift every later sample onto the wrong text.
            kept_samples.append(sample)
            texts.append(transcription[rec])
    return zip(kept_samples, texts)
|
||
def load_np_audio(filepath):
    """Load an audio file with torchaudio and return it as a NumPy array.

    Only the first element of the ``torchaudio.load`` result is used; the
    second element is discarded.
    """
    waveform = torchaudio.load(filepath)[0]
    return waveform.numpy()
|
||
def save_np_audio(array, filename, destdir, sample_rate=16000):
    """Write a NumPy audio array to ``destdir/filename`` with torchaudio.

    Args:
        array: NumPy array holding the audio; converted with
            ``torch.from_numpy`` before saving.
        filename: Name of the output file.
        destdir: Directory the file is written to.
        sample_rate: Sample rate passed to ``torchaudio.save``. Defaults to
            16000, the value that was previously hard-coded.
    """
    tensor = torch.from_numpy(array)
    filepath = path.join(destdir, filename)
    torchaudio.save(filepath, tensor, sample_rate)
|
||
def create_advs(
    model: asr.PyTorchDeepSpeech,
    source_dir: str,
    dest_dir: str,
    samples: List[str],
    transcriptions: List[str]
) -> List[np.ndarray]:
    """Create and save an adversarial example for each paired sample.

    ``create_args`` pairs sample names with transcriptions; for each pair
    the source file is attacked with ``make_adversarial`` and the result is
    written to ``dest_dir`` under the sample's own name.

    NOTE(review): ``transcriptions`` is annotated as a list, but
    ``create_args`` looks recording ids up in it like a mapping — confirm
    the intended type.

    Returns:
        The generated adversarial arrays, in processing order.
    """
    advs = []
    for sample_name, target_text in create_args(samples, transcriptions):
        adversarial = make_adversarial(
            model,
            path.join(source_dir, sample_name),
            np.array([target_text])
        )
        advs.append(adversarial)
        save_np_audio(adversarial, sample_name, dest_dir)
    return advs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash
# Convert every audio file found in ORIG/<subdir>/ with sox (volume 0.86,
# 16 bits, mono, 16 kHz) and write the result to ../SOX/<subdir>/, where the
# output path is resolved relative to ORIG.

ORIG=$1 # directory that contains the original audio
SOX=$2  # directory where the converted audio will be placed

if [ -z "$ORIG" ] || [ -z "$SOX" ]; then
    echo "usage: $0 ORIG_DIR SOX_DIR" >&2
    exit 1
fi

# Stop early if ORIG is not accessible instead of converting the wrong tree.
cd "$ORIG" || exit 1

# Globs instead of parsing `ls`, so names containing spaces survive intact.
for dir in *; do
    [ -d "$dir" ] || continue
    # -p keeps re-runs from failing when the output directory already exists.
    mkdir -p "../$SOX/$dir"
    for src in "$dir"/*; do
        [ -f "$src" ] || continue
        audio=${src##*/}
        sox -v 0.86 "$src" \
            --bits 16 --no-dither --compression 0.0 \
            "../$SOX/$dir/$audio" \
            channels 1 rate 16000 || echo "$audio corrupted"
    done
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import shutil | ||
from pathlib import Path | ||
import numpy as np | ||
from deepspeech_pytorch.data.utils import create_manifest | ||
from adversarial_generation import save_np_audio, load_np_audio | ||
|
||
class NoisePreprocessor:
    """Add Gaussian noise with a configurable standard deviation to arrays."""

    # Preset sigma values that can be passed to ``set_noise``.
    HARD_NOISE = .1
    MED_NOISE = .01
    LIGHT_NOISE = .005

    def __init__(self, sigma=.01):
        """Store the standard deviation used by ``apply_noise``."""
        self.sigma = sigma

    def set_noise(self, sigma):
        """Change the noise standard deviation.

        Args:
            sigma: New standard deviation; must satisfy ``0 <= sigma < 1``.

        Raises:
            ValueError: If ``sigma`` is outside that range. The previously
                stored value is left untouched.
        """
        # A negative scale is rejected here rather than failing later
        # inside np.random.normal, far from the offending call.
        if not 0 <= sigma < 1:
            raise ValueError(
                f'sigma must be in the range [0, 1), got {sigma!r}'
            )
        self.sigma = sigma

    def apply_noise(self, x):
        """Return a noised copy of ``x`` as a float32 array.

        Each output element is drawn from a normal distribution centred on
        the matching element of ``x`` with standard deviation ``self.sigma``.
        """
        noised = np.random.normal(x, self.sigma, x.shape)
        return noised.astype(np.float32)
|
||
class DataMaker:
    """Build noise-augmented and validation datasets from a samples folder.

    The samples folder is read through its ``wav`` and ``txt`` subfolders,
    where ``<name>.wav`` is matched with ``<name>.txt``. Output is written
    to ``dest_path/wav`` and ``dest_path/txt``, and a manifest is produced
    with deepspeech_pytorch's ``create_manifest``.
    """

    # Upper bound (exclusive) of the index range random validation samples
    # are drawn from.
    # NOTE(review): presumably the LibriSpeech training set size — confirm.
    LIBRISPEECH_MAX = 281241

    def __init__(
        self,
        samples_folder: str,
        dest_path: str,
        manifest_path: str = 'manifests',
        num_workers: int = 1
    ):
        """Record source/destination paths and create the noise generator.

        Args:
            samples_folder: Folder containing the ``wav`` and ``txt`` inputs.
            dest_path: Folder that will receive the generated dataset.
            manifest_path: Location forwarded to ``create_manifest``.
            num_workers: Worker count forwarded to ``create_manifest``.
        """
        self.num_workers = num_workers
        self.manifest_path = Path(manifest_path)
        self.dest_path = Path(dest_path)
        self.dest_wav = self.dest_path / 'wav'
        self.dest_txt = self.dest_path / 'txt'
        self.samples_wav = Path(samples_folder) / 'wav'
        self.samples_txt = Path(samples_folder) / 'txt'
        self.prep = NoisePreprocessor()

    def _create_manifest(self, prefix):
        """Create the ``<prefix>_noise.json`` manifest for ``dest_path``."""
        output_name = f'{prefix}_noise.json'
        create_manifest(
            str(self.dest_path),
            output_name,
            self.manifest_path,
            self.num_workers
        )

    def _save_sample(self, name, text, audio):
        """Write ``audio`` and ``text`` as ``<name>.wav`` / ``<name>.txt``."""
        save_np_audio(audio, f'{name}.wav', str(self.dest_wav))
        (self.dest_txt / f'{name}.txt').write_text(text)

    def _get_text(self, name):
        """Return the transcription stored in ``samples_txt/<name>.txt``."""
        return (self.samples_txt / f'{name}.txt').read_text()

    def _apply_noise(self, sample, times):
        """Save ``times`` independently noised copies of one sample.

        Copies are named ``<name>_<t>`` and share the original transcription.
        """
        audio = load_np_audio(sample)
        # ``stem`` drops only the extension; ``rstrip('.wav')`` would also
        # eat trailing 'w', 'a', 'v' and '.' characters of the name itself.
        name = sample.stem
        text = self._get_text(name)
        for t in range(times):
            audio_noised = self.prep.apply_noise(audio)
            # Save the noised audio, not the untouched original.
            self._save_sample(f'{name}_{t}', text, audio_noised)

    def _make_dirs(self):
        """Create the destination folders.

        Raises ``FileExistsError`` if any of them already exists, since
        ``Path.mkdir`` is called without ``exist_ok``.
        """
        self.dest_path.mkdir()
        self.dest_wav.mkdir()
        self.dest_txt.mkdir()

    def apply_noise(self, prefix='train', times=3):
        """Create a noise-augmented dataset and its manifest.

        Args:
            prefix: Prefix of the manifest file name.
            times: Number of noised copies produced per source sample.
        """
        self._make_dirs()
        for sample in self.samples_wav.iterdir():
            self._apply_noise(sample, times)
        self._create_manifest(prefix)

    def _val_random(self, size):
        """Return ``size`` distinct sorted indices below ``LIBRISPEECH_MAX``."""
        gen = np.random.default_rng()
        return sorted(gen.choice(
            self.LIBRISPEECH_MAX,
            size=size,
            replace=False
        ))

    def _copy_sample(self, wav):
        """Copy one wav file and its matching txt file to the destination."""
        name = wav.stem  # strip only the extension, see _apply_noise
        shutil.copy(str(wav), self.dest_wav)
        txt = self.samples_txt / f'{name}.txt'
        shutil.copy(str(txt), self.dest_txt)

    def _create_random(self, size):
        """Copy the samples whose directory position was randomly drawn."""
        # A set gives O(1) membership tests; the directory may hold
        # hundreds of thousands of files.
        indexes = {int(i) for i in self._val_random(size)}
        for i, wav in enumerate(self.samples_wav.iterdir()):
            if i in indexes:
                self._copy_sample(wav)

    def _create_iterate(self, size):
        """Copy the first ``size`` samples in directory iteration order."""
        for i, wav in enumerate(self.samples_wav.iterdir()):
            if i == size:
                break
            self._copy_sample(wav)

    def create_vals(self, size, random=False):
        """Create a validation dataset of up to ``size`` samples.

        Args:
            size: Number of samples to copy.
            random: Pick samples by random index instead of taking the
                first ``size`` entries.
        """
        self._make_dirs()
        if random:
            self._create_random(size)
        else:
            self._create_iterate(size)
        self._create_manifest(prefix='val')
Oops, something went wrong.