-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathGLD_fast_pytorch.py
88 lines (79 loc) · 3.77 KB
/
GLD_fast_pytorch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#Optimization of random directions for Pytorch
#Has functions for converting the model weights to and from a 1D numpy array,
#and an (example) objective or error function for optimrandomdir
#Gerald Schuller, August 2019
#import optimrandomdir_parallel as optimrandomdir #with parallel cpu processing, good for complex error functions
#import optimrandomdir_parallel_linesearch as optimrandomdir
import GLD_fast #with no parallel cpu processing, good for simple error functions
import torch
import numpy as np
def model2weightsarray(model):
    """Flatten every tensor of the model's state dict into one 1D numpy array.

    The tensors are visited in ``model.state_dict()`` order, flattened and
    joined end to end. The model is moved to the CPU as a side effect,
    because the tensors are converted through numpy.

    Arguments:
    model: the pytorch model whose weights are read

    Returns:
    A 1D numpy array with all state-dict entries. Floating point weights are
    promoted to float64; a model without any entries yields an empty array.
    """
    # Move the model to the CPU and fetch the state dict once, not per tensor.
    state = model.cpu().state_dict()
    # The leading empty float64 array keeps the result a float64 numpy array
    # and makes the result well defined for an empty state dict.
    chunks = [np.empty(0)]
    for tensor in state.values():
        chunks.append(tensor.numpy().reshape(-1))  # flattened weights tensor
    # One concatenation instead of re-copying the growing vector per tensor.
    return np.concatenate(chunks)
def weightsarray2model(weightsarray, model):
    """Write a flat weights array back into the tensors of a pytorch model.

    This is the inverse of flattening the state dict: the array is consumed
    from the front in ``model.state_dict()`` order, each piece is reshaped to
    the shape of its tensor and copied into the model in place. The model is
    moved to the CPU as a side effect. Entries beyond the number of model
    weights are ignored.

    Arguments:
    weightsarray: 1D numpy array with at least as many entries as the state
        dict has elements
    model: the pytorch model that receives the weights

    Raises:
    IndexError: if weightsarray has fewer entries than the model needs; the
        model is left unmodified in that case.
    """
    # Move the model to the CPU and fetch the state dict once, not per tensor.
    state = model.cpu().state_dict()
    flat = np.ascontiguousarray(weightsarray).reshape(-1)
    # Check the length up front, so that a too short array cannot leave the
    # model half updated.
    needed = sum(tensor.numel() for tensor in state.values())
    if flat.size < needed:
        raise IndexError(
            "weightsarray has %d entries, but the model needs %d"
            % (flat.size, needed)
        )
    pointer = 0
    # The state dict tensors share their storage with the model, hence the
    # in-place copy updates the model itself.
    with torch.no_grad():
        for tensor in state.values():
            # numel() is an int also for scalar (0-d) tensors, where
            # np.prod(()) would return the float 1.0 and break the indexing.
            count = tensor.numel()
            chunk = flat[pointer:pointer + count].reshape(tuple(tensor.shape))
            tensor.copy_(torch.from_numpy(chunk))  # copy_ casts the dtype
            pointer += count
    return
def torcherrorfunction(weightsarray, args):
    """Error function for the optimization of random directions.

    Writes the flat weights array into the model, runs the model on the
    training set and returns the loss as a Python number. The model keeps
    the written weights after the call.

    Arguments:
    weightsarray: 1D numpy array with the weights to evaluate
    args: sequence (model, loss_fn, X, Y) with the pytorch model, the
        pytorch loss function, the training set and the target

    Returns:
    The value of loss_fn(model(X), Y), obtained with .item().
    """
    model = args[0]
    loss_fn = args[1]
    X = args[2]
    Y = args[3]
    # Return the 1D numpy weightsarray back into the pytorch structure.
    weightsarray2model(weightsarray, model)
    # Only the loss value is used, no gradients, so the forward pass does
    # not need to record an autograd graph.
    with torch.no_grad():
        Ypred = model(X)
        loss = loss_fn(Ypred, Y).item()
    return loss
def optimizer(model, loss_fn, X, Y, iterations=100, startingscale=1.0, endscale=0.0):
    """Interface function for pytorch networks; it replaces the optimizer for loop.

    The model weights are flattened to a 1D numpy array, handed to
    GLD_fast.gldfast together with torcherrorfunction as objective, and the
    weights it returns are written back into the model.

    Arguments:
    model: object for the neural network whose weights are to be optimized
    loss_fn: the pytorch loss function
    X: training set
    Y: target
    iterations: the number of iterations for the method of random directions
    startingscale: starting standard deviation for the random steps
    endscale: intended final standard deviation of the random steps; it is
        accepted here but not forwarded to GLD_fast.gldfast by this function

    The best values for the start- and end-scale depend on the network, some
    trials are useful.
    """
    startweights = model2weightsarray(model)
    print("optimizer weightsarray.shape=", startweights.shape, "weightsarray=", startweights)
    # Here now the actual optimization of random directions:
    bestweights = GLD_fast.gldfast(
        torcherrorfunction,
        startweights,
        args=(model, loss_fn, X, Y),
        iterations=iterations,
        startingscale=startingscale,
    )
    # Update the model with the new weights:
    weightsarray2model(bestweights, model)