app.py
import torch
import torch.nn as nn
import random
# Read the dataset and lowercase it to keep the character vocabulary small
with open('dataset.txt', 'r') as file:
    text = file.read().lower()
# Create a character-level vocabulary
chars = sorted(list(set(text)))
char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}
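# For example, if text were "abca", chars would be ['a', 'b', 'c'],
# char_to_idx would be {'a': 0, 'b': 1, 'c': 2}, and idx_to_char the inverse.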
# Define the character-level RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        # x: (batch, seq_len) of character indices
        x = self.embedding(x)                               # (batch, seq_len, hidden_size)
        out, hidden = self.rnn(x, hidden)                   # (batch, seq_len, hidden_size)
        out = out.contiguous().view(-1, self.hidden_size)   # (batch * seq_len, hidden_size)
        out = self.fc(out)                                  # (batch * seq_len, output_size)
        return out, hidden
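
# Quick shape check (a hypothetical, commented-out sketch; the tiny sizes below
# are illustrative only and not part of the training configuration):
# tiny = CharRNN(input_size=10, hidden_size=8, output_size=10, num_layers=2)
# h0 = torch.zeros(2, 1, 8)                                  # (num_layers, batch, hidden)
# logits, h1 = tiny(torch.zeros(1, 5, dtype=torch.long), h0)
# assert logits.shape == (5, 10)                             # (batch * seq_len, vocab)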
# Set the hyperparameters
input_size = len(chars)    # Vocabulary size (number of unique characters)
output_size = len(chars)   # One logit per character in the vocabulary
hidden_size = 512          # Hidden state size; larger gives more capacity
num_layers = 4             # Number of stacked RNN layers
seq_length = 350           # Characters per training sample; longer captures more context
learning_rate = 0.0005     # Small learning rate for more stable training
num_epochs = 2000          # Tune based on convergence and validation performance
# Initialize the model, loss function, and optimizer
model = CharRNN(input_size, hidden_size, output_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model on randomly sampled subsequences of the text
for epoch in range(num_epochs):
    # Randomly sample a sequence of seq_length characters from the text
    start_idx = random.randint(0, len(text) - seq_length - 1)
    input_seq = text[start_idx:start_idx + seq_length]
    # Target is the same sequence shifted one character to the right,
    # so input and target always have the same length (seq_length)
    target_seq = text[start_idx + 1:start_idx + seq_length + 1]

    # Convert both sequences to tensors of character indices
    input_tensor = torch.tensor([char_to_idx[ch] for ch in input_seq], dtype=torch.long).unsqueeze(0)
    target_tensor = torch.tensor([char_to_idx[ch] for ch in target_seq], dtype=torch.long)

    # Initialize the hidden state: (num_layers, batch_size, hidden_size)
    hidden = torch.zeros(num_layers, 1, hidden_size)

    # Forward pass
    optimizer.zero_grad()
    output, _ = model(input_tensor, hidden)
    loss = criterion(output, target_tensor)

    # Backward pass and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
# Save the model
torch.save(model.state_dict(), 'language_model.pth')
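
# --- Text generation: a minimal sampling sketch, not part of the original
# script. The `generate` function and its `temperature` parameter are
# illustrative names; this assumes the same vocabulary mappings built above. ---
def generate(model, seed, length=200, temperature=1.0):
    """Sample `length` characters from the trained model, starting from `seed`."""
    model.eval()
    hidden = torch.zeros(num_layers, 1, hidden_size)
    # Warm up the hidden state on the seed text (lowercased to match training)
    seed_tensor = torch.tensor([char_to_idx[ch] for ch in seed.lower()], dtype=torch.long).unsqueeze(0)
    result = seed
    with torch.no_grad():
        output, hidden = model(seed_tensor, hidden)
        for _ in range(length):
            # Use the logits for the most recent character only
            logits = output[-1] / temperature
            probs = torch.softmax(logits, dim=-1)
            idx = torch.multinomial(probs, num_samples=1).item()
            result += idx_to_char[idx]
            # Feed the sampled character back in as the next input
            next_input = torch.tensor([[idx]], dtype=torch.long)
            output, hidden = model(next_input, hidden)
    return result

# Example usage: reload the saved weights and sample some text
# model.load_state_dict(torch.load('language_model.pth'))
# print(generate(model, seed='the ', length=300))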