input: "hello worl"
input -> model -> "d" (next predicted character)
"a" = 0
"b" = 1
"c" = 2
...
[1, 0, 0, 0 ... 0, 0]  # "a"
[0, 1, 0, 0 ... 0, 0]  # "b"
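A minimal sketch of this mapping in Python, assuming the 28-character alphabet (a-z, space and newline) the scripts below settle on:

alphabet = list("abcdefghijklmnopqrstuvwxyz \n")

def one_hot(char):
    # a vector of zeros with a single 1 at the character's index
    vec = [0] * len(alphabet)
    vec[alphabet.index(char)] = 1
    return vec

print(one_hot("a"))  # [1, 0, 0, ... 0, 0]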
data1.txt
data2.txt
data3.txt
...
data8.txt
input sequence: "tensorflow is coo"
output: "l"
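In code, one such (input, output) pair is just a slice of the text plus the character that follows it:

text = "tensorflow is cool"
n = len(text) - 1
x = text[:n]  # "tensorflow is coo" (input sequence)
y = text[n]   # "l" (the character the model should learn to predict)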
def get_character_count():
    # returns the number of possible characters
    alphabet = get_alphabet()
    return len(alphabet)

def get_alphabet():
    # returns the list of all characters we will allow from our dataset
    # (the lowercase alphabet, spaces and newlines)
    return list("abcdefghijklmnopqrstuvwxyz \n")

def text_to_vector(text):
    # takes in a text and returns it as a sequence of one-hot encodings,
    # one per character in the text
    alphabet = get_alphabet()
    vector = []
    for char in text:
        if char.lower() in alphabet:
            one_hot = [0] * get_character_count()
            index = alphabet.index(char.lower())
            one_hot[index] = 1
            vector.append(one_hot)
    return vector
def prep_dataset(file):
    # takes the name of a file where text data is stored and returns
    # the input sequences array and the output characters array
    with open(file, "r") as f:
        text = f.read()
    vec = text_to_vector(text)  # one-hot encode the text
    xs = []  # input sequence array
    ys = []  # output character array
    i = 0
    while i < len(vec) - 15:  # slide a window over every substring of length 15
        x = vec[i:i+15]  # input sequence
        y = vec[i+15]    # output character: the one right after the sequence
        xs.append(x)
        ys.append(y)
        i += 1
    return xs, ys
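As a quick sanity check (hypothetical, assuming data1.txt exists), the returned arrays should contain one sample per window: 15 one-hot vectors of size 28 per input, and a single one-hot vector per output:

import numpy as np

xs, ys = prep_dataset("data1.txt")
print(np.array(xs).shape)  # (num_samples, 15, 28)
print(np.array(ys).shape)  # (num_samples, 28)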
if __name__ == "__main__":
    x = []  # input sequences
    y = []  # output characters
    for i in range(1, 9):  # go through all the dataset files and collect the inputs and outputs
        a, b = prep_dataset(f"data{i}.txt")
        x.extend(a)
        y.extend(b)
def build_model():
    model = tf.keras.Sequential([
        # input_shape=(None, character_count): variable-length sequences of one-hot vectors
        tf.keras.layers.LSTM(128, input_shape=(None, get_character_count()), return_sequences=True),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        tf.keras.layers.Dense(32),
        tf.keras.layers.Dense(get_character_count(), activation="softmax")
    ])
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(0.01))
    return model
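A quick way to sanity-check the architecture (assuming TensorFlow 2.x) is to build it and print a summary; the expected output shape of each layer:

model = build_model()
model.summary()
# LSTM(128, return_sequences=True)  -> (None, None, 128)
# Bidirectional(LSTM(64))           -> (None, 128), forward and backward outputs concatenated
# Dense(32)                         -> (None, 32)
# Dense(28, softmax)                -> (None, 28), one probability per allowed character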
def train_model(model, x, y):
    print("Training...")
    model.fit(x, y, epochs=30)
    model.save("save")
#train.py
import numpy as np
import tensorflow as tf
def get_character_count():
    # returns the number of possible characters
    alphabet = get_alphabet()
    return len(alphabet)

def get_alphabet():
    # returns the list of all characters we will allow from our dataset
    # (the lowercase alphabet, spaces and newlines)
    return list("abcdefghijklmnopqrstuvwxyz \n")

def text_to_vector(text):
    # takes in a text and returns it as a sequence of one-hot encodings,
    # one per character in the text
    alphabet = get_alphabet()
    vector = []
    for char in text:
        if char.lower() in alphabet:
            one_hot = [0] * get_character_count()
            index = alphabet.index(char.lower())
            one_hot[index] = 1
            vector.append(one_hot)
    return vector
def prep_dataset(file):
    # takes the name of a file where text data is stored and returns
    # the input sequences array and the output characters array
    with open(file, "r") as f:
        text = f.read()
    vec = text_to_vector(text)  # one-hot encode the text
    xs = []  # input sequence array
    ys = []  # output character array
    i = 0
    while i < len(vec) - 15:  # slide a window over every substring of length 15
        x = vec[i:i+15]  # input sequence
        y = vec[i+15]    # output character: the one right after the sequence
        xs.append(x)
        ys.append(y)
        i += 1
    return xs, ys
def build_model():
    model = tf.keras.Sequential([
        # input_shape=(None, character_count): variable-length sequences of one-hot vectors
        tf.keras.layers.LSTM(128, input_shape=(None, get_character_count()), return_sequences=True),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
        tf.keras.layers.Dense(32),
        tf.keras.layers.Dense(get_character_count(), activation="softmax")
    ])
    model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(0.01))
    return model
def train_model(model, x, y):
    print("Training...")
    model.fit(x, y, epochs=30)
    model.save("save")
if __name__ == "__main__":
    model = build_model()
    x = []  # input sequences
    y = []  # output characters
    for i in range(1, 9):  # go through all the dataset files and collect the inputs and outputs
        a, b = prep_dataset(f"data{i}.txt")
        x.extend(a)
        y.extend(b)
    train_model(model, np.array(x, dtype=float), np.array(y, dtype=float))
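Assuming the eight data files sit next to the script, training is started with:

python train.py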
1. Start with an input sequence
2. Pass the input sequence to the model to predict the next character
3. Add this character to the end of the input sequence and drop the first character of the sequence
4. Repeat steps 2 and 3 as many times as you want to produce a set of lyrics
#run.py
import tensorflow as tf
import numpy as np
from train import get_alphabet, text_to_vector
from autocorrect import Speller
spell = Speller()
def gen_text(model, inp, length):
    # inp = input sequence
    # length = no. of characters to produce
    alphabet = get_alphabet()
    res = inp  # final output
    for _ in range(length):
        vec = text_to_vector(inp)  # encode the input
        vec = np.expand_dims(vec, axis=0)  # add a batch dimension so it matches the model's input shape
        index = np.argmax(model.predict(vec))  # pass the input to the model and take the most likely character
        letter = alphabet[index]  # decode the output index back to a letter
        res += letter  # add the letter to the output string
        inp += letter  # add the letter to the input sequence
        inp = inp[1:]  # drop the first letter of the input sequence
    return spell(res)  # return the spell-checked output
model = tf.keras.models.load_model("save")

while True:
    print("============================")
    print(gen_text(model, input("Enter seed phrase: "), 500))
    print("============================")
python run.py
[...TensorFlow warnings]
============================
Enter seed phrase: Never will it mend
Never will it mend
now the truth of me
of live
all silence the exist
cannot kill the the family
battery
never
fire
to begin whipping one
no nothing no the matters breath
oh it so met mor the role me can see
and it just free the find
never will the time
nothing is the ear fire
truth wind to see
man me will the death
writing dawn aninimine in me
cannot justice the battery
pounding either as taken my stream
to the will is the existing there is bore
make it our lothenent
born one row the better the existing fro
============================
============================
Enter seed phrase: hold my battery of breath
hold my battery of breath of eyes to set death
oh straw hat your humanity
late the ust comes before but they su
never cared to be
i the estimate it life the lost fill dead
so red
so true
battery
no nothing life now i me crossing ftin
dare
so true myself in me
now pain i mean
so net would
to be
no ripped to are
so prmd
imply solute more is to you hear
taken my end
truth the within
so let it be worth
tro finding
something
mutilation cancellation cancellation
austin
so let it be resting spouses the stan
serve goth
============================