import tensorflow as tf
import numpy as np
from keras.utils import np_utils
import matplotlib.pyplot as plt


# Start from a clean graph and fix the seed for reproducibility (TF 1.x API)
tf.reset_default_graph()
tf.set_random_seed(1)


# Hyperparameters
learning_rate = 0.1
nepochs = 10000
embedding_dim = 2
window_size = 1

| text ="King is a brave man Queen is a beautiful woman" |
| |
| |
| text = text.lower() |
| |
| word_seq = [] |
| for word in text.split(): |
| if ((word != '.') & (word not in '0123456789')& (word not in ['a','is', 'the'] )): |
| word_seq.append(word) |
| |
| word_seq |
| |
| unique_words = set(word_seq) |
| n_unique_words = len(unique_words) |
| unique_words |
| |
| |
| word_to_int = {w: i for i, w in enumerate(unique_words)} |
| int_to_word = {i: w for i, w in enumerate(unique_words)} |
| print(word_to_int) |
| print(int_to_word) |
| |
# Build (context word, centre word) training pairs with a window of
# window_size words on each side of the centre word
data = []
for i in range(1, len(word_seq) - 1):
    target = word_seq[i]
    print("target : ", target)

    neighbor = []
    for j in range(window_size):
        neighbor.append(word_seq[i - j - 1])  # word to the left
        neighbor.append(word_seq[i + j + 1])  # word to the right
    print(neighbor)

    for w in neighbor:
        data.append([w, target])

print("data : ", data)

# One-hot encode each (input word, label word) pair
x_train = []
y_train = []
for w in data:
    x_train.append(np_utils.to_categorical(word_to_int[w[0]], n_unique_words))
    y_train.append(np_utils.to_categorical(word_to_int[w[1]], n_unique_words))

x_train = np.asarray(x_train)  # shape: (n_pairs, n_unique_words)
y_train = np.asarray(y_train)
print(x_train)
print(y_train)

# Placeholders for the one-hot inputs and labels
X = tf.placeholder(tf.float32, shape=(None, n_unique_words))
Y = tf.placeholder(tf.float32, shape=(None, n_unique_words))

# Input -> hidden layer: the rows of W1 are the word embeddings
W1 = tf.Variable(tf.random_normal([n_unique_words, embedding_dim]))
print("W1 : ", W1.get_shape())
b1 = tf.Variable(tf.random_normal([embedding_dim]))
print("b1 : ", b1.get_shape())

hidden_representation = tf.add(tf.matmul(X, W1), b1)
print(hidden_representation.get_shape())

# Hidden -> output layer: projects back to a distribution over the vocabulary
W2 = tf.Variable(tf.random_normal([embedding_dim, n_unique_words]))
print("W2 : ", W2.get_shape())
b2 = tf.Variable(tf.random_normal([n_unique_words]))
print("b2 : ", b2.get_shape())

prediction = tf.nn.softmax(tf.add(tf.matmul(hidden_representation, W2), b2))
print(prediction.get_shape())

# Cross-entropy between the predicted and true (one-hot) distributions
cross_entropy_loss = tf.reduce_mean(-tf.reduce_sum(Y * tf.log(prediction), axis=[1]))

# Plain gradient descent
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy_loss)

# Train with full-batch gradient descent
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

losses = []
for epoch in range(nepochs):
    sess.run(train_step, feed_dict={X: x_train, Y: y_train})
    loss = sess.run(cross_entropy_loss, feed_dict={X: x_train, Y: y_train})
    if epoch % 100 == 0:
        print('epoch = {}, loss = {}'.format(epoch, loss))
        losses.append(loss)
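
# ---------------------------------------------------------------------------
# Minimal sketch, not part of the original listing: matplotlib is imported
# above but never used, so this block assumes the intent is to plot the
# recorded losses and the learned 2-D embeddings. The embedding of each word
# is taken as its hidden-layer activation, i.e. the corresponding row of
# W1 plus the bias b1.
# ---------------------------------------------------------------------------
vectors = sess.run(W1 + b1)  # shape: (n_unique_words, embedding_dim)

plt.figure(figsize=(10, 4))

# Loss curve (one point per 100 epochs, as recorded in the loop above)
plt.subplot(1, 2, 1)
plt.plot(losses)
plt.xlabel('checkpoint (every 100 epochs)')
plt.ylabel('cross-entropy loss')

# 2-D embedding space: one labelled point per vocabulary word
plt.subplot(1, 2, 2)
for word, idx in word_to_int.items():
    x, y = vectors[idx]
    plt.scatter(x, y)
    plt.annotate(word, (x, y))
plt.xlabel('dim 0')
plt.ylabel('dim 1')

plt.tight_layout()
plt.show()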