데이터관련 교육 카페

https://www.tensorflow.org/tutorials/text/nmt_with_attention?hl=ko 번역 신경망 구조 encoder와 decoder <img src="https://img1.daumcdn.net/relay/cafe/original/?fname=https%3A%2F%2Fwww.tensorflow.org%2Fimages%2Fseq2seq%2Fattention_mechanism.jpg%3Fhl%3Dko" alt="주의 메커니즘" width="500"> 인코더에 저장된 기억셀(c) 값은 인코더 안의 LSTM에서만 활성화되고 마지막 노드에서 소멸하며, 결과 h 값만 디코더에 전달되어 예측에 반영됨 (단, 아래 예제 코드는 LSTM이 아닌 GRU를 사용하므로 기억셀(c) 없이 은닉 상태 h만 전달) # 인코더 구현 (입력 신경망 구현) <pre class="lang-python" dir="ltr"><code dir="ltr">class Encoder(tf.keras.Model):   def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):     super(Encoder, self).__init__()     self.batch_sz = batch_sz     self.enc_units = enc_units     self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)     self.gru = tf.keras.layers.GRU(self.enc_units,                                    return_sequences=True,                                    return_state=True,                                    recurrent_initializer='glorot_uniform')   def call(self, x, hidden):     x = self.embedding(x)     output, state = self.gru(x, initial_state = hidden)     return output, state   def initialize_hidden_state(self):     return tf.zeros((self.batch_sz, self.enc_units)) </code></pre><pre class="lang-python" dir="ltr"><code dir="ltr">encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE) # sample input sample_hidden = encoder.initialize_hidden_state() sample_output, sample_hidden = encoder(example_input_batch, sample_hidden) ## 디코더 구현 (번역 신경망 구현) </code> <code dir="ltr"><code dir="ltr">class Decoder(tf.keras.Model):   def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):     super(Decoder, self).__init__()     self.batch_sz = batch_sz     self.dec_units = dec_units     self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)     self.gru = tf.keras.layers.GRU(self.dec_units,                                    return_sequences=True,                                    return_state=True,                                    recurrent_initializer='glorot_uniform')     
self.fc = tf.keras.layers.Dense(vocab_size)     # used for attention     self.attention = BahdanauAttention(self.dec_units)   def call(self, x, hidden, enc_output):     # enc_output shape == (batch_size, max_length, hidden_size)     context_vector, attention_weights = self.attention(hidden, enc_output)     # x shape after passing through embedding == (batch_size, 1, embedding_dim)     x = self.embedding(x)     # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)     x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)     # passing the concatenated vector to the GRU     output, state = self.gru(x)     # output shape == (batch_size * 1, hidden_size)     output = tf.reshape(output, (-1, output.shape[2]))     # output shape == (batch_size, vocab)     x = self.fc(output)     return x, state, attention_weights </code></code></pre><code dir="ltr">decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE) sample_decoder_output, _, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)),                                       sample_hidden, sample_output) print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape)) </code><pre class="lang-python" dir="ltr"> ## 옵티마이저 및 오차함수 설정 <code dir="ltr">optimizer = tf.keras.optimizers.Adam() # Adam optimizer loss_object = tf.keras.losses.SparseCategoricalCrossentropy( # 교차 엔트로피 오차함수     from_logits=True, reduction='none') def loss_function(real, pred):   mask = tf.math.logical_not(tf.math.equal(real, 0))   loss_ = loss_object(real, pred)   mask = tf.cast(mask, dtype=loss_.dtype)   loss_ *= mask   return tf.reduce_mean(loss_) </code> 이후 체크포인트 및 에폭 설정은 정확도에 따라 변경 ## 모델 훈련<pre class="lang-python" dir="ltr"><code dir="ltr">@tf.function # 파이썬 함수를 그래프로 컴파일하여 실행 (즉시 실행이 아님)
def train_step(inp, targ, enc_hidden):   loss = 0   with tf.GradientTape() as tape:     enc_output, enc_hidden = encoder(inp, enc_hidden)     dec_hidden = enc_hidden     dec_input = tf.expand_dims([targ_lang.word_index['<start>']] * BATCH_SIZE, 1)     # Teacher forcing - feeding the target as the next input     for t in range(1, targ.shape[1]):       # 스페인어 입력 문장의 encoder 계산값을 영어 번역 노드(decoder)로 전달       predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)       loss += loss_function(targ[:, t], predictions)       # using teacher forcing       dec_input = tf.expand_dims(targ[:, t], 1)   batch_loss = (loss / int(targ.shape[1]))   variables = encoder.trainable_variables + decoder.trainable_variables   gradients = tape.gradient(loss, variables)   optimizer.apply_gradients(zip(gradients, variables))   return batch_loss</code></pre><pre class="lang-python" dir="ltr"><code dir="ltr">## 학습 에폭 설정 및 체크포인트 설정 EPOCHS = 10 for epoch in range(EPOCHS):   start = time.time()   enc_hidden = encoder.initialize_hidden_state()   total_loss = 0   for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):     batch_loss = train_step(inp, targ, enc_hidden)     total_loss += batch_loss     if batch % 100 == 0:       print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,                                                    batch,                                                    batch_loss.numpy()))   # saving (checkpoint) the model every 2 epochs   if (epoch + 1) % 2 == 0:     checkpoint.save(file_prefix = checkpoint_prefix)   print('Epoch {} Loss {:.4f}'.format(epoch + 1,                                       total_loss / steps_per_epoch))   print('Time taken for 1 epoch {} sec\n'.format(time.time() - start)) </code></pre> 결과 예시 - 분산 행렬에서 가장 높은 확률값을 갖는 단어를 선택하여 문장을 번역: "¿ todavia estan en casa?" -> "are you still at home?" 
### 문장 입력 시 시작(&lt;start&gt;) 토큰과 끝 토큰에 대한 벡터값도 존재 <img src="https://img1.daumcdn.net/relay/cafe/original/?fname=https%3A%2F%2Fwww.tensorflow.org%2Ftutorials%2Ftext%2Fnmt_with_attention_files%2Foutput_A3LLCx3ZE0Ls_1.png%3Fhl%3Dko" alt="png"></pre>