#!/usr/bin/env python

# -*- coding: utf-8 -*-

"""

----------------------------------

Version : ??

File Name : visual_vec.py

Description :

Author : xijun1

Email :

Date : 2018/12/25

-----------------------------------

Change Activity : 2018/12/25

-----------------------------------

"""

__author__ = 'xijun1'

from tqdm import tqdm

import numpy as np

import tensorflow as tf

from tensorflow.contrib.tensorboard.plugins import projector

import os

import codecs

# Export pre-trained word vectors to a TensorBoard Embedding Projector log
# directory: parse the vector file, load the matrix into a TF variable named
# 'embedding', then write the label metadata, projector config and checkpoint.

# Vocabulary entries and their vectors, kept index-aligned: words[i] is the
# label written to the metadata file for row embeddings[i].
words, embeddings = [], []

# Directory that receives metadata.tsv, the projector config and the checkpoint.
log_path = 'model'

# metadata.tsv is opened for writing inside log_path further down, which fails
# if the directory is missing, so create it up front.
if not os.path.isdir(log_path):
    os.makedirs(log_path)

# The first line is read as "<vocab_size> <vector_size>"; each following line
# is read as "<word> <v1> ... <vN>" separated by single spaces.
with codecs.open('/Users/xxx/github/python_demo/vec.txt', 'r',
                 encoding='utf-8') as f:
    header = f.readline()
    vocab_size, vector_size = map(int, header.split())

    for _ in tqdm(range(vocab_size)):
        # Strip the line terminator (and an optional trailing space) before
        # splitting, so the last component is kept whether or not the writer
        # emitted a space before the newline.
        word_list = f.readline().rstrip().split(' ')
        word = word_list[0]
        if word == "":
            # Blank line, or the file ended before vocab_size lines were read.
            continue

        vector = [value for value in word_list[1:] if value]
        if len(vector) != vector_size:
            # Fail here with the offending word instead of a shape error
            # when the matrix is fed to the placeholder below.
            raise ValueError(
                'expected %d values for word %r, got %d'
                % (vector_size, word, len(vector)))

        words.append(word)
        embeddings.append(np.array(vector, dtype=np.float32))

assert len(words) == len(embeddings)
print(len(words))

with tf.Session() as sess:
    # The variable starts as a 1-element dummy and receives the real matrix
    # through a placeholder; validate_shape=False allows the assignment to
    # change its shape. The values are supplied via feed_dict at run time.
    X = tf.Variable([0.0], name='embedding')
    place = tf.placeholder(tf.float32, shape=[len(words), vector_size])
    set_x = tf.assign(X, place, validate_shape=False)
    sess.run(tf.global_variables_initializer())
    sess.run(set_x, feed_dict={place: embeddings})

    # One label per line, in the same order as the rows of the matrix.
    with codecs.open(os.path.join(log_path, 'metadata.tsv'), 'w',
                     encoding='utf-8') as f:
        for word in tqdm(words):
            f.write(word + '\n')

    # with summary
    summary_writer = tf.summary.FileWriter(log_path, sess.graph)
    config = projector.ProjectorConfig()
    embedding_conf = config.embeddings.add()
    embedding_conf.tensor_name = 'embedding:0'
    # Bare file name: metadata.tsv is written into log_path, the same
    # directory passed to the FileWriter.
    embedding_conf.metadata_path = 'metadata.tsv'
    projector.visualize_embeddings(summary_writer, config)

    # save
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(log_path, "model.ckpt"))

    # Flush and release the event file.
    summary_writer.close()

# Result:

# Logo

# 永洪科技,致力于打造全球领先的数据技术厂商,具备从数据应用方案咨询、BI、AIGC智能分析、数字孪生、数据资产、数据治理、数据实施的端到端大数据价值服务能力。

# 更多推荐