1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
| import pymysql from wordcloud import WordCloud import matplotlib.pyplot as plt import jieba import os import re
def remove_stop_words(f):
    """Return *f* with every stop character replaced by a single space.

    The stop characters are ``'0'`` and ``'|'``. Each occurrence is swapped
    for one space, so the length of the text is unchanged.

    Args:
        f: The text to clean.

    Returns:
        The cleaned text.
    """
    # One translation table handles both characters in a single pass.
    blank_out = str.maketrans({'0': ' ', '|': ' '})
    return f.translate(blank_out)
def create_word_cloud(f):
    """Render a word cloud from *f*, save it to ``wordcloud.jpg`` and show it.

    The text is cleaned with ``remove_stop_words``, segmented with jieba
    (accurate mode, HMM enabled) and the resulting tokens are joined with
    spaces before being handed to ``WordCloud``.

    The font file is taken from the ``FONT_PATH`` environment variable and
    falls back to ``/System/Library/fonts/PingFang.ttc`` when it is not set.

    Args:
        f: The raw text to visualise.
    """
    print('ip解析记录!')
    print('当前目录 ' + os.getcwd())

    # The environment variable wins over the built-in default font file.
    font_path = os.environ.get("FONT_PATH", '/System/Library/fonts/PingFang.ttc')

    cleaned = remove_stop_words(f)
    cut_text = " ".join(jieba.cut(cleaned, cut_all=False, HMM=True))

    wc = WordCloud(
        font_path=font_path,
        collocations=False,
        max_words=100,
        width=2000,
        height=1200,
    )
    wordcloud = wc.generate(cut_text)

    plt.imshow(wordcloud)
    plt.axis("off")
    # Relative path: the image is written into the working directory
    # printed above.
    wordcloud.to_file("wordcloud.jpg")
    plt.show()


def get_content_from_db():
    """Fetch all rows of ``ip_registered`` and return their addresses as text.

    Connects to the ``ip_info`` database on localhost, runs
    ``SELECT * FROM ip_registered`` and concatenates the value at column
    index 4 of every row, each followed by a newline. Rows whose value at
    that index is NULL are skipped.

    Returns:
        A single string with one address per line (empty if there are no
        usable rows).
    """
    print('连接数据库!')
    # NOTE(review): connection settings, including the password, are
    # hard-coded here; consider moving them to configuration.
    db = pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='ip_info',
    )
    try:
        with db.cursor() as cursor:
            cursor.execute("SELECT * FROM ip_registered ")
            rows = cursor.fetchall()
    finally:
        # Release the connection even if the query fails. The query is
        # read-only, so there is nothing to commit.
        db.close()

    # Assumes the column order is (id, registeredIp, createTime, updateTime,
    # registeredAddress), so index 4 is the address -- TODO confirm against
    # the table schema, since SELECT * depends on it.
    return "".join(
        str(row[4]) + "\n" for row in rows if row[4] is not None
    )


content = get_content_from_db()
# Drop every "<...>" tag-like span from the fetched text before rendering.
# re.S only changes how "." matches; this pattern does not use ".", and the
# negated class [^>] already matches newlines.
pattern = re.compile(r'<[^>]+>', flags=re.S)
content = re.sub(pattern, '', content)

create_word_cloud(content)
|