在上一篇博客里,我们已经把智联的招聘信息存入到数据库(MySQL)里面。
接下来我们就需要对其中需要的信息进行筛选和显示。
直接上代码:
import jieba
import matplotlib.pyplot as plt
import jieba
from wordcloud import WordCloud
import pymysql
import urllib
import bs4
from urllib import request
from bs4 import BeautifulSoup as bs
#数据库定义
class selectMysql(object):
    """Read job titles from the MySQL ``job`` table and dump them to a text file."""

    # Characters stripped from the textual form of each row before writing:
    # the tuple punctuation produced by ``str(row)`` plus dashes and spaces.
    _STRIP_TABLE = str.maketrans('', '', "()-, '")

    def select_data(self):
        """Fetch every ``jobname`` value from the ``job`` table.

        Returns:
            A list of the row tuples returned by the cursor. The list is
            empty when the query fails; the failure is reported on stdout.
        """
        sql = 'select jobname from job '
        # NOTE(review): connection settings (including the password) are hard-coded.
        conn = pymysql.connect(host="127.0.0.1", port=3306, user="root",
                               passwd="zfno11", db="job", charset='utf8')
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(sql)
                # Only fetch when execute() succeeded; a SELECT needs no commit.
                alldata = cursor.fetchall()
            except pymysql.Error:
                print("数据库执行语句异常")
                alldata = ()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query blew up.
            conn.close()
        return list(alldata)

    def get_result(self, list1):
        """Write one cleaned line per row of ``list1`` to the file ``shuju``.

        Each row is converted with ``str()`` and stripped of parentheses,
        dashes, commas, spaces and single quotes before being written.

        Args:
            list1: Iterable of rows (as returned by ``select_data``). It is
                also stored on the instance as ``self.list1``.
        """
        self.list1 = list1
        # Explicit UTF-8 so the file can be read back with encoding='utf-8'
        # regardless of the platform default encoding.
        with open("shuju", 'w', encoding='utf-8') as f:
            for row in self.list1:
                f.write(str(row).translate(self._STRIP_TABLE) + '\n')
# 创建停用词list
def stopwordslist(filepath):
    """Load a stop-word list from a UTF-8 text file.

    Args:
        filepath: Path of a text file with one stop word per line.

    Returns:
        A list with one entry per line of the file, each stripped of
        surrounding whitespace (blank lines yield empty strings).
    """
    # The context manager closes the handle; the original left it open.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]
#去掉空格
# 对句子进行分词
def seg_sentence(sentence, stopwords=None):
    """Segment a sentence with jieba and drop stop words and tab tokens.

    Args:
        sentence: The text to segment; surrounding whitespace is stripped first.
        stopwords: Optional iterable of words to drop. When omitted, the
            list is loaded from ``stopwords.txt`` on every call, so callers
            processing many lines can load it once and pass it in.

    Returns:
        The surviving tokens concatenated into one string, with no
        separator between them.
    """
    if stopwords is None:
        stopwords = stopwordslist('stopwords.txt')  # path of the stop-word file
    # A set gives O(1) membership tests; tab tokens are excluded as well.
    excluded = set(stopwords)
    excluded.add('\t')
    # NOTE(review): tokens are joined without spaces, exactly as before. The
    # old "dedup" loop never ran its body, so it is removed; if space-separated
    # or de-duplicated output was intended, that needs a deliberate change.
    return ''.join(word for word in jieba.cut(sentence.strip())
                   if word not in excluded)
# Segment every line of ``shuju`` and write the result to ``output.txt``.
# Both files are opened as UTF-8 and closed by the context manager even if
# segmentation raises part-way through.
with open('shuju', 'r', encoding='utf-8') as inputs, \
        open('output.txt', 'w', encoding='utf-8') as outputs:
    for line in inputs:
        line_seg = seg_sentence(line)  # the return value is a string
        outputs.write(line_seg + '\n')
###以上部分完成了从数据库读取jobname然后存入到文本shuju里面,最后对文本进行简单的处理
class ciyun(object):
    """Render a word cloud from the segmented text stored in ``output.txt``."""

    def ciyundis(self):
        """Build the word cloud, save it as ``pink.png`` and display it.

        Reads ``output.txt``, re-segments it with jieba in full mode, and
        draws the words using ``timg.jpg`` as the mask image.
        """
        # Read the raw bytes and close the handle promptly.
        # NOTE(review): bytes are handed straight to jieba, which is assumed
        # to decode them itself — confirm against the jieba version in use.
        with open("output.txt", 'rb') as f:
            text = f.read()
        wordlist = jieba.cut(text, cut_all=True)
        w1 = " ".join(wordlist)

        # Word cloud configuration.
        backgroud_Image = plt.imread('timg.jpg')
        wc = WordCloud(
            background_color="black",    # canvas background colour
            mask=backgroud_Image,        # image array used as the mask
            max_words=50,                # maximum number of words drawn
            # A font with Chinese glyphs is required: the library's default
            # font (DroidSansMono) cannot render Chinese text.
            font_path="/usr/share/fonts/simfang.ttf",
            max_font_size=150,           # largest font size used
            random_state=30,             # seed for the random generator (reproducible output)
        )
        myword = wc.generate(w1)  # generate the word cloud from the text
        plt.imshow(myword)
        plt.axis("off")
        # dpi is dots per inch; a high value keeps the saved image sharp when zoomed.
        plt.savefig('pink.png', dpi=700)
        plt.show()
# Database export step (selectMysql -> file "shuju"); currently disabled.
# NOTE(review): presumably "shuju" already exists from an earlier run — confirm
# before relying on the segmentation step, which reads it.
#data= selectMysql()
#EEE=data.select_data()
#data.get_result(EEE)
# Draw, save and show the word cloud built from output.txt.
ciyun1=ciyun()
ciyun1.ciyundis()
执行结果截图: