接口:
https://cx.shouji.360.cn/phonearea.php?number= + str(电话号码)
接口推荐理由
- 在上一篇文章中,我们尝试用 https://www.so.com/s?ie=utf-8&q= + str(电话号码) 这个接口获取想要的结果。它是可用的,但该接口的反爬机制很严格,抓取几条之后就会被封,需要建立代理ip池并设置访问间隔时间。
- 这个接口非常稳定,基本上没有反爬机制,我们可以在没有代理ip的情况下完成我们想要的结果,更简单、稳定、高效。
如果是非手机号码,接口也能自动区分是座机还是手机。
结果展示: 代码
# -*- coding: utf-8 -*-
"""Look up province, city and carrier for phone numbers via the 360 API.

Reads phone numbers from the "电话" column of an Excel sheet, queries the
lookup endpoint once per number and writes one CSV row per answered number.
"""
import csv
import datetime
import json
import logging
import random
import re
import socket  # socket.timeout: raised when a request times out
import time
import urllib.parse
import urllib.request
from urllib.error import HTTPError, URLError

import pandas as pd

try:
    # Imported by the original listing but not used below; optional so the
    # script still runs on machines where they are not installed.
    import requests
    from bs4 import BeautifulSoup
except ImportError:
    requests = BeautifulSoup = None

logger = logging.getLogger(__name__)

# NOTE(review): the published listing had a non-ASCII path segment here,
# which made every request fail with UnicodeEncodeError. "phonearea.php" is
# believed to be the real endpoint -- confirm against the live service.
API_URL = "https://cx.shouji.360.cn/phonearea.php?number="
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
)
OUTPUT_PATH = r"D:\Case_data/telephone_r" + ".csv"
INPUT_PATH = r'D:/Case_data/telephone.xlsx'
PHONE_COLUMN = "电话"
REQUEST_TIMEOUT = 60  # seconds
REQUEST_INTERVAL = 0  # seconds to wait before each request


def _telephone_from_url(url):
    """Return the value of the ``number`` query parameter of *url*, or ""."""
    query = urllib.parse.urlparse(url).query
    numbers = urllib.parse.parse_qs(query).get("number")
    return numbers[0] if numbers else ""


def _classify(data, telephone):
    """Derive ``(province, city, sp)`` from the API's ``data`` object."""
    reported_province = data.get("province") or ""
    if reported_province:
        province = reported_province
    elif telephone.startswith("00852"):
        # The API reports no province for this dialling prefix.
        province = "香港"
    elif telephone.startswith("00886"):
        province = "台湾"
    else:
        province = "未知"

    # An empty city falls back to the province.
    city = data.get("city") or province
    # A known province without a carrier is labelled as a landline.
    sp = data.get("sp") or ("座机" if reported_province else province)
    return province, city, sp


def response(url, telephone=None, writer=None):
    """Query *url* and record the location of one phone number.

    Args:
        url: Lookup URL that returns a JSON document with a ``data`` object
            holding ``province``, ``city`` and ``sp`` entries.
        telephone: The number being looked up. When omitted it is taken from
            the ``number`` query parameter of *url*.
        writer: Optional ``csv.writer``-like object; when given, the result
            row is appended with ``writer.writerow``.

    Returns:
        ``[telephone, province, city, sp]`` on success, or ``None`` when the
        request fails or the reply is not the expected JSON. Failures are
        logged and skipped so that one bad number does not stop a batch.
    """
    if telephone is None:
        telephone = _telephone_from_url(url)
    telephone = str(telephone)

    try:
        request = urllib.request.Request(
            url, headers={"User-Agent": USER_AGENT}
        )
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as reply:
            payload = json.loads(reply.read().decode("utf-8"))
    except (HTTPError, URLError, socket.timeout, ValueError) as exc:
        # ValueError also covers Unicode and JSON decoding errors.
        logger.warning("lookup failed for %s: %s", telephone, exc)
        return None

    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        logger.warning("unexpected reply for %s: %r", telephone, payload)
        return None

    province, city, sp = _classify(data, telephone)
    row = [telephone, province, city, sp]
    print(telephone, province, city, sp)
    if writer is not None:
        writer.writerow(row)
    return row


def main():
    """Read the phone numbers from Excel and write the lookup results to CSV."""
    # The context manager closes the output file even if a lookup raises.
    with open(OUTPUT_PATH, "w+", newline='', encoding='gb18030') as f:
        writer = csv.writer(f, dialect='excel')
        # Header row first.
        writer.writerow(["电话", '省份', '市级', '分类'])

        # Read the phone column as text so leading zeros in text cells survive.
        df = pd.read_excel(INPUT_PATH, dtype={PHONE_COLUMN: str})
        df[PHONE_COLUMN] = df[PHONE_COLUMN].str.replace('-', "", regex=False)
        print(df.tail(10))

        for telephone in df[PHONE_COLUMN].dropna():
            time.sleep(REQUEST_INTERVAL)
            url = API_URL + urllib.parse.quote(telephone)
            response(url, telephone, writer)


if __name__ == '__main__':
    main()
更多的数据分析和操作知识
在这里,随时学习干货!