1. 请求 API 接口:响应中通常会包含 woff 字体文件的链接,或者直接以 base64 编码的形式返回字体内容。
2. 把 woff 字体保存到本地,然后通过代码把其中的每个字形渲染成一张图片。
from __future__ import print_function, division, absolute_import
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen
from reportlab.graphics.shapes import Path
from reportlab.lib import colors
from reportlab.graphics import renderPM
from reportlab.graphics.shapes import Group, Drawing, scale
class ReportLabPen(BasePen):
    """Pen that records drawing commands into a reportlab ``Path`` object."""

    def __init__(self, glyphSet, path=None):
        """Set up the pen for *glyphSet*.

        Args:
            glyphSet: Glyph set handed on to ``BasePen``.
            path: Path object that receives the segments; a fresh ``Path``
                is created when omitted.
        """
        BasePen.__init__(self, glyphSet)
        self.path = Path() if path is None else path

    def _moveTo(self, p):
        # p is an (x, y) pair; forward it as the start of a new sub-path.
        self.path.moveTo(*p)

    def _lineTo(self, p):
        # Straight segment to the (x, y) pair p.
        self.path.lineTo(*p)

    def _curveToOne(self, p1, p2, p3):
        # Flatten the three (x, y) pairs into the six scalars curveTo takes.
        coords = tuple(p1) + tuple(p2) + tuple(p3)
        self.path.curveTo(*coords)

    def _closePath(self):
        # Close the current sub-path on the underlying Path.
        self.path.closePath()
def ttfToImage(fontName, imagePath, fmt="png"):
    """Render every named glyph of a font file into its own image file.

    Args:
        fontName: Path of the font file opened with ``TTFont``.
        imagePath: Directory that receives one ``<glyph name>.<fmt>`` file
            per glyph; it is created when missing.
        fmt: Image format handed to ``renderPM.drawToFile``. Its lower-cased
            form is also used as the file extension (previously the
            extension was always ``.png`` regardless of *fmt*).
    """
    # Local import: this snippet has no file-level ``import os``.
    import os

    if not os.path.isdir(imagePath):
        os.makedirs(imagePath)

    font = TTFont(fontName)
    try:
        glyphSet = font.getGlyphSet()
        for glyphName in font.getGlyphNames():
            # Skip special glyphs such as '.notdef' and '.null'.
            # startswith() also tolerates an empty name, unlike name[0].
            if glyphName.startswith("."):
                continue
            glyph = glyphSet[glyphName]
            pen = ReportLabPen(glyphSet, Path(fillColor=colors.black, strokeWidth=1))
            glyph.draw(pen)
            # Square canvas: the glyph width plus 200 units of margin on
            # both axes (the height is derived from the width as well).
            side = glyph.width + 200
            group = Group(pen.path)
            # Offset the outline so it does not sit on the canvas edge.
            group.translate(200, 50)
            drawing = Drawing(side, side)
            drawing.add(group)
            imageFile = os.path.join(imagePath, glyphName + "." + fmt.lower())
            renderPM.drawToFile(drawing, imageFile, fmt)
    finally:
        # Release the underlying file handle even if rendering fails.
        font.close()
# Demo run: render the glyphs of ./test.woff into the "Img" directory.
ttfToImage("./test.woff", "Img")
3.使用ddddocr识别文字
以下为 猿人学比赛题第七题 字体反爬 代码实例:
from __future__ import print_function, division, absolute_import
import os
import base64
import ddddocr
import requests
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen
from reportlab.graphics.shapes import Path
from reportlab.lib import colors
from reportlab.graphics import renderPM
from reportlab.graphics.shapes import Group, Drawing, scale
class ReportLabPen(BasePen):
    """Pen whose drawing commands are replayed onto a reportlab ``Path``."""

    def __init__(self, glyphSet, path=None):
        """Initialise the pen.

        Args:
            glyphSet: Glyph set passed through to ``BasePen``.
            path: Target path object; defaults to a newly created ``Path``.
        """
        BasePen.__init__(self, glyphSet)
        self.path = Path() if path is None else path

    def _moveTo(self, p):
        # Begin a new sub-path at the (x, y) pair p.
        self.path.moveTo(*p)

    def _lineTo(self, p):
        # Draw a straight segment to the (x, y) pair p.
        self.path.lineTo(*p)

    def _curveToOne(self, p1, p2, p3):
        # curveTo expects six scalars: x1, y1, x2, y2, x3, y3.
        self.path.curveTo(*p1, *p2, *p3)

    def _closePath(self):
        # Close the sub-path currently being drawn.
        self.path.closePath()
def ttfToImage(fontName, imagePath, fmt="png"):
    """Render every named glyph of a font file into its own image file.

    Args:
        fontName: Path of the font file opened with ``TTFont``.
        imagePath: Directory that receives one ``<glyph name>.<fmt>`` file
            per glyph; it is created when missing.
        fmt: Image format handed to ``renderPM.drawToFile``. Its lower-cased
            form is also used as the file extension (previously the
            extension was always ``.png`` regardless of *fmt*).
    """
    os.makedirs(imagePath, exist_ok=True)

    font = TTFont(fontName)
    try:
        glyphSet = font.getGlyphSet()
        for glyphName in font.getGlyphNames():
            # Skip special glyphs such as '.notdef' and '.null'.
            # startswith() also tolerates an empty name, unlike name[0].
            if glyphName.startswith("."):
                continue
            glyph = glyphSet[glyphName]
            pen = ReportLabPen(glyphSet, Path(fillColor=colors.black, strokeWidth=1))
            glyph.draw(pen)
            # Square canvas: the glyph width plus 200 units of margin on
            # both axes (the height is derived from the width as well).
            side = glyph.width + 200
            group = Group(pen.path)
            # Offset the outline so it does not sit on the canvas edge.
            group.translate(200, 50)
            drawing = Drawing(side, side)
            drawing.add(group)
            imageFile = os.path.join(imagePath, f"{glyphName}.{fmt.lower()}")
            renderPM.drawToFile(drawing, imageFile, fmt)
    finally:
        # Release the font file handle even if rendering fails; req()
        # overwrites the same font file on the next page.
        font.close()
# OCR engine shared by every glyph lookup.
dd = ddddocr.DdddOcr()
# Location where the font decoded from each API response is saved.
woff_path = "./test.woff"
# Scratch directory holding the per-glyph images rendered from that font.
imagePath = "./woff_img"
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(imagePath, exist_ok=True)
# Headers sent with every API request.
# NOTE(review): the session cookie is hard-coded here; presumably it has to
# be replaced with a valid one for the request to succeed - confirm.
headers = {
    'user-agent': 'yuanrenxue.project',
    "cookie": "sessionid=JinJiaoDaWang-alex",
}
def req(page):
    """Fetch one page from the match-7 API and decode its obfuscated values.

    The JSON response carries a base64-encoded font under ``"woff"`` and the
    obfuscated rows under ``"data"``. The font is saved to ``woff_path``,
    its glyphs are rendered into ``imagePath``, and the rows are decoded by
    ``data_handler``.

    Args:
        page: Page number to request; sent as the ``page`` query parameter.

    Returns:
        The list produced by ``data_handler`` for this page.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(
        'https://match.yuanrenxue.com/api/match/7',
        headers=headers,
        params={'page': str(page)},
        # Without a timeout a stalled connection would hang forever.
        timeout=10,
    )
    response.raise_for_status()
    res_json = response.json()

    with open(woff_path, "wb") as f:
        f.write(base64.b64decode(res_json["woff"]))

    # The OCR cache is keyed by glyph name only, and every response ships
    # its own font, so entries learned from a previous font may no longer
    # be valid. Start each page with an empty cache.
    mapp_list.clear()

    try:
        ttfToImage(fontName=woff_path, imagePath=imagePath)
        return data_handler(res_json["data"])
    finally:
        # Remove the rendered glyph images even when decoding fails, so
        # stale images never leak into the next page.
        for file_name in os.listdir(imagePath):
            os.remove(os.path.join(imagePath, file_name))
# Cache of OCR results, keyed by glyph name.
mapp_list = {}


def mapp_func(str):
    """Return the text recognised for the glyph image named *str*.

    Results are cached in ``mapp_list``; on a cache miss the image
    ``<imagePath>/<str>.png`` is read and passed to the OCR engine.

    Args:
        str: Glyph name, i.e. the image file name without its extension.
            (The name shadows the builtin ``str``; it is kept so existing
            callers keep working.)

    Returns:
        The recognised text, or ``None`` when the image cannot be read or
        recognised.
    """
    if str in mapp_list:
        return mapp_list[str]

    try:
        # imagePath already carries its own "./" prefix, so join it
        # directly instead of prepending another "./".
        with open(os.path.join(imagePath, f"{str}.png"), "rb") as f:
            img = f.read()
        res = dd.classification(img_bytes=img)
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # decide what to do with the missing glyph.
        print("无~", e)
        return None

    mapp_list[str] = res
    return res
def data_handler(h_data):
    """Decode the obfuscated ``"value"`` strings of one API page.

    Each value is a whitespace-separated run of tokens. In every token the
    ``&#x`` prefix is replaced by ``uni`` to form a glyph name, which is
    looked up through ``mapp_func``; the recognised pieces are concatenated.

    Args:
        h_data: Iterable of dicts, each with a ``"value"`` entry.

    Returns:
        A list of ``{"value": <decoded string>}`` dicts, one per input item.
    """
    return_data = []
    for d in h_data:
        # split() without an argument drops empty tokens, so the result no
        # longer depends on the value ending with exactly one trailing
        # space (the old ``split(" ")[:-1]`` lost the last glyph otherwise
        # and fed empty names to the OCR lookup).
        tokens = str(d["value"]).split()
        # mapp_func returns None for a glyph it could not recognise; treat
        # that as an empty piece instead of crashing on concatenation.
        pieces = [mapp_func(token.replace("&#x", "uni")) or "" for token in tokens]
        return_data.append({"value": "".join(pieces)})
    return return_data
if __name__ == '__main__':
    # Fetch pages 1 through 5 and print the decoded rows of each one.
    for page_no in range(1, 6):
        print(req(page_no))
评论 (0)