逆向之字体反爬-使用woff转图片再识别的方式通过

1. 请求 API 接口时，响应里通常会带有 woff 字体文件的链接，或者直接以 base64 编码的形式返回字体内容。
2. 把 woff 保存到本地，然后用下面的代码把字体里的每个字形（glyph）分别渲染成一张图片：

from __future__ import print_function, division, absolute_import
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen
from reportlab.graphics.shapes import Path
from reportlab.lib import colors
from reportlab.graphics import renderPM
from reportlab.graphics.shapes import Group, Drawing, scale

class ReportLabPen(BasePen):
    """Pen that replays glyph outline segments onto a reportlab ``Path``.

    Each low-level pen hook (``_moveTo``, ``_lineTo``, ``_curveToOne``,
    ``_closePath``) forwards its coordinates to the method of the same
    purpose on ``self.path``.

    Args:
        glyphSet: Glyph set handed to ``BasePen.__init__``.
        path: Path object to draw into. A new empty ``Path`` is created
            when this is ``None``.
    """

    def __init__(self, glyphSet, path=None):
        super(ReportLabPen, self).__init__(glyphSet)
        self.path = Path() if path is None else path

    def _moveTo(self, p):
        # Start a new sub-path at point ``p``.
        px, py = p
        self.path.moveTo(px, py)

    def _lineTo(self, p):
        # Straight segment from the current point to ``p``.
        px, py = p
        self.path.lineTo(px, py)

    def _curveToOne(self, p1, p2, p3):
        # Cubic curve: two control points followed by the end point.
        (c1x, c1y), (c2x, c2y), (end_x, end_y) = p1, p2, p3
        self.path.curveTo(c1x, c1y, c2x, c2y, end_x, end_y)

    def _closePath(self):
        # Close the current sub-path.
        self.path.closePath()


def ttfToImage(fontName, imagePath, fmt="png"):
    """Render every named glyph of a font file to its own image file.

    One image is written per glyph, named ``<glyph name>.<fmt>`` inside
    ``imagePath``. Glyphs whose name starts with ``'.'`` (such as
    ``.notdef`` and ``.null``) are skipped.

    Args:
        fontName: Path of the font file to open with ``TTFont``.
        imagePath: Directory that receives the images; it is created
            when it does not exist yet.
        fmt: Output format passed to ``renderPM.drawToFile``; its
            lower-cased form is also used as the file extension.
    """
    import os  # local import: this snippet has no module-level ``import os``

    if imagePath:
        os.makedirs(imagePath, exist_ok=True)
    extension = fmt.lower()

    font = TTFont(fontName)
    try:
        glyph_set = font.getGlyphSet()
        for glyph_name in font.getGlyphNames():
            if glyph_name.startswith('.'):  # skip '.notdef', '.null'
                continue
            glyph = glyph_set[glyph_name]
            pen = ReportLabPen(glyph_set, Path(fillColor=colors.black, strokeWidth=1))
            glyph.draw(pen)
            # 200 extra units leave blank space around the glyph. The canvas
            # is square: the glyph width is used for both dimensions.
            side = glyph.width + 200
            outline = Group(pen.path)
            # Shift the outline away from the canvas origin.
            outline.translate(200, 50)
            drawing = Drawing(side, side)
            drawing.add(outline)
            image_file = os.path.join(imagePath, glyph_name + "." + extension)
            renderPM.drawToFile(drawing, image_file, fmt)
    finally:
        # Release the font file handle even when rendering fails, so the
        # file can be overwritten or deleted afterwards.
        font.close()

# Example: render the glyphs of ./test.woff as images under the "Img" directory.
ttfToImage(fontName="./test.woff", imagePath="Img")

3.使用ddddocr识别文字

以下为猿人学比赛题第七题字体反爬代码实例：

from __future__ import print_function, division, absolute_import

import os
import base64

import ddddocr
import requests
from fontTools.ttLib import TTFont
from fontTools.pens.basePen import BasePen
from reportlab.graphics.shapes import Path
from reportlab.lib import colors
from reportlab.graphics import renderPM
from reportlab.graphics.shapes import Group, Drawing, scale
class ReportLabPen(BasePen):
    """Pen that replays glyph outline segments onto a reportlab ``Path``.

    Each low-level pen hook (``_moveTo``, ``_lineTo``, ``_curveToOne``,
    ``_closePath``) forwards its coordinates to the method of the same
    purpose on ``self.path``.

    Args:
        glyphSet: Glyph set handed to ``BasePen.__init__``.
        path: Path object to draw into. A new empty ``Path`` is created
            when this is ``None``.
    """

    def __init__(self, glyphSet, path=None):
        super(ReportLabPen, self).__init__(glyphSet)
        self.path = Path() if path is None else path

    def _moveTo(self, p):
        # Start a new sub-path at point ``p``.
        px, py = p
        self.path.moveTo(px, py)

    def _lineTo(self, p):
        # Straight segment from the current point to ``p``.
        px, py = p
        self.path.lineTo(px, py)

    def _curveToOne(self, p1, p2, p3):
        # Cubic curve: two control points followed by the end point.
        (c1x, c1y), (c2x, c2y), (end_x, end_y) = p1, p2, p3
        self.path.curveTo(c1x, c1y, c2x, c2y, end_x, end_y)

    def _closePath(self):
        # Close the current sub-path.
        self.path.closePath()


def ttfToImage(fontName, imagePath, fmt="png"):
    """Render every named glyph of a font file to its own image file.

    One image is written per glyph, named ``<glyph name>.<fmt>`` inside
    ``imagePath``. Glyphs whose name starts with ``'.'`` (such as
    ``.notdef`` and ``.null``) are skipped.

    Args:
        fontName: Path of the font file to open with ``TTFont``.
        imagePath: Directory that receives the images; it is created
            when it does not exist yet.
        fmt: Output format passed to ``renderPM.drawToFile``; its
            lower-cased form is also used as the file extension.
    """
    if imagePath:
        os.makedirs(imagePath, exist_ok=True)
    extension = fmt.lower()

    font = TTFont(fontName)
    try:
        glyph_set = font.getGlyphSet()
        for glyph_name in font.getGlyphNames():
            if glyph_name.startswith('.'):  # skip '.notdef', '.null'
                continue
            glyph = glyph_set[glyph_name]
            pen = ReportLabPen(glyph_set, Path(fillColor=colors.black, strokeWidth=1))
            glyph.draw(pen)
            # 200 extra units leave blank space around the glyph. The canvas
            # is square: the glyph width is used for both dimensions.
            side = glyph.width + 200
            outline = Group(pen.path)
            # Shift the outline away from the canvas origin.
            outline.translate(200, 50)
            drawing = Drawing(side, side)
            drawing.add(outline)
            image_file = os.path.join(imagePath, glyph_name + "." + extension)
            renderPM.drawToFile(drawing, image_file, fmt)
    finally:
        # Release the font file handle even when rendering fails, so the
        # font file can be overwritten by the next download.
        font.close()

# Request headers sent with every API call.
headers = {
    'user-agent': 'yuanrenxue.project',
    "cookie": "sessionid=JinJiaoDaWang-alex",
}

# File the downloaded font is written to, and the directory that holds the
# per-glyph images rendered from it.
woff_path = "./test.woff"
imagePath = "./woff_img"
if not os.path.exists(imagePath):
    os.makedirs(imagePath)

# Single OCR engine instance shared by all recognitions.
dd = ddddocr.DdddOcr()
def req(page):
    """Fetch one result page and return its values decoded to plain text.

    The JSON response is expected to carry a base64-encoded font under
    ``"woff"`` and the obfuscated rows under ``"data"``. The font is saved
    to ``woff_path``, rendered to per-glyph images in ``imagePath``, and the
    rows are decoded through ``data_handler``. The rendered images are
    deleted again before returning, whether or not decoding succeeded.

    Args:
        page: Page number to request.

    Returns:
        The list produced by ``data_handler`` for this page.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
    """
    response = requests.get(
        'https://match.yuanrenxue.com/api/match/7',
        headers=headers,
        params={'page': str(page)},
        timeout=30,  # never hang forever on a stalled connection
    )
    response.raise_for_status()
    res_json = response.json()
    woff = res_json["woff"]
    data = res_json["data"]

    with open(woff_path, "wb") as f:
        f.write(base64.b64decode(woff))

    # The cache is keyed by glyph name only. A freshly downloaded font may
    # assign the same name to a different shape, so results recognised for a
    # previous font must not be reused.
    mapp_list.clear()

    try:
        ttfToImage(fontName=woff_path, imagePath=imagePath)
        return data_handler(data)
    finally:
        # Remove the images rendered from this page's font.
        for file_name in os.listdir(imagePath):
            os.remove(os.path.join(imagePath, file_name))

# Cache of glyph name -> recognised text, filled by mapp_func.
mapp_list = {}


def mapp_func(str):
    """Return the text recognised for the glyph image named ``str``.

    Results are cached in ``mapp_list``. On a cache miss the image
    ``./{imagePath}/{str}.png`` is read and passed to the OCR engine ``dd``.

    Args:
        str: Glyph name, i.e. the image file name without ``.png``.
            (The parameter name shadows the builtin; it is kept so existing
            keyword callers keep working.)

    Returns:
        The recognised text, or ``None`` when the image could not be read
        or recognised (the error is printed).
    """
    if str in mapp_list:
        return mapp_list[str]

    try:
        with open(f"./{imagePath}/{str}.png","rb") as f:
            img = f.read()
        # Passed positionally: the keyword name of this first parameter
        # differs between ddddocr releases (``img_bytes`` vs ``img``).
        res = dd.classification(img)
    except Exception as e:
        # Best effort: report the failure and let the caller decide.
        print("无~",e)
        return None

    mapp_list[str] = res
    return res
def data_handler(h_data):
    """Decode the obfuscated ``"value"`` strings of a page into plain text.

    Each value is a whitespace-separated run of hex character references,
    e.g. ``"&#xf312 &#xb854 &#xe948 &#xf312 "``. Every reference is turned
    into a glyph name (``&#x`` -> ``uni``) and resolved through
    ``mapp_func``.

    Args:
        h_data: Iterable of dicts that each have a ``"value"`` entry.

    Returns:
        A list of ``{"value": <decoded text>}`` dicts, in input order.

    Raises:
        ValueError: If a glyph could not be recognised; a partially decoded
            value would silently be wrong.
    """
    return_data = []
    for d in h_data:
        decoded = []
        # split() without arguments ignores leading/trailing/repeated
        # whitespace, so the last reference is kept even when the value has
        # no trailing space.
        for reference in str(d["value"]).split():
            glyph_name = reference.replace("&#x", "uni")
            text = mapp_func(glyph_name)
            if text is None:
                raise ValueError(
                    "could not recognise glyph %r" % (glyph_name,)
                )
            decoded.append(text)
        return_data.append({"value": "".join(decoded)})
    return return_data

if __name__ == '__main__':
    # Fetch and print the decoded values of pages 1 through 5.
    for page_no in range(1, 6):
        print(req(page_no))

逆向之字体反爬-使用woff转图片再识别的方式通过

评论 (0)