import fitz

# Path of the PDF whose layout is inspected (resolved against the current
# working directory).
pdf_path = "彩色楼梯灯控制器产品规格书.pdf"

# Open the document with a context manager so it is closed even when a page
# raises part-way through the analysis below.
with fitz.open(pdf_path) as pdf:
    print("分析PDF结构...")

    for page_num in range(min(3, len(pdf))):  # only look at the first 3 pages
        page = pdf[page_num]
        print(f"\n=== 第 {page_num + 1} 页 ===")

        # Fetch every block of the page and order them by the second bbox
        # component (the Y coordinate used for top-to-bottom ordering).
        blocks = sorted(
            page.get_text("dict")["blocks"],
            key=lambda b: b["bbox"][1],
        )

        for block in blocks[:10]:  # only show the first 10 blocks
            # Blocks without a "lines" entry carry no text to preview
            # (presumably image blocks); they still count toward the 10.
            if "lines" not in block:
                continue

            bbox = block["bbox"]
            for line in block["lines"]:
                text = "".join(span["text"] for span in line["spans"]).strip()
                if text:
                    # Print the block's Y position with at most 50 characters
                    # of its first non-empty line, then move to the next block.
                    print(f"  Y={bbox[1]:.1f} | '{text[:50]}...'")
                    break
