import requests
from bs4 import BeautifulSoup
import urllib.parse
import time

def get_scholar_profile(name):
    """
    获取Google Scholar上第一个匹配的学者简介
    """
    base_url = "<https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors=>"
    search_url = base_url + urllib.parse.quote_plus(name)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    try:
        print(f"\n🔍 Looking up scholar: {name}...")
        response = requests.get(search_url, headers=headers, timeout=10)
        response.raise_for_status()  # surface HTTP errors (e.g. rate limiting) instead of silently parsing an error page
        soup = BeautifulSoup(response.text, 'html.parser')

        # Grab the first author card in the search results
        first_result = soup.find('div', {'class': 'gs_ai gs_scl gs_ai_chpr'})
        if not first_result:
            print(f"⚠️ No matching result found for {name}")
            return None

        # Extract the author's name and affiliation line
        scholar_name = first_result.find('h3', {'class': 'gs_ai_name'}).text.strip()
        description = first_result.find('div', {'class': 'gs_ai_aff'}).text.strip()

        print(f"✅ Successfully fetched info for {name}")
        return f"{scholar_name}\n{description}"

    except Exception as e:
        print(f"❌ 查询 {name} 时出错: {str(e)}")
        return None

# List of scholars to look up
names = [
    "Lianmin Zheng",
    "Wei-Lin Chiang",
    "Ying Sheng",
    "Siyuan Zhuang",
    "Zhanghao Wu",
    "Yonghao Zhuang",
    "Zi Lin",
    "Zhuohan Li",
    "Dacheng Li",
    "E. Xing",
    "Haotong Zhang"
]

print("=" * 50)
print("🏁 开始执行Google Scholar学者信息查询")
print(f"📋 待查询学者数量: {len(names)}人")
print("=" * 50)

# Fetch and print each result
results = []
for i, name in enumerate(names, 1):
    print(f"\\n📊 进度: {i}/{len(names)}")
    result = get_scholar_profile(name)
    if result:
        results.append(result)
        print(result)
        print("---")
    time.sleep(2)  # throttle requests to avoid hitting the server too frequently

print("\\n" + "=" * 50)
print("🎉 查询完成!结果汇总:")
print("=" * 50)
for i, result in enumerate(results, 1):
    print(f"\\n结果 #{i}:")
    print(result)

print("\\n" + "=" * 50)
print(f"📊 成功查询: {len(results)}/{len(names)} 位学者")
print("=" * 50)