import requests
from bs4 import BeautifulSoup
import urllib.parse
import time
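
# Query Google Scholar for each author listed in `names` below and print the
# name and affiliation from the first matching profile.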
def get_scholar_profile(name):
    """
    Fetch the first matching author profile from a Google Scholar author search.
    """
    base_url = "https://scholar.google.com/citations?hl=en&view_op=search_authors&mauthors="
    search_url = base_url + urllib.parse.quote_plus(name)
    # Use a desktop browser User-Agent; Scholar is more likely to reject the
    # default python-requests one
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    try:
        print(f"\n🔍 Querying author: {name}...")
        response = requests.get(search_url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Grab the first author card; these class names reflect Scholar's markup
        # at the time of writing and may need updating if the page changes
        first_result = soup.find('div', {'class': 'gs_ai gs_scl gs_ai_chpr'})
        if not first_result:
            print(f"⚠️ No match found for {name}")
            return None
        # Extract the author's name and affiliation line
        scholar_name = first_result.find('h3', {'class': 'gs_ai_name'}).text.strip()
        description = first_result.find('div', {'class': 'gs_ai_aff'}).text.strip()
        print(f"✅ Fetched info for {name}")
        return f"{scholar_name}\n{description}"
    except Exception as e:
        print(f"❌ Error while querying {name}: {str(e)}")
        return None

# Authors to look up
names = [
    "Lianmin Zheng",
    "Wei-Lin Chiang",
    "Ying Sheng",
    "Siyuan Zhuang",
    "Zhanghao Wu",
    "Yonghao Zhuang",
    "Zi Lin",
    "Zhuohan Li",
    "Dacheng Li",
    "E. Xing",
    "Haotong Zhang"
]

print("=" * 50)
print("🏁 开始执行Google Scholar学者信息查询")
print(f"📋 待查询学者数量: {len(names)}人")
print("=" * 50)

# Fetch and print each result
results = []
for i, name in enumerate(names, 1):
    print(f"\n📊 Progress: {i}/{len(names)}")
    result = get_scholar_profile(name)
    if result:
        results.append(result)
        print(result)
        print("---")
    time.sleep(2)  # Pause between requests to avoid hitting Scholar too often
print("\\n" + "=" * 50)
print("🎉 查询完成!结果汇总:")
print("=" * 50)
for i, result in enumerate(results, 1):
print(f"\\n结果 #{i}:")
print(result)
print("\\n" + "=" * 50)
print(f"📊 成功查询: {len(results)}/{len(names)} 位学者")
print("=" * 50)