import instructor from pydantic import BaseModel from openai import OpenAI
# Define your desired output structure classExtractUser(BaseModel): name: str age: int
# Patch the OpenAI client client = instructor.from_openai(OpenAI())
# Extract structured data from natural language res = client.chat.completions.create( model="gpt-4o-mini", response_model=ExtractUser, messages=[{"role": "user", "content": "John Doe is 30 years old."}], )
classSimpleMemoryPlugin(sm.BasePlugin): def__init__(self): self.memories = [ "the earth has fictionally beeen destroyed.", "the moon is made of cheese.", ]
defyield_memories(self): return (m for m in self.memories)
defpre_send_hook(self, conversation: sm.Conversation): for m in self.yield_memories(): conversation.add_message(role="system", text=m)
defget_weather( location: Annotated[ str, Field(description="The city and state, e.g. San Francisco, CA") ], unit: Annotated[ Literal["celcius", "fahrenheit"], Field( description="The unit of temperature, either 'celsius' or 'fahrenheit'" ), ] = "celcius", ): """ Get the current weather in a given location """ returnf"42 {unit}"
# Add your function as a tool conversation = sm.create_conversation() conversation.add_message("user", "What's the weather in San Francisco?") response = conversation.send(tools=[get_weather])
- 对于详情页,返回如下JSON结构: \`\`\`json { "page_type": "detail", "fields": [ {"field_name": "title", "xpath": "XPath to the title"}, {"field_name": "author", "xpath": "XPath to the author"}, {"field_name": "publish_time", "xpath": "XPath to the publish time"}, {"field_name": "content", "xpath": "XPath to the content"} ] } \`\`\`
llm: model:"deepseek-chat" temperature:0 api_key:<你的APIKEY># Your API key, if the API requires it api_base:https://api.deepseek.com# The URL where an OpenAI-compatible server is running to handle LLM API requests # api_version: ... # The version of the API (this is primarily for Azure) max_output:4096# The maximum characters of code output visible to the LLM # Computer Settings computer: import_computer_api:True# Gives OI a helpful Computer API designed for code interpreting language models
multi_line:True# If True, you can input multiple lines starting and ending with ```
有一个好的爬虫代理,就能爬取绝大多数的网站。国内的代理供应商,一般隧道代理都是按并发数收费,性能都差不多。但国外的代理,不知道哪根筋不对,全都是按流量收费的。我调研了十多个海外代理供应商,最后综合评测下来亮代理还不错,虽然也是按流量收费,但代理可用性确实非常高。有兴趣的同学可以试一试,他们提供免费试用:Proxy - Bright Data