Skip to content

Latest commit

 

History

History
59 lines (58 loc) · 9.5 KB

2025-01-10-gpt-4o-mini-2024-07-18-instructor-results.md

File metadata and controls

59 lines (58 loc) · 9.5 KB

# Benchmark Results (model=gpt-4o-mini-2024-07-18, date=2025-01-10)

| Domain | Response Model | Sample Response JSON |
document.invoice Invoice
document.receipt Receipt
{
"receipt_id": null,
"transaction_date": "2021-01-26T22:36:22",
"merchant_name": "Walmart",
"merchant_address": {
"street": "8060 W TROPICAL PKWY",
"city": "LAS VEGAS",
"state": "NV",
"postal_code": "89149",
"country": null
},
"merchant_phone": null,
"cashier_name": "SARAH",
"register_number": "35",
"customer_name": null,
"customer_id": null,
"items": [
{
"description": "BOYS CREW",
"quantity": 1.0,
"unit_price": 9.48,
"total_price": 9.48
},
{
"description": "BOYS SOCKS",
"quantity": 1.0,
"unit_price": 6.97,
"total_price": 6.97
},
{
"description": "BOXER BRIEF",
"quantity": 1.0,
"unit_price": 10.98,
"total_price": 10.98
}
],
"subtotal": 27.43,
"tax": 2.3,
"total": 29.73,
"currency": "USD",
"payment_method": {
"type": "Debit",
"card_last_4": "****",
"card_type": null
},
"discount_amount": null,
"discount_description": null,
"tip_amount": null,
"return_policy": null,
"barcode": null,
"additional_charges": null,
"notes": null,
"others": null
}
document.resume Resume
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "Brenda Lynn Sample",
"first_name": "Brenda",
"middle_name": "Lynn",
"last_name": "Sample",
"address": {
"street": "123 MAIN STREET",
"city": "HELENA",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 0.0,
"payment_received": 91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GNI851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image captures a coastal urban area adjacent to a busy port. The scene includes residential neighborhoods, a marina, and extensive shipping facilities with numerous cargo containers visible. The waterway is active with vessels, indicating significant maritime activity.",
"objects": [
"residential buildings",
"marina",
"cargo containers",
"shipping docks",
"waterway",
"roads",
"beach"
],
"categories": [
"commercial-area",
"port",
"residential-area",
"water-treatment",
"beach"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": {
"provider_service_number": null,
"precertification_number": null
},
"member_information": {
"member_name": "Member Name",
"member_id": "XY2 123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": "HIOPT",
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": null,
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
{
"description": "The Kindle Paperwhite features a 6.8\" display and adjustable warm light for a comfortable reading experience. It is designed for easy portability and offers a sleek black finish.",
"rating": 85,
"name": "Kindle Paperwhite",
"brand": "Amazon",
"category": "Electronics / E-readers",
"price": "$139.99",
"color": "Black"
}
media.tv-news TVNews
{
"description": "A news anchor presenting a segment about President Biden criticizing Netanyahu in an interview.",
"chyron": "Biden criticises Netanyahu in an interview",
"network": "BBC News",
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": null,
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": "GB",
"down": "2nd",
"distance": 10,
"yard_line": -10,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": null,
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}