Skip to content

Latest commit

 

History

History
59 lines (58 loc) · 11.4 KB

2025-01-10-gpt-4o-2024-11-20-instructor-results.md

File metadata and controls

59 lines (58 loc) · 11.4 KB

# Benchmark Results (model=gpt-4o-2024-11-20, date=2025-01-10)

| Domain | Response Model | Sample Response JSON |
document.invoice Invoice
{
"invoice_id": "9999999",
"period_start": null,
"period_end": null,
"invoice_issue_date": "2023-11-11",
"invoice_due_date": null,
"order_id": null,
"customer_id": null,
"issuer": "LOGO GOES HERE",
"issuer_address": {
"street": "123 Main Street",
"city": "Anytown",
"state": "US",
"postal_code": "01234",
"country": null
},
"customer": "Fred Davis",
"customer_email": "info@invoice.com",
"customer_phone": "(800) 123-4567",
"customer_billing_address": {
"street": "1335 Martin Luther King Jr Ave",
"city": "Dunedin",
"state": "FL",
"postal_code": "34698",
"country": null
},
"customer_shipping_address": {
"street": "249 Windward Passage",
"city": "Clearwater",
"state": "FL",
"postal_code": "33767",
"country": null
},
"items": [
{
"description": "Service",
"quantity": 1,
"currency": null,
"unit_price": 200.0,
"total_price": 200.0
},
{
"description": "Parts AAA",
"quantity": 1,
"currency": null,
"unit_price": 100.0,
"total_price": 100.0
},
{
"description": "Parts BBB",
"quantity": 2,
"currency": null,
"unit_price": 50.0,
"total_price": 100.0
}
],
"subtotal": 400.0,
"tax": null,
"total": 400.0,
"currency": null,
"notes": null
}
document.receipt Receipt
{
"receipt_id": "70CSCB10738BP",
"transaction_date": "2021-01-26T22:36:26",
"merchant_name": "Walmart",
"merchant_address": {
"street": "8060 W Tropical Pkwy",
"city": "Las Vegas",
"state": "NV",
"postal_code": "89149",
"country": null
},
"merchant_phone": "702-839-3620",
"cashier_name": "Sarah",
"register_number": "35",
"customer_name": null,
"customer_id": null,
"items": [
{
"description": "Boys Crew",
"quantity": 1.0,
"unit_price": 9.48,
"total_price": 9.48
},
{
"description": "Boys Socks",
"quantity": 1.0,
"unit_price": 6.97,
"total_price": 6.97
},
{
"description": "Boxer Brief",
"quantity": 1.0,
"unit_price": 10.98,
"total_price": 10.98
}
],
"subtotal": 27.43,
"tax": 2.3,
"total": 29.73,
"currency": "USD",
"payment_method": {
"type": "Debit",
"card_last_4": null,
"card_type": "US Debit"
},
"discount_amount": null,
"discount_description": null,
"tip_amount": null,
"return_policy": null,
"barcode": "5455 5110 5197 3924 8502",
"additional_charges": null,
"notes": "Low Prices You Can Trust. Every Day.",
"others": {
"terminal_id": "SC162890",
"approval_code": "817522",
"network_id": "0056",
"reference_number": "120800300965"
}
}
document.resume Resume
document.us-drivers-license USDriversLicense
{
"issuing_state": "MT",
"license_number": "0812319684104",
"full_name": "Brenda Lynn Sample",
"first_name": "Brenda",
"middle_name": "Lynn",
"last_name": "Sample",
"address": {
"street": "123 Main Street",
"city": "Helena",
"state": "MT",
"zip_code": "59601"
},
"date_of_birth": "1968-08-04",
"gender": "F",
"height": "5'06\"",
"weight": 150.0,
"eye_color": "BRO",
"hair_color": null,
"issue_date": "2015-02-15",
"expiration_date": "2023-08-04",
"license_class": "D",
"donor": null,
"veteran": null
}
document.utility-bill UtilityBill
{
"account_number": "1234567890-1",
"date_mailed": "2019-09-07",
"service_for": "SPARKY JOULE",
"service_address": {
"street": "12345 ENERGY CT",
"city": null,
"state": null,
"zip_code": null
},
"billing_period_start": null,
"billing_period_end": null,
"date_due": "2019-09-28",
"amount_due": 88.14,
"previous_balance": 91.57,
"payment_received": -91.57,
"current_charges": 88.14,
"breakdown_of_charges": [
{
"description": "Current PG&E Electric Delivery Charges",
"amount": 55.66,
"usage": null,
"rate": null
},
{
"description": "Silicon Valley Clean Energy Electric Generation Charges",
"amount": 32.48,
"usage": null,
"rate": null
}
],
"payment_options": [
"www.pge.com/waystopay"
],
"contact_information": {
"phone": "1-800-743-5000",
"hours": "Monday-Friday 7 a.m.-9 p.m., Saturday 8 a.m.-6 p.m.",
"website": "www.pge.com/MyEnergy"
}
}
document.w2-form W2Form
{
"control_number": "GNI851",
"ein": "63-0065650",
"ssn": "554-03-0876",
"employee_name": "Anastasia Hodges",
"employee_address": {
"street": "200 2nd Street NE",
"city": "Waseca",
"state": "MN",
"zip_code": "56093"
},
"employer_name": "NORTH 312",
"employer_address": {
"street": "151 N Market Street",
"city": "Wooster",
"state": "OH",
"zip_code": "44691"
},
"wages_tips_other_compensation": 23677.7,
"federal_income_tax_withheld": 2841.32,
"social_security_wages": 24410.0,
"social_security_tax_withheld": 1513.42,
"medicare_wages_and_tips": 24410.0,
"medicare_tax_withheld": 353.95,
"tax_year": 2020
}
aerospace.remote-sensing RemoteSensing
{
"description": "The satellite image shows a large port area with extensive infrastructure for shipping and logistics, adjacent to a densely populated urban area. The port includes container storage, docks, and industrial facilities, while the urban area features residential and commercial buildings.",
"objects": [
"shipping containers",
"docks",
"industrial facilities",
"residential buildings",
"commercial buildings",
"roads",
"ships",
"marina",
"beach"
],
"categories": [
"port",
"residential-area",
"industrial-area",
"beach"
],
"is_visible": true
}
healthcare.medical-insurance-card MedicalInsuranceCard
{
"provider_service": null,
"member_information": {
"member_name": "Member Name",
"member_id": "XYZ123456789",
"group_number": "023457"
},
"pharmacy_plan": {
"rx_bin": "987654",
"rx_pcn": null,
"rx_grp": null,
"pharmacy_help_desk": null
},
"insurance_provider": {
"provider_name": "BlueCross BlueShield",
"network": "PPO"
},
"coverage": {
"office_visit": "$15",
"specialist_visit": "$15",
"urgent_care": null,
"emergency_room": "$75",
"inpatient_hospital": null
}
}
retail.ecommerce-product-caption RetailEcommerceProductCaption
{
"description": "The Kindle Paperwhite features a 6.8-inch display with adjustable warm light, offering a larger screen and enhanced reading experience.",
"rating": 90,
"name": "Kindle Paperwhite (8 GB)",
"brand": "Amazon",
"category": "Electronics / E-readers",
"price": "$139.99",
"color": "Black"
}
media.tv-news TVNews
{
"description": "A news anchor is presenting a story in a studio setting with a blurred background. The chyron displays a headline about Biden criticizing Netanyahu in an interview.",
"chyron": "Biden criticises Netanyahu in an interview",
"network": "BBC News",
"reporters": null
}
media.nfl-game-state NFLGameState
{
"description": "Green Bay Packers vs. Arizona Cardinals game in progress.",
"teams": [
{
"name": "GB",
"score": 0
},
{
"name": "AZ",
"score": 7
}
],
"status": "in_progress",
"quarter": 2,
"clock_time": "12:12",
"possession_team": "GB",
"down": "2nd",
"distance": 10,
"yard_line": 10,
"network": "NBC",
"is_shown": true
}
media.nba-game-state NBAGameState
{
"description": "NBA Finals Game 1 between Miami Heat and San Antonio Spurs.",
"teams": [
{
"name": "MIA",
"score": 7
},
{
"name": "SA",
"score": 6
}
],
"status": "in_progress",
"quarter": 1,
"clock_time": "9:09",
"shot_clock": 11,
"network": "ESPN",
"is_shown": true
}