fix eval/language tag for ChatEval paper

sotopia-lab · May 24, 2024 · 4bd7c01 · 4bd7c01
1 parent ecd5949
commit 4bd7c01
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 50 deletions.
diff --git a/components/data/chartData.tsx b/components/data/chartData.tsx
@@ -14,7 +14,7 @@ export const bar_data = [
   agents_with_personas: 24,
   human: 92,
   not_applicable: 148,
-  rule_based: 60,
+  rule_based: 59,
   more_than_three_agents: 35,
   more_information_asymmetrical: 2,
   prompting_and_in_context_learning: 47,
@@ -25,7 +25,7 @@ export const bar_data = [
   agents_with_memory: 16,
   more_omniscient: 7,
   pretraining: 18,
-  model_based: 45,
+  model_based: 46,
   simulated_humans: 14,
   agent_teams: 6,
   health: 18,
@@ -132,29 +132,29 @@ export const area_data = [
   human: 1,
   rule_based: 1,
   human_agent: 1,
-  pretraining: 0,
-  simulated_humans: 0,
-  education: 0,
   more_information_asymmetrical: 0,
+  qualitative: 0,
+  more_than_three_agents: 0,
+  finetuning: 0,
+  text: 0,
+  not_applicable: 0,
   model_based: 0,
-  two_agents: 0,
-  agent_teams: 0,
+  embodied: 0,
+  simulated_humans: 0,
   prompting_and_in_context_learning: 0,
-  agents_with_personas: 0,
-  qualitative: 0,
-  agents_with_memory: 0,
   fully_omniscient: 0,
-  policy: 0,
+  health: 0,
+  agent_teams: 0,
   virtual: 0,
   more_omniscient: 0,
-  text: 0,
-  embodied: 0,
-  finetuning: 0,
-  competition: 0,
-  health: 0,
-  more_than_three_agents: 0,
   implicit_objectives: 0,
-  not_applicable: 0,
+  two_agents: 0,
+  agents_with_personas: 0,
+  competition: 0,
+  policy: 0,
+  education: 0,
+  agents_with_memory: 0,
+  pretraining: 0,
 },
 {
   name: '2016',
@@ -176,18 +176,18 @@ export const area_data = [
   more_than_three_agents: 1,
   model_based: 1,
   education: 1,
-  pretraining: 0,
-  simulated_humans: 0,
   more_information_asymmetrical: 0,
-  agent_teams: 0,
+  not_applicable: 0,
+  simulated_humans: 0,
   prompting_and_in_context_learning: 0,
-  agents_with_memory: 0,
   fully_omniscient: 0,
-  policy: 0,
-  more_omniscient: 0,
   health: 0,
+  agent_teams: 0,
+  more_omniscient: 0,
   implicit_objectives: 0,
-  not_applicable: 0,
+  policy: 0,
+  agents_with_memory: 0,
+  pretraining: 0,
 },
 {
   name: '2017',
@@ -204,23 +204,23 @@ export const area_data = [
   robotics: 1,
   qualitative: 1,
   human: 1,
-  pretraining: 0,
-  simulated_humans: 0,
-  education: 0,
   more_information_asymmetrical: 0,
+  collaboration: 0,
+  more_than_three_agents: 0,
+  finetuning: 0,
   model_based: 0,
-  agent_teams: 0,
+  embodied: 0,
+  simulated_humans: 0,
   prompting_and_in_context_learning: 0,
-  agents_with_personas: 0,
-  collaboration: 0,
   fully_omniscient: 0,
-  policy: 0,
-  more_omniscient: 0,
-  embodied: 0,
-  finetuning: 0,
   health: 0,
-  more_than_three_agents: 0,
+  agent_teams: 0,
+  more_omniscient: 0,
   implicit_objectives: 0,
+  agents_with_personas: 0,
+  policy: 0,
+  education: 0,
+  pretraining: 0,
 },
 {
   name: '2018',
@@ -250,10 +250,10 @@ export const area_data = [
   model_based: 1,
   education: 1,
   more_omniscient: 1,
-  pretraining: 0,
   more_information_asymmetrical: 0,
   agent_teams: 0,
   policy: 0,
+  pretraining: 0,
 },
 {
   name: '2019',
@@ -279,14 +279,14 @@ export const area_data = [
   agent_teams: 1,
   model_based: 1,
   health: 1,
-  pretraining: 0,
-  simulated_humans: 0,
-  education: 0,
   more_information_asymmetrical: 0,
-  agents_with_memory: 0,
+  simulated_humans: 0,
   fully_omniscient: 0,
-  policy: 0,
   more_omniscient: 0,
+  policy: 0,
+  education: 0,
+  agents_with_memory: 0,
+  pretraining: 0,
 },
 {
   name: '2020',
@@ -349,10 +349,10 @@ export const area_data = [
   more_omniscient: 1,
   agents_with_personas: 1,
   prompting_and_in_context_learning: 1,
-  pretraining: 0,
   more_information_asymmetrical: 0,
-  agents_with_memory: 0,
   fully_omniscient: 0,
+  agents_with_memory: 0,
+  pretraining: 0,
 },
 {
   name: '2022',
@@ -393,7 +393,7 @@ export const area_data = [
   embodied: 25,
   prompting_and_in_context_learning: 55,
   more_than_three_agents: 21,
-  rule_based: 69,
+  rule_based: 68,
   not_applicable: 81,
   text: 53,
   implicit_objectives: 18,
@@ -413,7 +413,7 @@ export const area_data = [
   pretraining: 7,
   agent_teams: 10,
   agents_with_personas: 8,
-  model_based: 17,
+  model_based: 18,
   fully_omniscient: 1,
   health: 7,
   policy: 2,

diff --git a/components/papers.tsx b/components/papers.tsx
@@ -2242,10 +2242,10 @@ export const data: Paper[] = [
     date: "08/2023",
     environments: "collaboration, text",
     agents: "prompting_and_in_context_learning, more_than_three_agents",
-    evaluation: "rule_based",
+    evaluation: "model_based",
     other: "n/a",
     url: "https://arxiv.org/abs/2308.07201",
-    bibtex: "@misc{chan2023chateval,\n      title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n      author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n      year={2023},\n      month={8},\n      eprint={2308.07201},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https://arxiv.org/abs/2308.07201},\n      environments = {collaboration, text},\n      agents = {prompting_and_in_context_learning, more_than_three_agents},\n      evaluation = {rule_based},\n      other = {n/a}\n}",
+    bibtex: "@misc{chan2023chateval,\n      title={ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate}, \n      author={Chi-Min Chan and Weize Chen and Yusheng Su and Jianxuan Yu and Wei Xue and Shanghang Zhang and Jie Fu and Zhiyuan Liu},\n      year={2023},\n      month={8},\n      eprint={2308.07201},\n      archivePrefix={arXiv},\n      primaryClass={cs.CL},\n      url={https://arxiv.org/abs/2308.07201},\n      environments = {collaboration, text},\n      agents = {prompting_and_in_context_learning, more_than_three_agents},\n      evaluation = {model_based},\n      other = {n/a}\n}",
     subsection: "evaluation/language",
 },
 

diff --git a/docs/paper_table.md b/docs/paper_table.md
@@ -182,7 +182,7 @@
 | [AgentCF: Collaborative Learning with Autonomous Language Agents for Recommender Systems](https://arxiv.org/abs/2310.09233)                                                                                                                                                                                                                                                                  | 10, 2023 | ['mixed_objectives', 'text']                                                                    | ['prompting_and_in_context_learning', 'more_than_three_agents', 'agents_with_memory', 'agents_with_personas']     | ['rule_based']                         | ['simulated_humans']                                                |
 | [Approximating Online Human Evaluation of Social Chatbots with Prompting](https://aclanthology.org/2023.sigdial-1.25)                                                                                                                                                                                                                                                                        | 9, 2023  | ['mixed_objectives', 'text']                                                                    | ['prompting_and_in_context_learning', 'two_agents']                                                               | ['model_based']                        | ['n/a']                                                             |
 | [CharacterChat: Learning towards Conversational AI with Personalized Social Support](https://arxiv.org/abs/2308.10278)                                                                                                                                                                                                                                                                       | 08, 2023 | ['implicit_objectives', 'text']                                                                 | ['prompting_and_in_context_learning', 'two_agents', 'agents_with_memory', 'agents_with_personas']                 | ['model_based', 'human']               | ['simulated_humans']                                                |
-| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201)                                                                                                                                                                                                                                                                                 | 08, 2023 | ['collaboration', 'text']                                                                       | ['prompting_and_in_context_learning', 'more_than_three_agents']                                                   | ['rule_based']                         | ['n/a']                                                             |
+| [ChatEval: Towards Better LLM-based Evaluators through Multi-Agent Debate](https://arxiv.org/abs/2308.07201)                                                                                                                                                                                                                                                                                 | 08, 2023 | ['collaboration', 'text']                                                                       | ['prompting_and_in_context_learning', 'more_than_three_agents']                                                   | ['model_based']                        | ['n/a']                                                             |
 | [Don{'}t Forget Your {ABC}{'}s: Evaluating the State-of-the-Art in Chat-Oriented Dialogue Systems](https://aclanthology.org/2023.acl-long.839)                                                                                                                                                                                                                                               | 7, 2023  | ['text']                                                                                        | ['n/a']                                                                                                           | ['human']                              | ['human_agent']                                                     |
 | [PersonaLLM: Investigating the Ability of Large Language Models to Express Personality Traits](https://api.semanticscholar.org/CorpusID:268032940)                                                                                                                                                                                                                                           | 5, 2023  | ['text']                                                                                        | ['prompting_and_in_context_learning']                                                                             | ['human', 'model_based']               | ['n/a']                                                             |
 | [Psychological Metrics for Dialog System Evaluation](https://arxiv.org/abs/2305.14757)                                                                                                                                                                                                                                                                                                       | 05, 2023 | ['text']                                                                                        | ['two_agents']                                                                                                    | ['human', 'rule_based']                | ['human_agent']                                                     |

diff --git a/main.bib b/main.bib
@@ -3001,7 +3001,7 @@ @misc{chan2023chateval
       url={https://arxiv.org/abs/2308.07201},
       environments = {collaboration, text},
       agents = {prompting_and_in_context_learning, more_than_three_agents},
-      evaluation = {rule_based},
+      evaluation = {model_based},
       other = {n/a}
 }