email-tracker/human_eval.tsv at f09560c7b17e66bb1d75fa0801e73e8496202752

Files

2025-10-24 19:21:19 -05:00

8.9 KiB

Raw Blame History

1	HumanEval/0	from typing import List\
2	\	return False\
3		\
4	\	'author': 'jt',\
5	\	'dataset': 'test'\
6	\	assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\
7	\	assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\
8	\	assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\
9	\	assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\
10	\	assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\
11	\	assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\
12	\	assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\
13	HumanEval/1	from typing import List\
14	\	current_string = []\
15	\	current_depth = 0\
16	\	for c in paren_string:\
17	\	return result\
18		\
19	\	'author': 'jt',\
20	\	'dataset': 'test'\
21	\	assert candidate('(()()) ((())) () ((())()())') == [\
22	\	]\
23	\	assert candidate('() (()) ((())) (((())))') == [\
24	\	]\
25	\	assert candidate('(()(())((())))') == [\
26	\	]\
27	\	assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\
28	HumanEval/2	\
29		\
30	\	'author': 'jt',\
31	\	'dataset': 'test'\
32	\	assert candidate(3.5) == 0.5\
33	\	assert abs(candidate(1.33) - 0.33) < 1e-6\
34	\	assert abs(candidate(123.456) - 0.456) < 1e-6\
35	HumanEval/3	from typing import List\
36	\	for op in operations:\
37	\	return False\
38		\
39	\	'author': 'jt',\
40	\	'dataset': 'test'\
41	\	assert candidate([]) == False\
42	\	assert candidate([1, 2, -3, 1, 2, -3]) == False\
43	\	assert candidate([1, 2, -4, 5, 6]) == True\
44	\	assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\
45	\	assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\
46	\	assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\
47	HumanEval/4	from typing import List\
48	\	return sum(abs(x - mean) for x in numbers) / len(numbers)\
49		\
50	\	'author': 'jt',\
51	\	'dataset': 'test'\
52	\	assert abs(candidate([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\
53	\	assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\
54	\	assert abs(candidate([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\
55	HumanEval/5	from typing import List\
56	\	result = []\
57	\	for n in numbers[:-1]:\
58	\	result.append(numbers[-1])\
59	\	return result\
60		\
61	\	'author': 'jt',\
62	\	'dataset': 'test'\
63	\	assert candidate([], 7) == []\
64	\	assert candidate([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\
65	\	assert candidate([2, 2, 2], 2) == [2, 2, 2, 2, 2]\
66	HumanEval/6	from typing import List\
67	\	return [parse_paren_group(x) for x in paren_string.split(' ') if x]\
68		\
69	\	'author': 'jt',\
70	\	'dataset': 'test'\
71	\	assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\
72	\	assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\
73	\	assert candidate('(()(())((())))') == [4]\
74	HumanEval/7	from typing import List\
75		\
76	\	'author': 'jt',\
77	\	'dataset': 'test'\
78	\	assert candidate([], 'john') == []\
79	\	assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\
80	\	assert candidate(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']\
81	\	assert candidate(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\
82	HumanEval/8	from typing import List, Tuple\
83	\	prod_value = 1\
84	\	for n in numbers:\
85	\	return sum_value, prod_value\
86		\
87	\	'author': 'jt',\
88	\	'dataset': 'test'\
89	\	assert candidate([]) == (0, 1)\
90	\	assert candidate([1, 1, 1]) == (3, 1)\
91	\	assert candidate([100, 0]) == (100, 0)\
92	\	assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\
93	\	assert candidate([10]) == (10, 10)\
94	HumanEval/9	from typing import List, Tuple\
95	\	result = []\
96	\	for n in numbers:\
97	\	return result\
98		\
99	\	'author': 'jt',\
100	\	'dataset': 'test'\
101	\	assert candidate([]) == []\
102	\	assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\
103	\	assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\
104	\	assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\

8.9 KiB Raw Blame History

8.9 KiB

Raw Blame History