8.9 KiB
8.9 KiB
| 1 | HumanEval/0 | from typing import List\ |
|---|---|---|
| 2 | \ | return False\ |
| 3 | \ | |
| 4 | \ | 'author': 'jt',\ |
| 5 | \ | 'dataset': 'test'\ |
| 6 | \ | assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True\ |
| 7 | \ | assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False\ |
| 8 | \ | assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True\ |
| 9 | \ | assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False\ |
| 10 | \ | assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True\ |
| 11 | \ | assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True\ |
| 12 | \ | assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False\ |
| 13 | HumanEval/1 | from typing import List\ |
| 14 | \ | current_string = []\ |
| 15 | \ | current_depth = 0\ |
| 16 | \ | for c in paren_string:\ |
| 17 | \ | return result\ |
| 18 | \ | |
| 19 | \ | 'author': 'jt',\ |
| 20 | \ | 'dataset': 'test'\ |
| 21 | \ | assert candidate('(()()) ((())) () ((())()())') == [\ |
| 22 | \ | ]\ |
| 23 | \ | assert candidate('() (()) ((())) (((())))') == [\ |
| 24 | \ | ]\ |
| 25 | \ | assert candidate('(()(())((())))') == [\ |
| 26 | \ | ]\ |
| 27 | \ | assert candidate('( ) (( )) (( )( ))') == ['()', '(())', '(()())']\ |
| 28 | HumanEval/2 | \ |
| 29 | \ | |
| 30 | \ | 'author': 'jt',\ |
| 31 | \ | 'dataset': 'test'\ |
| 32 | \ | assert candidate(3.5) == 0.5\ |
| 33 | \ | assert abs(candidate(1.33) - 0.33) < 1e-6\ |
| 34 | \ | assert abs(candidate(123.456) - 0.456) < 1e-6\ |
| 35 | HumanEval/3 | from typing import List\ |
| 36 | \ | for op in operations:\ |
| 37 | \ | return False\ |
| 38 | \ | |
| 39 | \ | 'author': 'jt',\ |
| 40 | \ | 'dataset': 'test'\ |
| 41 | \ | assert candidate([]) == False\ |
| 42 | \ | assert candidate([1, 2, -3, 1, 2, -3]) == False\ |
| 43 | \ | assert candidate([1, 2, -4, 5, 6]) == True\ |
| 44 | \ | assert candidate([1, -1, 2, -2, 5, -5, 4, -4]) == False\ |
| 45 | \ | assert candidate([1, -1, 2, -2, 5, -5, 4, -5]) == True\ |
| 46 | \ | assert candidate([1, -2, 2, -2, 5, -5, 4, -4]) == True\ |
| 47 | HumanEval/4 | from typing import List\ |
| 48 | \ | return sum(abs(x - mean) for x in numbers) / len(numbers)\ |
| 49 | \ | |
| 50 | \ | 'author': 'jt',\ |
| 51 | \ | 'dataset': 'test'\ |
| 52 | \ | assert abs(candidate([1.0, 2.0, 3.0]) - 2.0/3.0) < 1e-6\ |
| 53 | \ | assert abs(candidate([1.0, 2.0, 3.0, 4.0]) - 1.0) < 1e-6\ |
| 54 | \ | assert abs(candidate([1.0, 2.0, 3.0, 4.0, 5.0]) - 6.0/5.0) < 1e-6\ |
| 55 | HumanEval/5 | from typing import List\ |
| 56 | \ | result = []\ |
| 57 | \ | for n in numbers[:-1]:\ |
| 58 | \ | result.append(numbers[-1])\ |
| 59 | \ | return result\ |
| 60 | \ | |
| 61 | \ | 'author': 'jt',\ |
| 62 | \ | 'dataset': 'test'\ |
| 63 | \ | assert candidate([], 7) == []\ |
| 64 | \ | assert candidate([5, 6, 3, 2], 8) == [5, 8, 6, 8, 3, 8, 2]\ |
| 65 | \ | assert candidate([2, 2, 2], 2) == [2, 2, 2, 2, 2]\ |
| 66 | HumanEval/6 | from typing import List\ |
| 67 | \ | return [parse_paren_group(x) for x in paren_string.split(' ') if x]\ |
| 68 | \ | |
| 69 | \ | 'author': 'jt',\ |
| 70 | \ | 'dataset': 'test'\ |
| 71 | \ | assert candidate('(()()) ((())) () ((())()())') == [2, 3, 1, 3]\ |
| 72 | \ | assert candidate('() (()) ((())) (((())))') == [1, 2, 3, 4]\ |
| 73 | \ | assert candidate('(()(())((())))') == [4]\ |
| 74 | HumanEval/7 | from typing import List\ |
| 75 | \ | |
| 76 | \ | 'author': 'jt',\ |
| 77 | \ | 'dataset': 'test'\ |
| 78 | \ | assert candidate([], 'john') == []\ |
| 79 | \ | assert candidate(['xxx', 'asd', 'xxy', 'john doe', 'xxxAAA', 'xxx'], 'xxx') == ['xxx', 'xxxAAA', 'xxx']\ |
| 80 | \ | assert candidate(['xxx', 'asd', 'aaaxxy', 'john doe', 'xxxAAA', 'xxx'], 'xx') == ['xxx', 'aaaxxy', 'xxxAAA', 'xxx']\ |
| 81 | \ | assert candidate(['grunt', 'trumpet', 'prune', 'gruesome'], 'run') == ['grunt', 'prune']\ |
| 82 | HumanEval/8 | from typing import List, Tuple\ |
| 83 | \ | prod_value = 1\ |
| 84 | \ | for n in numbers:\ |
| 85 | \ | return sum_value, prod_value\ |
| 86 | \ | |
| 87 | \ | 'author': 'jt',\ |
| 88 | \ | 'dataset': 'test'\ |
| 89 | \ | assert candidate([]) == (0, 1)\ |
| 90 | \ | assert candidate([1, 1, 1]) == (3, 1)\ |
| 91 | \ | assert candidate([100, 0]) == (100, 0)\ |
| 92 | \ | assert candidate([3, 5, 7]) == (3 + 5 + 7, 3 * 5 * 7)\ |
| 93 | \ | assert candidate([10]) == (10, 10)\ |
| 94 | HumanEval/9 | from typing import List, Tuple\ |
| 95 | \ | result = []\ |
| 96 | \ | for n in numbers:\ |
| 97 | \ | return result\ |
| 98 | \ | |
| 99 | \ | 'author': 'jt',\ |
| 100 | \ | 'dataset': 'test'\ |
| 101 | \ | assert candidate([]) == []\ |
| 102 | \ | assert candidate([1, 2, 3, 4]) == [1, 2, 3, 4]\ |
| 103 | \ | assert candidate([4, 3, 2, 1]) == [4, 4, 4, 4]\ |
| 104 | \ | assert candidate([3, 2, 3, 100, 3]) == [3, 3, 3, 100, 100]\ |