-
Notifications
You must be signed in to change notification settings - Fork 37
/
Qwen2.5-Coder-repolevel-fim.py
107 lines (87 loc) · 3.44 KB
/
Qwen2.5-Coder-repolevel-fim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from transformers import AutoTokenizer, AutoModelForCausalLM
# Checkpoint id shared by the tokenizer and the model weights.
checkpoint = "Qwen/Qwen2.5-Coder-7B"
# Device that the tokenized prompt is moved onto later in this script.
device = "cuda"
# Note: passing "trust_remote_code=True" is no longer necessary here.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# device_map="auto" delegates weight placement to the library.
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
# Switch to evaluation mode, since the script only runs inference.
model.eval()
# Build the repository-level fill-in-the-middle (FIM) prompt.
# Layout: <|repo_name|> names the repository, then each file is introduced by
# <|file_sep|> followed by its path. The file to infill (main.py here) comes
# last and is written as <|fim_prefix|>...<|fim_suffix|>...<|fim_middle|>, so
# the text generated after <|fim_middle|> is the missing middle part.
input_text = """<|repo_name|>library-system
<|file_sep|>library.py
class Book:
def __init__(self, title, author, isbn, copies):
self.title = title
self.author = author
self.isbn = isbn
self.copies = copies
def __str__(self):
return f"Title: {self.title}, Author: {self.author}, ISBN: {self.isbn}, Copies: {self.copies}"
class Library:
def __init__(self):
self.books = []
def add_book(self, title, author, isbn, copies):
book = Book(title, author, isbn, copies)
self.books.append(book)
def find_book(self, isbn):
for book in self.books:
if book.isbn == isbn:
return book
return None
def list_books(self):
return self.books
<|file_sep|>student.py
class Student:
def __init__(self, name, id):
self.name = name
self.id = id
self.borrowed_books = []
def borrow_book(self, book, library):
if book and book.copies > 0:
self.borrowed_books.append(book)
book.copies -= 1
return True
return False
def return_book(self, book, library):
if book in self.borrowed_books:
self.borrowed_books.remove(book)
book.copies += 1
return True
return False
<|file_sep|>main.py
<|fim_prefix|>from library import Library
from student import Student
def main():
# Set up the library with some books
library = Library()
library.add_book("The Great Gatsby", "F. Scott Fitzgerald", "1234567890", 3)
library.add_book("To Kill a Mockingbird", "Harper Lee", "1234567891", 2)
# Set up a student
student = Student("Alice", "S1")
# Student borrows a book<|fim_suffix|>
if student.borrow_book(book, library):
print(f"{student.name} borrowed {book.title}")
else:
print(f"{student.name} could not borrow {book.title}")
# Student returns a book
if student.return_book(book, library):
print(f"{student.name} returned {book.title}")
else:
print(f"{student.name} could not return {book.title}")
# List all books in the library
print("All books in the library:")
for book in library.list_books():
print(book)
if __name__ == "__main__":
main()<|fim_middle|>
"""
# Tokenize the prompt and move the tensors to the device the model lives on.
# The model is loaded with device_map="auto", so its placement is decided at
# load time; following model.device avoids a device mismatch that a
# hard-coded device string can cause (e.g. on a machine without CUDA).
model_inputs = tokenizer([input_text], return_tensors="pt").to(model.device)
# Number of prompt tokens, used below to strip the prompt from the output.
prompt_length = model_inputs.input_ids.shape[1]
# Token ids that stop generation. Same set as before with the duplicated
# entries (151664 and 151662) removed; first-seen order is preserved.
# NOTE(review): these presumably are the FIM / repo control tokens plus the
# end-of-text / end-of-turn tokens of the Qwen2.5 vocabulary -- confirm
# against the tokenizer before reusing with another checkpoint.
eos_token_ids = [151664, 151662, 151659, 151660, 151661, 151663, 151645, 151643]
# Use `max_new_tokens` to control the maximum output length.
# Passing the whole encoding (**model_inputs) forwards the attention mask
# together with the input ids instead of leaving generate() to infer it.
# do_sample=False selects greedy decoding, so the output is deterministic.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=1024,
    do_sample=False,
    eos_token_id=eos_token_ids,
)[0]
# The generated_ids include the prompt ids, so decode only what follows them.
output_text = tokenizer.decode(generated_ids[prompt_length:], skip_special_tokens=True)
# Keep only the text before the first file separator, in case the model
# starts a new file after completing the middle part.
print(f"Prompt: \n{input_text}\n\nGenerated text: \n{output_text.split('<|file_sep|>')[0]}")
# The expected output is as follows:
"""
Generated text:
book = library.find_book("1234567890")
"""