Refer to [11] onwards at https://github.com/EdwardRaff/Inside-Deep-Learning/blob/main/Chapter_4.ipynb (for more information). zip_file_url = "https://download.pytorch.org/tutorial/data.zip" import requests,...

There are 2 files: one is the question (1.jpg) and the second is the code (ques.txt), for which a solution needs to be found. (I have attached a GitHub URL for further reference.) Programming language: Python. Use only PyTorch if applicable.


# Refer to [11] onwards in
# https://github.com/EdwardRaff/Inside-Deep-Learning/blob/main/Chapter_4.ipynb
# for more information.
import io
import string
import unicodedata
import zipfile

import requests

zip_file_url = "https://download.pytorch.org/tutorial/data.zip"

# Download the archive into memory. Check the HTTP status first so that a
# failed download is reported as an HTTP error rather than as a BadZipFile
# raised while parsing an error page; the timeout keeps a stalled connection
# from hanging the script forever.
r = requests.get(zip_file_url, timeout=60)
r.raise_for_status()
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall()

# Zip file is organized as data/names/[LANG].txt, where [LANG] is a specific
# language. Maps language name -> list of (lower-cased, ASCII-only) names.
namge_language_data = {}

# We will use some code to remove UNICODE tokens to make life easy for us
# processing wise, e.g., convert something like "Ślusàrski" to Slusarski.
all_letters = string.ascii_letters + " .,;'"
n_letters = len(all_letters)
# Maps every allowed character to its position in all_letters.
alphabet = {letter: index for index, letter in enumerate(all_letters)}


# Turn a Unicode string to plain ASCII, thanks to
# https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    """Return *s* with combining marks and non-vocabulary characters removed.

    The string is NFD-normalized so accented letters split into a base
    letter plus combining marks (category 'Mn'); the marks are dropped, and
    so is any character that is not in ``all_letters``.
    """
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )


# Loop through every language, open the zip file entry, and read all the
# lines from the text file.
for zip_path in z.namelist():
    if "data/names/" in zip_path and zip_path.endswith(".txt"):
        # The language is the file name without directory prefix or extension.
        lang = zip_path[len("data/names/"):-len(".txt")]
        with z.open(zip_path) as myfile:
            raw_text = str(myfile.read(), encoding='utf-8')
            lang_names = [
                unicodeToAscii(line).lower()
                for line in raw_text.strip().split("\n")
            ]
            namge_language_data[lang] = lang_names
        print(lang, ": ", len(lang_names))  # Print out the name of each language too.
class LanguageNameDataset(Dataset):
    """Dataset of names labelled with the index of their language.

    lang_name_dict: mapping of language name -> iterable of name strings.
    vocabulary: mapping of character -> integer index, used to encode names.

    Attributes:
        label_names: the language names, in the iteration order of
            ``lang_name_dict``; a label ``y`` refers to ``label_names[y]``.
        data: every name string, flattened across all languages.
        labels: for each entry of ``data``, the integer index of its language.
        vocabulary: the character -> index mapping passed to the constructor.
    """

    def __init__(self, lang_name_dict, vocabulary):
        self.label_names = list(lang_name_dict)
        self.data = []
        self.labels = []
        self.vocabulary = vocabulary
        # Flatten the per-language lists into two parallel lists.
        for y, language in enumerate(self.label_names):
            for sample in lang_name_dict[language]:
                self.data.append(sample)
                self.labels.append(y)

    def __len__(self):
        """Return the total number of names across all languages."""
        return len(self.data)

    def string2InputVec(self, input_string):
        """
        This method will convert any input string into a vector of long
        values, according to the vocabulary used by this object.

        input_string: the string to convert to a tensor

        Returns a 1-D ``torch.long`` tensor with one entry per character.
        Raises KeyError if a character is missing from the vocabulary.
        """
        # Look up every character, then build the tensor in a single call
        # instead of writing into a pre-allocated tensor element by element.
        indices = [self.vocabulary[character] for character in input_string]
        return torch.tensor(indices, dtype=torch.long)

    def __getitem__(self, idx):
        """Return ``(encoded_name, label)`` for the sample at position *idx*.

        ``encoded_name`` is the tensor produced by ``string2InputVec`` and
        ``label`` is the plain integer index into ``label_names``.
        """
        name = self.data[idx]
        label = self.labels[idx]
        # The label is returned as a plain int; no per-item label tensor is
        # built here because it was never part of the returned value.
        return self.string2InputVec(name), label
Oct 15, 2021
SOLUTION.PDF

Get Answer To This Question

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here