PR1: To understand how to read data and apply sentence segmentation and word tokenization using Python and NLTK.

# Iterate over a list, skipping the words "in" and "is"
array_data = ["text", "Preprocessing", "in", "Python", "This", "is", "Practical", "one"]
for item in array_data:
    if item != "in" and item != "is":
        print(item)

# Read the first line of a plain-text file
with open("/content/SMSSpamCollection (1).bin", "r") as file:
    file_data = file.readline()
print(file_data)

# Read a CSV file with the csv module
import csv

with open("/content/TestLarge (1).csv", mode='r') as csv_file:
    csv_read = csv.reader(csv_file)
    headers = next(csv_read)   # First row holds the column names
    csv_data = list(csv_read)
for row in csv_data:
    print(row)

# Read the same CSV with pandas
import pandas as pd

data = pd.read_csv("/content/TestLarge (1).csv")
print(data)
print(data["Sentiment"][:5])      # First 5 sentiments
print(data["Sentiment"].head(4))  # First 4 using head()
print(data["Sentiment"].tail(4))  # Last 4 using tail()

# Sentence segmentation with NLTK
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

corpus = "Natural Language Processing (NLP) is a subfield of computer science..."

def segmentation(text):
    # sent_tokenize already returns the list of sentences
    return nltk.sent_tokenize(text)

print(segmentation(corpus))
print(nltk.sent_tokenize(corpus))

# Sentence segmentation followed by word tokenization
input_text = "Hello strudent's ! i am Harsh here , came late to class today. cause i had taffic problem sir"

def segment_and_tokenize(text):
    sentences = nltk.sent_tokenize(text)                               # Break into sentences
    tokens = [nltk.word_tokenize(sentence) for sentence in sentences]  # Break each sentence into words
    return sentences, tokens

sentence_segments, word_tokens = segment_and_tokenize(input_text)

print("Sentence Segments:")
for sent in sentence_segments:
    print(sent)

print("Word Tokens:")
for token_list in word_tokens:
    print(token_list)
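As a follow-up, the short sketch below (an addition, not part of the original practical) shows why nltk.word_tokenize is preferred over plain whitespace splitting on the same noisy input_text: split() keeps "today." and "strudent's" as single tokens, while word_tokenize separates the trailing period and the possessive "'s".

# Hedged sketch: comparing str.split() with nltk.word_tokenize
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

input_text = "Hello strudent's ! i am Harsh here , came late to class today. cause i had taffic problem sir"

print(input_text.split())              # keeps "today." and "strudent's" whole
print(nltk.word_tokenize(input_text))  # yields "today", ".", "strudent", "'s"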
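Finally, to tie file reading and tokenization together, here is a hedged sketch that tokenizes the messages in the SMS file opened earlier. It assumes the file follows the standard SMSSpamCollection layout, with a label and a message separated by a tab on each line; if the local copy differs, the split("\t") step would need adjusting.

# Assumption: each line looks like "ham\t<message text>", as in the
# standard SMSSpamCollection dataset; adjust the split if yours differs.
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

with open("/content/SMSSpamCollection (1).bin", "r") as file:
    for line in file.readlines()[:5]:   # first 5 messages only
        label, message = line.strip().split("\t", 1)
        print(label, nltk.word_tokenize(message))

************************************************************************************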