Python - AttributeError: 'list' object has no attribute 'lower'

Je suis en train de créer un programme d'analyse de sentiments. Les tweets à analyser sont lus à partir d'un fichier CSV et, une fois analysés, ils sont réécrits dans un autre fichier CSV. Cependant, j'obtiens l'erreur AttributeError: 'list' object has no attribute 'lower'. L'erreur semble provenir de la partie du code ci-dessous. Cette opération n'est-elle pas autorisée pour une phrase lue dans un fichier CSV ?

 def processTweet(tweet):
        """Normalize a raw tweet string before feature extraction.

        Steps, in order: lower-case the text, replace links with the token
        ``URL``, replace @mentions with ``AT_USER``, collapse whitespace runs
        into one space, drop the ``#`` of hashtags (keeping the word), and
        strip leading/trailing quote characters.

        Args:
            tweet: The tweet text as a single string. A csv row (a list of
                cells) must be indexed first, e.g. ``row[0]``.

        Returns:
            The normalized tweet string.

        Raises:
            AttributeError: If ``tweet`` is not a string (for example a list
                returned by ``csv.reader``), because ``lower`` is missing.
        """
        # Lower-case first; the upper-case placeholders inserted below are
        # therefore the only upper-case text left in the result.
        tweet = tweet.lower()
        # Convert www.* or https?://* to URL. The character class must be
        # [^\s]+ (non-whitespace); [\s]+ would only match "www." followed by
        # blanks and real links would never be replaced.
        tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
        # Convert @username to AT_USER
        tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
        # Remove additional white spaces
        tweet = re.sub(r'[\s]+', ' ', tweet)
        # Replace #word with word (keep the captured word, drop only the '#')
        tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
        # Trim surrounding single/double quotes
        tweet = tweet.strip('\'"')
        return tweet
    #end 

    #start getStopWordList
    def getStopWordList(stopWordListFileName):
        """Read the stop-word file and return the stop words as a list.

        The placeholders ``AT_USER`` and ``URL`` are always placed first so
        they are filtered out like ordinary stop words.

        Args:
            stopWordListFileName: Path of a text file with one stop word per
                line.

        Returns:
            A list starting with ``'AT_USER'`` and ``'URL'`` followed by every
            line of the file with surrounding whitespace stripped (a blank
            line yields an empty string entry, as before).
        """
        stopWords = ['AT_USER', 'URL']

        # The context manager closes the file even if reading fails.
        with open(stopWordListFileName, 'r') as fp:
            for line in fp:
                stopWords.append(line.strip())
        return stopWords
    #end

    #start getfeatureVector
    def getFeatureVector(tweet, stopWords):
        """Turn a processed tweet into its list of feature words.

        Each whitespace-separated token has repeated characters squeezed by
        ``replaceTwoOrMore`` and surrounding punctuation stripped. Tokens that
        are stop words, or that do not match the word pattern (starts with a
        letter, letters/digits only, at least two characters), are dropped.

        Args:
            tweet: The processed tweet string.
            stopWords: Collection of words to ignore.

        Returns:
            List of the kept tokens, lower-cased, in their original order.
        """
        word_shape = re.compile(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$")
        kept = []
        for raw in tweet.split():
            # squeeze repetitions, then strip punctuation
            token = replaceTwoOrMore(raw).strip('\'"?,.')
            # the stop-word test runs before lower-casing, so the upper-case
            # placeholders in stopWords are compared as-is
            if token in stopWords:
                continue
            if word_shape.search(token) is None:
                continue
            kept.append(token.lower())
        return kept
    #end

Voici le code complet

#import regex
import re
import csv
import pprint
import nltk.classify
#start replaceTwoOrMore
def replaceTwoOrMore(s):
    """Squeeze runs of a repeated character down to two occurrences.

    Any character (newlines included, because of ``re.DOTALL``) repeated two
    or more times in a row is replaced by exactly two copies of it, e.g.
    ``"hellooooo"`` becomes ``"helloo"``.

    Args:
        s: The string to process.

    Returns:
        The string with every run of identical characters capped at two.
    """
    # look for 2 or more repetitions of character
    pattern = re.compile(r"(.)\1{1,}", re.DOTALL)
    return pattern.sub(r"\1\1", s)
#end
#start process_tweet
def processTweet(tweet):
    """Normalize a raw tweet string before feature extraction.

    Steps, in order: lower-case the text, replace links with the token
    ``URL``, replace @mentions with ``AT_USER``, collapse whitespace runs
    into one space, drop the ``#`` of hashtags (keeping the word), and strip
    leading/trailing quote characters.

    Args:
        tweet: The tweet text as a single string. A csv row (a list of
            cells) must be indexed first, e.g. ``row[0]``.

    Returns:
        The normalized tweet string.

    Raises:
        AttributeError: If ``tweet`` is not a string (for example a list
            returned by ``csv.reader``), because ``lower`` is missing.
    """
    # Lower-case first; the upper-case placeholders inserted below are
    # therefore the only upper-case text left in the result.
    tweet = tweet.lower()
    # Convert www.* or https?://* to URL. The character class must be
    # [^\s]+ (non-whitespace); [\s]+ would only match "www." followed by
    # blanks and real links would never be replaced.
    tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)
    # Convert @username to AT_USER
    tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)
    # Remove additional white spaces
    tweet = re.sub(r'[\s]+', ' ', tweet)
    # Replace #word with word
    tweet = re.sub(r'#([^\s]+)', r'\1', tweet)
    # Trim surrounding single/double quotes
    tweet = tweet.strip('\'"')
    return tweet
#end 
#start getStopWordList
def getStopWordList(stopWordListFileName):
    """Read the stop-word file and return the stop words as a list.

    The placeholders ``AT_USER`` and ``URL`` are always placed first so they
    are filtered out like ordinary stop words.

    Args:
        stopWordListFileName: Path of a text file with one stop word per
            line.

    Returns:
        A list starting with ``'AT_USER'`` and ``'URL'`` followed by every
        line of the file with surrounding whitespace stripped (a blank line
        yields an empty string entry, as before).
    """
    stopWords = ['AT_USER', 'URL']
    # The context manager closes the file even if reading fails.
    with open(stopWordListFileName, 'r') as fp:
        for line in fp:
            stopWords.append(line.strip())
    return stopWords
#end
#start getfeatureVector
def getFeatureVector(tweet, stopWords):
    """Turn a processed tweet into its list of feature words.

    Each whitespace-separated token has repeated characters squeezed by
    ``replaceTwoOrMore`` and surrounding punctuation stripped. Tokens that
    are stop words, or that do not match the word pattern (starts with a
    letter, letters/digits only, at least two characters), are dropped.

    Args:
        tweet: The processed tweet string.
        stopWords: Collection of words to ignore.

    Returns:
        List of the kept tokens, lower-cased, in their original order.
    """
    # Compiled once per call instead of once per word.
    word_shape = re.compile(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$")
    featureVector = []
    for w in tweet.split():
        # replace two or more with two occurrences, then strip punctuation
        w = replaceTwoOrMore(w).strip('\'"?,.')
        # The stop-word test runs before lower-casing, so the upper-case
        # placeholders in stopWords are compared as-is.
        if w in stopWords or word_shape.search(w) is None:
            continue
        featureVector.append(w.lower())
    return featureVector
#end
#start extract_features
def extract_features(tweet):
    """Build the boolean feature dictionary for one tweet.

    Args:
        tweet: Iterable of the tweet's words; in this file it is the list
            produced by ``getFeatureVector``.

    Returns:
        A dict with one ``'contains(<word>)'`` key per entry of the
        module-level ``featureList``, whose value tells whether that word
        occurs in ``tweet``.
    """
    # A set makes each membership test independent of the tweet length.
    tweet_words = set(tweet)
    return {'contains(%s)' % word: (word in tweet_words)
            for word in featureList}
#end
#Read the tweets one by one and process it
# Training phase: read the labelled tweets one by one and process them.
stopWords = getStopWordList('data/feature_list/stopwords.txt')
count = 0
featureList = []
tweets = []
# NOTE(review): 'rb' is the Python 2 convention for csv input and is kept
# as-is; under Python 3 this file would need text mode (newline='').
with open('data/sampleTweets.csv', 'rb') as trainingFile:
    inpTweets = csv.reader(trainingFile, delimiter=',', quotechar='"')
    for row in inpTweets:
        # Training rows are laid out as: sentiment label, tweet text.
        sentiment = row[0]
        tweet = row[1]
        processedTweet = processTweet(tweet)
        featureVector = getFeatureVector(processedTweet, stopWords)
        featureList.extend(featureVector)
        tweets.append((featureVector, sentiment))
# end loop

# Remove featureList duplicates
featureList = list(set(featureList))

# Generate the training set
training_set = nltk.classify.util.apply_features(extract_features, tweets)

# Train the Naive Bayes classifier
NBClassifier = nltk.NaiveBayesClassifier.train(training_set)

# Test the classifier
# testTweet = 'RT @Jewelz2611 @mashable @apple, iphones r 2 expensive. Most went w/htc/galaxy. No customer loyalty w/phone comp..'

# Column of data/test_datasets.csv that holds the tweet text.
# NOTE(review): assumed to be the first column - confirm against the file.
TEST_TWEET_COLUMN = 0

with open('data/test_datasets.csv', 'r') as csvinput, \
        open('data/test_datasets_output.csv', 'w') as csvoutput:
    writer = csv.writer(csvoutput, lineterminator='\n')
    reader = csv.reader(csvinput)
    outputRows = []
    # Discard the first row (presumably a header); the default avoids a
    # StopIteration when the file is empty.
    next(reader, None)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to classify.
            continue
        # csv.reader yields each row as a list of cells, so the tweet text
        # has to be picked out of the row. Passing the whole list caused
        # "AttributeError: 'list' object has no attribute 'lower'".
        processedTestTweet = processTweet(row[TEST_TWEET_COLUMN])
        sentiment = NBClassifier.classify(
            extract_features(getFeatureVector(processedTestTweet, stopWords)))
        row.append(sentiment)
        outputRows.append(row)
    writer.writerows(outputRows)
# print "testTweet = %s, sentiment = %s\n" % (testTweet, sentiment)

La trace d'appels (traceback) et l'erreur sont les suivantes :

Traceback (most recent call last):
File "simpleDemo.py", line 114, in <module>
processedTestTweet = processTweet(row)
File "simpleDemo.py", line 19, in processTweet
tweet = tweet.lower()
AttributeError: 'list' object has no attribute 'lower'

Toute aide serait vraiment appréciée. Merci !

Auteur : fuschia | 2014-02-26