Python pandas error 'Duplicate names are not allowed'



Problem

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-30d692425d67> in <module>
----> 1 X_train, X_test, y_train, y_test = get_human_dataset()

<ipython-input-2-e8318f86efdb> in get_human_dataset()
      2     feature_name_df = pd.read_csv('./datas/human_activity/features.txt', sep = '\s+', header=None, names=['column_index', 'column_name'])
      3     feature_name = feature_name_df.iloc[:, 1].values.tolist()
----> 4     X_train = pd.read_csv('./datas/human_activity/train/X_train.txt', sep='\s+', header=None, names = feature_name)
      5     X_test = pd.read_csv('./datas/human_activity/test/X_test.txt', sep='\s+', header=None, names = feature_name)
      6     y_train = pd.read_csv('./datas/human_activity/train/y_train.txt', sep='\s+', header=None, names=['action'])

c:\users\anaconda3\envs\modu\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    683         )
    684 
--> 685         return _read(filepath_or_buffer, kwds)
    686 
    687     parser_f.__name__ = name

c:\users\anaconda3\envs\modu\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    452 
    453     # Check for duplicates in names.
--> 454     _validate_names(kwds.get("names", None))
    455 
    456     # Create the parser.

c:\users\anaconda3\envs\modu\lib\site-packages\pandas\io\parsers.py in _validate_names(names)
    419     if names is not None:
    420         if len(names) != len(set(names)):
--> 421             raise ValueError("Duplicate names are not allowed.")
    422     return names
    423 

ValueError: Duplicate names are not allowed.

Solution

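This problem is caused by the pandas version. As the traceback shows, `read_csv` raises the error when the list passed to `names` contains repeated entries (`len(names) != len(set(names))`), and the feature names read from `features.txt` include duplicates. I confirmed that the error occurs with pandas 0.25.1; after I changed the version to 0.20.0, the same code worked well. Alternatively, you can keep the newer pandas version and make the feature names unique (for example, by appending a suffix to the repeated names) before passing them to `names`.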

pip install pandas==0.20.0