Changeset 10957:660b3532fb57 in orange
- Timestamp: 07/23/12 12:47:55 (10 months ago)
- Branch: default
- Location: Orange
- Files: 2 edited
  - OrangeWidgets/Prototypes/OWCSVFileImport.py (modified) (3 diffs)
  - data/io.py (modified) (8 diffs)
Legend:
- Unmodified
- Added
- Removed
Orange/OrangeWidgets/Prototypes/OWCSVFileImport.py
r10814 r10957 16 16 17 17 from OWDataTable import ExampleTableModel 18 19 # Hints used when the sniff_csv cannot determine the dialect. 20 DEFAULT_HINTS = \ 21 {"quote": "'", 22 "quotechar": "'", 23 "doublequote": False, 24 "quoting": 0, 25 "escapechar": "\\", 26 "delimiter": ",", 27 "has_header": True, 28 "has_orange_header": False, 29 "skipinitialspace": True, 30 "DK": "?", 31 } 18 32 19 33 … … 273 287 self.recent_files.pop(index_to_remove + 1) 274 288 289 self.warning(1) 275 290 if filename in self.hints: 276 291 hints = self.hints[filename] 277 292 else: 278 hints = sniff_csv(filename) 279 self.hints[filename] = hints 293 try: 294 hints = sniff_csv(filename) 295 except csv.Error, ex: 296 self.warning(1, str(ex)) 297 hints = dict(DEFAULT_HINTS) 298 299 if not hints: 300 hints = dict(DEFAULT_HINTS) 301 302 self.hints[filename] = hints 280 303 281 304 delimiter = hints["delimiter"] … … 328 351 329 352 # Save hints for the selected file 330 hints["quote "] = self.quote353 hints["quotechar"] = self.quote 331 354 hints["delimiter"] = self.delimiter or self.other_delimiter 332 355 hints["has_header"] = self.has_header -
Orange/data/io.py
r10899 r10957 599 599 file = as_open_file(file, "rb") 600 600 snifer = csv.Sniffer() 601 sample = file.read(5 * 2 ** 20) # max 5MB sample TODO: What if this is not enough. Try with a bigger sample 602 dialect = snifer.sniff(sample) 603 601 602 # Max 5MB sample 603 # TODO: What if this is not enough. Try with a bigger sample 604 sample = file.read(5 * 2 ** 20) 605 try: 606 dialect = snifer.sniff(sample) 607 except csv.Error: 608 # try the default, hope the provided arguments are correct 609 dialect = "excel" 610 604 611 if has_header is None: 605 has_header = snifer.has_header(sample) 606 607 file.seek(0) # Rewind 608 612 try: 613 has_header = snifer.has_header(sample) 614 except csv.Error: 615 has_header = False 616 617 file.seek(0) # Rewind 618 609 619 def kwparams(**kwargs): 610 620 """Return not None kwargs. 611 621 """ 612 622 return dict([(k, v) for k, v in kwargs.items() if v is not None]) 613 623 624 # non-None format parameters. 614 625 fmtparam = kwparams(delimiter=delimiter, 615 626 quotechar=quotechar, 616 627 escapechar=escapechar, 617 628 skipinitialspace=skipinitialspace) 618 629 619 630 reader = csv.reader(file, dialect=dialect, 620 631 **fmtparam) … … 623 634 624 635 row = first_row = reader.next() 625 636 626 637 if has_header: 627 638 header = row … … 632 643 if has_types is None: 633 644 has_types = has_header and is_var_types_row(row) 634 645 635 646 if has_types: 636 647 types = var_types(row) … … 642 653 has_annotations = has_header and has_types and \ 643 654 is_var_attributes_row(row) 644 655 645 656 if has_annotations: 646 657 labels_row = row … … 652 663 # Create a default header 653 664 header = ["F_%i" % i for i in range(len(first_row))] 654 665 655 666 if not types: 656 667 # Create blank variable types 657 668 types = [None] * len(header) 658 669 659 670 if not var_attrs: 660 671 # Create blank variable attributes 661 672 var_attrs = [None] * len(header) 673 else: 674 # Pad the vars_attrs if it is not complete 675 # (orange tab format 
allows this line to be shorter then header). 676 if len(var_attrs) < len(header): 677 var_attrs += [None] * (len(header) - len(var_attrs)) 662 678 663 679 # start from the beginning 664 680 file.seek(0) 665 681 reader = csv.reader(file, dialect=dialect, **fmtparam) 666 682 667 683 for defined in [has_header, has_types, has_annotations]: 668 if defined: 684 if defined: 669 685 # skip definition rows if present in the file 670 686 reader.next() 671 687 672 688 variables = [] 673 689 undefined_vars = [] 674 # Missing value flags 690 # Missing value flags 675 691 missing_flags = DK.split(",") if DK is not None else ["?", "", "NA", "~", "*"] 676 692 missing_map = dict.fromkeys(missing_flags, "?") 677 693 missing_translate = lambda val: missing_map.get(val, val) 678 694 679 695 # Create domain variables or corresponding place holders 680 696 for i, (name, var_t) in enumerate(zip(header, types)): … … 712 728 for ind, var_def in undefined_vars: 713 729 var_def.values.add(row[ind]) 714 715 # Process undefined variables now that we can deduce their type 730 731 # Process undefined variables now that we can deduce their type 716 732 for ind, var_def in undefined_vars: 717 values = var_def.values - set(["?", ""]) # TODO: Other unknown strings?733 values = var_def.values - set(["?", ""]) # TODO: Other unknown strings 718 734 values = sorted(values) 719 735 if isinstance(var_def, _disc_placeholder): … … 779 795 780 796 class_var = class_var[0] if class_var else None 781 797 782 798 attribute_load_status += class_var_load_status 783 799 variable_indices = attribute_indices + class_indices … … 799 815 return table 800 816 817 801 818 def as_open_file(file, mode="rb"): 802 819 if isinstance(file, basestring): 803 820 file = open(file, mode) 804 else: # assuming it is file like with proper mode, could check for write, read821 else: # assuming it is file like with proper mode, could check for write, read 805 822 pass 806 823 return file
Note: See TracChangeset for help on using the changeset viewer.
