CSVReader

CSVReader use without processors:

>>> from csvio import CSVReader
>>> reader = CSVReader("fruit_stock.csv")
>>> reader.fieldnames
['Supplier', 'Fruit', 'Quantity']

>>> len(reader.rows)
4

>>> import json
>>> print(json.dumps(reader.rows, indent=4))
[
    {
        "Supplier": "Big Apple",
        "Fruit": "Apple",
        "Quantity": "1"
    },
    {
        "Supplier": "Big Melons",
        "Fruit": "Melons",
        "Quantity": "2"
    },
    {
        "Supplier": "Long Mangoes",
        "Fruit": "Mango",
        "Quantity": "3"
    },
    {
        "Supplier": "Small Strawberries",
        "Fruit": "Strawberry",
        "Quantity": "4"
    }
]

CSV file contents:

Supplier,Fruit,Quantity
Big Apple,Apple,1
Big Melons,Melons,2
Long Mangoes,Mango,3
Small Strawberries,Strawberry,4

CSV Reader with Processors

Original CSV Contents: fruit_stock.csv

Supplier,Fruit,Origin,Quantity
Big Apples,Apple,Spain,1
Big Melons,Melons,Italy,2
Long Mangoes,Mango,India,3
Small Strawberries,Strawberry,France,4
Short Mangoes,Mango,France,5
Sweet Strawberries,Strawberry,Spain,6
Square Apples,Apple,Italy,7
Small Melons,Melons,Italy,8
Dark Berries,Strawberry,Australia,9
Sweet Berries,Blackcurrant,Australia,10

Processor function definitions

def update_row(row):

    row["Supplier"] = f"{row['Supplier']} ({row['Origin']})"

    row["Quantity"] = int(row["Quantity"])

    if row["Quantity"] > 2:
        row["Quantity"] += 1

    return row

def capitalize(x):
    return x.upper()

def replace_big_huge(x):
    return x.replace("Big", "Huge")

Define and apply processors

from csvio import CSVReader, CSVWriter
from csvio.processors import FieldProcessor, RowProcessor
from json import dumps

fp = FieldProcessor("fp1")

fp.add_processor("Supplier", replace_big_huge)
fp.add_processor("Fruit", capitalize)
fp.add_processor("Quantity", lambda x: int(x))
fp.add_processor("Origin", capitalize)

rp = RowProcessor("rp1")
rp.add_processor(update_row)

processors_list = [fp,rp]

reader = CSVReader("fruit_stock.csv", processors=processors_list)

writer = CSVWriter("fruit_stock_processed.csv", reader.fieldnames)
writer.add_rows(reader.rows)
writer.flush()

print(dumps(reader.rows, indent=4))

Output

[
    {
        "Supplier": "Huge Apples (SPAIN)",
        "Fruit": "APPLE",
        "Origin": "SPAIN",
        "Quantity": 1
    },
    {
        "Supplier": "Huge Melons (ITALY)",
        "Fruit": "MELONS",
        "Origin": "ITALY",
        "Quantity": 2
    },
    {
        "Supplier": "Long Mangoes (INDIA)",
        "Fruit": "MANGO",
        "Origin": "INDIA",
        "Quantity": 4
    },
    {
        "Supplier": "Small Strawberries (FRANCE)",
        "Fruit": "STRAWBERRY",
        "Origin": "FRANCE",
        "Quantity": 5
    },
    {
        "Supplier": "Short Mangoes (FRANCE)",
        "Fruit": "MANGO",
        "Origin": "FRANCE",
        "Quantity": 6
    },
    {
        "Supplier": "Sweet Strawberries (SPAIN)",
        "Fruit": "STRAWBERRY",
        "Origin": "SPAIN",
        "Quantity": 7
    },
    {
        "Supplier": "Square Apples (ITALY)",
        "Fruit": "APPLE",
        "Origin": "ITALY",
        "Quantity": 8
    },
    {
        "Supplier": "Small Melons (ITALY)",
        "Fruit": "MELONS",
        "Origin": "ITALY",
        "Quantity": 9
    },
    {
        "Supplier": "Dark Berries (AUSTRALIA)",
        "Fruit": "STRAWBERRY",
        "Origin": "AUSTRALIA",
        "Quantity": 10
    },
    {
        "Supplier": "Sweet Berries (AUSTRALIA)",
        "Fruit": "BLACKCURRANT",
        "Origin": "AUSTRALIA",
        "Quantity": 11
    }
]

CSV Contents after Processing: fruit_stock_processed.csv

Supplier,Fruit,Origin,Quantity
Huge Apples (SPAIN),APPLE,SPAIN,1
Huge Melons (ITALY),MELONS,ITALY,2
Long Mangoes (INDIA),MANGO,INDIA,4
Small Strawberries (FRANCE),STRAWBERRY,FRANCE,5
Short Mangoes (FRANCE),MANGO,FRANCE,6
Sweet Strawberries (SPAIN),STRAWBERRY,SPAIN,7
Square Apples (ITALY),APPLE,ITALY,8
Small Melons (ITALY),MELONS,ITALY,9
Dark Berries (AUSTRALIA),STRAWBERRY,AUSTRALIA,10
Sweet Berries (AUSTRALIA),BLACKCURRANT,AUSTRALIA,11
class csvio.CSVReader(filename: str, processors: Optional[List[csvio.processors.processor_base.ProcessorBase]] = None, fieldnames: List[str] = [], open_kwargs: Dict[str, Any] = {}, csv_kwargs: Dict[str, Any] = {})

Bases: csvio.csvbase.CSVBase

This object represents a CSV file for reading.

Parameters
  • filename (required) – Full path to the CSV file for reading.

  • fieldnames (optional) – A list of strings representing the column headings for the CSV file. If this list is specified when instantiating an object of this class, it is used as the column headings. This is handy when the CSV to read does not have column headings. Otherwise this list is populated from the CSV file that is set in the filename argument of this class’s constructor.

  • processors (optional) – A list of processor objects (for example FieldProcessor or RowProcessor instances). The processor functions defined in these objects are applied to the rows in the CSV after they are read.

  • open_kwargs (optional) – A dictionary of key, value pairs that should be passed to the open method within this class.

  • csv_kwargs (optional) – A dictionary of key, value pairs that should be passed to the DictReader constructor within this class.

property csv_kwargs: Dict[str, Any]
Returns

A dictionary of key, value pairs that should be passed to the DictReader constructor within this class.

delete(missing_ok: bool = False) bool

Delete the file at the path provided in the filename parameter

Parameters

missing_ok (optional) – Parameter to pass to the pathlib.Path.unlink() method.

Returns

True If file is deleted successfully.

False On failure.

property fieldnames: List[str]
Returns

List of column headings

property file_ext: str
Returns

Extension suffix of the file without parent directory and file name.

property filedir: str
Returns

Parent directory path of the file (excluding the name of the file)

property filename: str
Returns

File name without the parent directory path.

property filename_no_ext: str
Returns

File name without parent directory and file extension.

property filepath: str
Returns

Complete file path including the parent directory, file name and extension

property num_rows: int
Returns

The total number of rows in the CSV (excluding column headings)

property open_kwargs: Dict[str, Any]
Returns

A dictionary of key, value pairs that should be passed to the open method within this class.

property path_obj: pathlib.Path
Returns

pathlib.Path object representing filename.

property rows: List[Dict[str, Any]]
Returns

A list of dictionaries where each item in it represents a row in the CSV file. Each dictionary in the list maps the column heading (fieldname) to the corresponding value for it from the CSV.

rows_from_column_key(column_name: str, rows: Optional[List[Dict[str, Any]]] = None) Dict[str, List[Dict[str, Any]]]

Collect all the rows in the rows parameter that have the same values for the column defined in the column_name parameter, and construct a dictionary with the column_name value as the key and the corresponding rows as a list of dictionaries, as the value of this key.

Parameters
  • column_name (required) – Name of the column that is to be used as the key under which all the rows having the same value of this column will be collected.

  • rows (optional. If not provided self.rows will be used.) – List of dictionaries representing the rows that will be separated and collected under the common value of the column name provided in the column_name parameter.

Returns

A dictionary constructed using the logic as explained above.

rows_to_nested_dicts(column_order: List[str], rows: Optional[List[Dict[str, Any]]] = None) Dict[str, Any]

Collect all values of columns that are the same and construct a nested dictionary that has the common values as the keys, in the same order of hierarchy as provided in the column_order parameter.

The value of the last column name in the column_order list is used as the key under which the matching rows are collected as a list of dictionaries.

Parameters
  • column_order (required) – An ordered list of column names, to be used for constructing the dictionary

  • rows (optional. If not provided self.rows will be used.) – List of dictionaries representing the rows that will be transformed to the output Dictionary.

Returns

A dictionary with same column values collected under a common key in a hierarchical order.

Example:

CSV Contents: fruit_stock.csv

Supplier,Fruit,Origin,Quantity
Big Apples,Apple,Spain,1
Big Melons,Melons,Italy,2
Long Mangoes,Mango,India,3
Small Strawberries,Strawberry,France,4
Short Mangoes,Mango,France,5
Sweet Strawberries,Strawberry,Spain,6
Square Apples,Apple,Italy,7
Small Melons,Melons,Italy,8
Dark Berries,Strawberry,Australia,9
Sweet Berries,Blackcurrant,Australia,10

Create dictionary with hierarchy {"Fruit": [rows]}

from csvio.csvreader import CSVReader
from json import dumps

reader = CSVReader("fruit_stock.csv")

col_order = ["Fruit"]

dict_tree= reader.rows_to_nested_dicts(col_order)

print(dumps(dict_tree, indent=4))

Output:

{
    "Apple": [
        {
            "Supplier": "Big Apples",
            "Fruit": "Apple",
            "Origin": "Spain",
            "Quantity": "1"
        },
        {
            "Supplier": "Square Apples",
            "Fruit": "Apple",
            "Origin": "Italy",
            "Quantity": "7"
        }
    ],
    "Melons": [
        {
            "Supplier": "Big Melons",
            "Fruit": "Melons",
            "Origin": "Italy",
            "Quantity": "2"
        },
        {
            "Supplier": "Small Melons",
            "Fruit": "Melons",
            "Origin": "Italy",
            "Quantity": "8"
        }
    ],
    "Mango": [
        {
            "Supplier": "Long Mangoes",
            "Fruit": "Mango",
            "Origin": "India",
            "Quantity": "3"
        },
        {
            "Supplier": "Short Mangoes",
            "Fruit": "Mango",
            "Origin": "France",
            "Quantity": "5"
        }
    ],
    "Strawberry": [
        {
            "Supplier": "Small Strawberries",
            "Fruit": "Strawberry",
            "Origin": "France",
            "Quantity": "4"
        },
        {
            "Supplier": "Sweet Strawberries",
            "Fruit": "Strawberry",
            "Origin": "Spain",
            "Quantity": "6"
        },
        {
            "Supplier": "Dark Berries",
            "Fruit": "Strawberry",
            "Origin": "Australia",
            "Quantity": "9"
        }
    ],
    "Blackcurrant": [
        {
            "Supplier": "Sweet Berries",
            "Fruit": "Blackcurrant",
            "Origin": "Australia",
            "Quantity": "10"
        }
    ]
}

Create dictionary with hierarchy {"Fruit": {"Origin": [rows]}}

from csvio.csvreader import CSVReader
from json import dumps

reader = CSVReader("fruit_stock.csv")

col_order = ["Fruit", "Origin"]

dict_tree= reader.rows_to_nested_dicts(col_order)

print(dumps(dict_tree, indent=4))

Output:

{
    "Apple": {
        "Spain": [
            {
                "Supplier": "Big Apples",
                "Fruit": "Apple",
                "Origin": "Spain",
                "Quantity": "1"
            }
        ],
        "Italy": [
            {
                "Supplier": "Square Apples",
                "Fruit": "Apple",
                "Origin": "Italy",
                "Quantity": "7"
            }
        ]
    },
    "Melons": {
        "Italy": [
            {
                "Supplier": "Big Melons",
                "Fruit": "Melons",
                "Origin": "Italy",
                "Quantity": "2"
            },
            {
                "Supplier": "Small Melons",
                "Fruit": "Melons",
                "Origin": "Italy",
                "Quantity": "8"
            }
        ]
    },
    "Mango": {
        "India": [
            {
                "Supplier": "Long Mangoes",
                "Fruit": "Mango",
                "Origin": "India",
                "Quantity": "3"
            }
        ],
        "France": [
            {
                "Supplier": "Short Mangoes",
                "Fruit": "Mango",
                "Origin": "France",
                "Quantity": "5"
            }
        ]
    },
    "Strawberry": {
        "France": [
            {
                "Supplier": "Small Strawberries",
                "Fruit": "Strawberry",
                "Origin": "France",
                "Quantity": "4"
            }
        ],
        "Spain": [
            {
                "Supplier": "Sweet Strawberries",
                "Fruit": "Strawberry",
                "Origin": "Spain",
                "Quantity": "6"
            }
        ],
        "Australia": [
            {
                "Supplier": "Dark Berries",
                "Fruit": "Strawberry",
                "Origin": "Australia",
                "Quantity": "9"
            }
        ]
    },
    "Blackcurrant": {
        "Australia": [
            {
                "Supplier": "Sweet Berries",
                "Fruit": "Blackcurrant",
                "Origin": "Australia",
                "Quantity": "10"
            }
        ]
    }
}

Construct a dictionary with number of rows for each unique Origin

from csvio.csvreader import CSVReader
from json import dumps

reader = CSVReader("fruit_stock.csv")

col_order = ["Origin"]

origin_fruit_count = {}
dict_tree = reader.rows_to_nested_dicts(col_order)

for origin in dict_tree:
    origin_fruit_count.setdefault(origin, len(dict_tree[origin]))

print(dumps(origin_fruit_count, indent=4))

Output:

{
    "Spain": 2,
    "Italy": 3,
    "India": 1,
    "France": 2,
    "Australia": 2
}
touch(exist_ok: bool = False) bool

Create a blank file at the path provided in the filename parameter.

Parameters

exist_ok (optional) – Parameter to pass to the pathlib.Path.touch() method.

Returns

True If blank file is created successfully.

False On failure.