Python VS JSON, YAML, CSV & XML

Python VS JSON, YAML, CSV & XML


Juanmi Taboada
Juanmi Taboada
Python VS JSON, YAML, CSV & XML

Examples of how to work with JSON, YAML, CSV, and XML files in Python.

Today I found myself preparing some exercises for a student of mine who is learning the Python programming language; last week he asked me about converting a CSV file to XML. So I decided to publish these exercises for anybody who has doubts about how to work with these formats.

Example of JSON processing

import json

# Sample data: a shop name plus the units sold by each worker.
example = {
    "name": "Kiroga",
    "workers": [
        {"name": worker, "sold": units}
        for worker, units in (("Alex", 3), ("Susan", 8), ("Delia", 4))
    ],
}

# Serialize the dictionary into JSON text.
print("=== From Python to JSON String ===")
json_string = json.dumps(example)
print(type(json_string), "->", json_string)
print()

# Parse the JSON text back into Python objects.
print("=== From JSON String to Python ===")
obj = json.loads(json_string)
print(type(obj), "->", obj)

Result:

=== From Python to JSON String ===
<class 'str'> -> {"name": "Kiroga", "workers": [{"name": "Alex", "sold": 3}, {"name": "Susan", "sold": 8}, {"name": "Delia", "sold": 4}]}

=== From JSON String to Python ===
<class 'dict'> -> {'name': 'Kiroga', 'workers': [{'name': 'Alex', 'sold': 3}, {'name': 'Susan', 'sold': 8}, {'name': 'Delia', 'sold': 4}]}

Example of YAML processing

import yaml

# Sample document: a list of mixed scalars, a path, a float and a nested
# mapping, with some non-ASCII text thrown in.
example = {
    'incoming data': [1024, 88, 'á€eVentura', 192834, 1337, 'no data'],
    'configuration': '/etc/example/config.conf',
    'aprox pi value': 3.141592654,
    'names': {'incoming': 'málaga', 'outgoing': 'szczecin', 'days': 4},
}

# Serialize the dictionary into YAML text using the default dump options.
print("=== Dictionary to YAML string ===")
yaml_string = yaml.dump(example)
print(yaml_string)
print()

# Parse the YAML text back into Python objects with the safe loader.
print("=== YAML string to Dictionary ===")
obj = yaml.safe_load(yaml_string)
print(obj)

Result:

=== Dictionary to YAML string ===
aprox pi value: 3.141592654
configuration: /etc/example/config.conf
incoming data:
- 1024
- 88
- "\xE1\u20ACeVentura"
- 192834
- 1337
- no data
names:
  days: 4
  incoming: "m\xE1laga"
  outgoing: szczecin


=== YAML string to Dictionary ===
{'aprox pi value': 3.141592654, 'configuration': '/etc/example/config.conf', 'incoming data': [1024, 88, 'á€eVentura', 192834, 1337, 'no data'], 'names': {'days': 4, 'incoming': 'málaga', 'outgoing': 'szczecin'}}

Example of CSV processing

import csv
from io import StringIO

# Sample data: a header line followed by four fully quoted records.
example = '''"Origin","Target","Code","Country"
"1.0.0.0","1.0.0.255","AU","Australia"
"1.0.1.0","1.0.3.255","CN","China"
"1.0.4.0","1.0.7.255","AU","Australia"
"1.0.128.0","1.0.255.255","TH","Thailand"'''

# Column names, used to pick fields when reading and as fieldnames when writing.
header = ['Origin', 'Target', 'Code', 'Country']

# One output layout shared by both reading examples.
line_layout = "Origin:{:>12s}     Target:{:>12s}      Code:{:2s}     Country:{}"

print("=== Read as ROW ===")
rows = csv.reader(example.splitlines(), delimiter=',', quotechar='"')
headers = next(rows)  # consume the header line so only data rows remain
for row in rows:
    # Each row is a list of four strings, in column order.
    print(line_layout.format(*row))
print("")

print("=== Read as Dict ===")
rows = csv.DictReader(example.splitlines(), delimiter=',', quotechar='"')
for row in rows:
    # Each row is a mapping keyed by the names found in the header line.
    print(line_layout.format(*(row[column] for column in header)))
print("")

print("=== Write CSV ===")
buffer = StringIO()
writer = csv.DictWriter(buffer, fieldnames=header)
writer.writeheader()
records = [
    ('1.0.0.0', '1.0.0.255', 'AU', 'Australia'),
    ('1.0.1.0', '1.0.3.255', 'CN', 'China'),
    ('1.0.4.0', '1.0.7.255', 'AU', 'Australia'),
    ('1.0.128.0', '1.0.255.255', 'TH', 'Thailand'),
]
# Pair every record with the column names and write them all in one call.
writer.writerows(dict(zip(header, record)) for record in records)
print(buffer.getvalue())

Result:

=== Read as ROW ===
Origin:     1.0.0.0     Target:   1.0.0.255      Code:AU     Country:Australia
Origin:     1.0.1.0     Target:   1.0.3.255      Code:CN     Country:China
Origin:     1.0.4.0     Target:   1.0.7.255      Code:AU     Country:Australia
Origin:   1.0.128.0     Target: 1.0.255.255      Code:TH     Country:Thailand

=== Read as Dict ===
Origin:     1.0.0.0     Target:   1.0.0.255      Code:AU     Country:Australia
Origin:     1.0.1.0     Target:   1.0.3.255      Code:CN     Country:China
Origin:     1.0.4.0     Target:   1.0.7.255      Code:AU     Country:Australia
Origin:   1.0.128.0     Target: 1.0.255.255      Code:TH     Country:Thailand

=== Write CSV ===
Origin,Target,Code,Country
1.0.0.0,1.0.0.255,AU,Australia
1.0.1.0,1.0.3.255,CN,China
1.0.4.0,1.0.7.255,AU,Australia
1.0.128.0,1.0.255.255,TH,Thailand

Example of XML processing with Minidom

To me, the resulting source code seems too complex to use comfortably; it reads like spaghetti code.

from xml.dom import minidom

print("=== Create XML === ")

# The Document object is the factory for every node of the tree.
root = minidom.Document()

# WMWROOT: top-level element carrying the default namespace attribute.
wmwroot = root.createElement('WMWROOT')
wmwroot.setAttribute('xmlns', 'http://www.manh.com/ILSNET/Interface')
root.appendChild(wmwroot)

# WMWDATA: single child of the root element.
wmwdata = root.createElement('WMWDATA')
wmwroot.appendChild(wmwdata)

# WMFWUpload: carries the xsd/xsi declarations and holds the leaf elements.
wmfwupload = root.createElement('WMFWUpload')
for attribute, uri in (
    ('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema'),
    ('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance'),
):
    wmfwupload.setAttribute(attribute, uri)
wmwdata.appendChild(wmfwupload)

# Date, GroupIndex, Id, ...: one element with a single text child each.
for tag, text in (
    ('Date', "2022-09-26T10:36:02.6713225Z"),
    ('GroupIndex', "1"),
    ('Id', "48fafcd4-f928-4f95-88ca-702ce6139267"),
    ('More', "..."),
):
    node = root.createElement(tag)
    node.appendChild(root.createTextNode(text))
    wmfwupload.appendChild(node)

# Serialize the document as tab-indented text.
xml_str = root.toprettyxml(indent="\t")
print(xml_str)
print("")

print("=== Parse XML ===")
xml_obj = minidom.parseString(xml_str)

# The indented text is parsed with its whitespace kept as Text nodes, so the
# elements of interest sit at odd child positions (1, 1 and then 5 for Id).
parsed_root = xml_obj.childNodes[0]
parsed_data = parsed_root.childNodes[1]
parsed_upload = parsed_data.childNodes[1]
parsed_id = parsed_upload.childNodes[5]

print("Node {} is Type {}".format(parsed_root.tagName, type(xml_obj)))
print("NAME:", parsed_id.tagName)
print("VALUE:", parsed_id.childNodes[0].data)
print("===================================")
print("Summary:")
print(xml_obj.childNodes)
print(parsed_root.childNodes)
print(parsed_data.childNodes)
print(parsed_upload.childNodes)
print(parsed_id.tagName)
print(parsed_id.childNodes)

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f207809b0>]
[<DOM Text node "'\n\t'">, <DOM Element: WMWDATA at 0x7f7f20780910>, <DOM Text node "'\n'">]
[<DOM Text node "'\n\t\t'">, <DOM Element: WMFWUpload at 0x7f7f20780c30>, <DOM Text node "'\n\t'">]
[<DOM Text node "'\n\t\t\t'">, <DOM Element: Date at 0x7f7f20780eb0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: GroupIndex at 0x7f7f207802d0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: Id at 0x7f7f20780190>, <DOM Text node "'\n\t\t\t'">, <DOM Element: More at 0x7f7f20780f50>, <DOM Text node "'\n\t\t'">]
Id
[<DOM Text node "'48fafcd4-f'...">]

Example of XML processing with ElementTree

ElementTree seems more practical to me in everyday use, and the resulting source code is clearer:

import xml.etree.ElementTree as ET
from xml.dom import minidom

print("=== Create XML === ")

# WMWROOT: root element; the default namespace is written as a plain
# 'xmlns' attribute.
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

# WMWDATA: single child of the root element.
wmwdata = ET.SubElement(wmwroot, "WMWDATA")

# WMFWUpload: carries the xsd/xsi declarations and holds the leaf elements.
wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")
wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# Date, GroupIndex, Id, ...
# Each leaf is created and given its text in a single statement. The result
# is deliberately not stored: in a chained assignment such as
# `node = ET.SubElement(...).text = value`, `node` ends up bound to the text
# string rather than to the new element.
ET.SubElement(wmfwupload, 'Date').text = "2022-09-26T10:36:02.6713225Z"
ET.SubElement(wmfwupload, 'GroupIndex').text = "1"
ET.SubElement(wmfwupload, 'Id').text = "48fafcd4-f928-4f95-88ca-702ce6139267"
ET.SubElement(wmfwupload, 'More').text = "..."

# Serialize the tree; minidom is used only to indent the text for display.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
xml_obj = ET.fromstring(xml_str)
print("Node {} is Type {}".format(xml_obj[0].tag, type(xml_obj)))
print("===================================")
print("Summary:")
# Walk root -> WMWDATA -> WMFWUpload -> leaves. The loop variables use their
# own names so the elements built above (wmwdata, wmfwupload) stay intact.
for data_element in xml_obj:
    for upload_element in data_element:
        for element in upload_element:
            print("{} -> {}".format(element.tag, element.text))

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node {http://www.manh.com/ILSNET/Interface}WMWDATA is Type <class 'xml.etree.ElementTree.Element'>
===================================
Summary:
{http://www.manh.com/ILSNET/Interface}Date -> 2022-09-26T10:36:02.6713225Z
{http://www.manh.com/ILSNET/Interface}GroupIndex -> 1
{http://www.manh.com/ILSNET/Interface}Id -> 48fafcd4-f928-4f95-88ca-702ce6139267
{http://www.manh.com/ILSNET/Interface}More -> ...

Example of XML processing with ETree from LXML

I didn’t manage to get the namespace declarations working on internal nodes.

from lxml import etree as ET
from xml.dom import minidom

print("=== Create XML === ")

# WMWROOT: root element; the default namespace is written as a plain
# 'xmlns' attribute.
wmwroot = ET.Element("WMWROOT")
wmwroot.set('xmlns', 'http://www.manh.com/ILSNET/Interface')

# WMWDATA: single child of the root element.
wmwdata = ET.SubElement(wmwroot, "WMWDATA")

# WMFWUpload: holds the leaf elements.
wmfwupload = ET.SubElement(wmwdata, "WMFWUpload")

# NOT WORKING! (Neither with QName)
# wmfwupload.set('xmlns:xsd', 'http://www.w3.org/2001/XMLSchema')
# wmfwupload.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
# NOTE(review): lxml normally takes prefix declarations through the `nsmap=`
# argument when the element is created, e.g.
# ET.SubElement(wmwdata, "WMFWUpload", nsmap={'xsd': ..., 'xsi': ...}).
# Untested here - confirm before relying on it.

# Date, GroupIndex, Id, ...
# Each leaf is created and given its text in a single statement. The result
# is deliberately not stored: in a chained assignment such as
# `node = ET.SubElement(...).text = value`, `node` ends up bound to the text
# string rather than to the new element.
ET.SubElement(wmfwupload, 'Date').text = "2022-09-26T10:36:02.6713225Z"
ET.SubElement(wmfwupload, 'GroupIndex').text = "1"
ET.SubElement(wmfwupload, 'Id').text = "48fafcd4-f928-4f95-88ca-702ce6139267"
ET.SubElement(wmfwupload, 'More').text = "..."

# Serialize the tree; minidom is used to indent the text for display.
xml_str = ET.tostring(wmwroot)
print(minidom.parseString(xml_str).toprettyxml(indent="\t"))
print("")

print("=== Parse XML ===")
# The serialized string (not the indented text) is parsed, and elements are
# addressed purely by position: first child, first child, then third child.
xml_obj = minidom.parseString(xml_str)
parsed_root = xml_obj.childNodes[0]
parsed_data = parsed_root.childNodes[0]
parsed_upload = parsed_data.childNodes[0]
parsed_id = parsed_upload.childNodes[2]

print("Node {} is Type {}".format(parsed_root.tagName, type(xml_obj)))
print("NAME:", parsed_id.tagName)
print("VALUE:", parsed_id.childNodes[0].data)
print("===================================")
print("Summary:")
print(xml_obj.childNodes)
print(parsed_root.childNodes)
print(parsed_data.childNodes)
print(parsed_upload.childNodes)
print(parsed_id.tagName)
print(parsed_id.childNodes)

Result:

=== Create XML === 
<?xml version="1.0" ?>
<WMWROOT xmlns="http://www.manh.com/ILSNET/Interface">
	<WMWDATA>
		<WMFWUpload>
			<Date>2022-09-26T10:36:02.6713225Z</Date>
			<GroupIndex>1</GroupIndex>
			<Id>48fafcd4-f928-4f95-88ca-702ce6139267</Id>
			<More>...</More>
		</WMFWUpload>
	</WMWDATA>
</WMWROOT>


=== Parse XML ===
Node WMWROOT is Type <class 'xml.dom.minidom.Document'>
NAME: Id
VALUE: 48fafcd4-f928-4f95-88ca-702ce6139267
===================================
Summary:
[<DOM Element: WMWROOT at 0x7f7f206fccd0>]
[<DOM Text node "'\n\t'">, <DOM Element: WMWDATA at 0x7f7f206fc730>, <DOM Text node "'\n'">]
[<DOM Text node "'\n\t\t'">, <DOM Element: WMFWUpload at 0x7f7f206fcd70>, <DOM Text node "'\n\t'">]
[<DOM Text node "'\n\t\t\t'">, <DOM Element: Date at 0x7f7f206fcc30>, <DOM Text node "'\n\t\t\t'">, <DOM Element: GroupIndex at 0x7f7f206fceb0>, <DOM Text node "'\n\t\t\t'">, <DOM Element: Id at 0x7f7f206fca50>, <DOM Text node "'\n\t\t\t'">, <DOM Element: More at 0x7f7f206fcb90>, <DOM Text node "'\n\t\t'">]
Id
[<DOM Text node "'48fafcd4-f'...">]

Comments

Related Articles

Programación

Multiprocessing: una piscina de procesos en tu Python

Cada año durante las vacaciones, como buen informático, aprovecho para leer cosas diferentes y aprender algo nuevo, en esta ocasión he podido estudiar sobre la librería...

Posted on by Juanmi Taboada
Programación

Descarga gratuitamente fuentes TTF del colegio para el ordenador

Llevo buscando unos días fuentes para mi ordenador para enseñar a mis hijos a leer y escribir….finalmente he conseguido montar una colección bastante buena y he decidido...

Posted on by Juanmi Taboada