armbian-build/lib/tools/info/json2csv.py

#!/usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2023 Ricardo Pardini <ricardo@pardini.net>
# This file is a part of the Armbian Build Framework https://github.com/armbian/build/
#
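# This script reads a JSON array of objects on stdin, flattens nested keys
# into dotted column names, drops columns whose value never varies across the
# rows, and writes the result as CSV to stdout.
#
# Illustrative invocation (the input/output file names are assumptions, not
# taken from the pipeline that calls this script):
#
#   python3 lib/tools/info/json2csv.py < info.json > info.csv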
import collections.abc
import csv
import json
import logging
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from common import armbian_utils
# Prepare logging
armbian_utils.setup_logging()
log: logging.Logger = logging.getLogger("json2csv")

def eprint(*args, **kwargs):
    print(*args, file=sys.stderr, **kwargs)


def flatten(d, parent_key='', sep='_'):
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, collections.abc.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

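# Minimal sketch of what flatten() does (hypothetical input, not taken from
# the real info JSON): with sep='.', {"kernel": {"branch": "edge"}, "board": "x"}
# flattens to {"kernel.branch": "edge", "board": "x"}.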

json_object = json.load(sys.stdin)
eprint("Loaded {} objects from stdin...".format(len(json_object)))

flat = []
for obj in json_object:
    flat.append(flatten(obj, '', '.'))

# Determine the CSV columns: any key that holds a string or boolean value in
# at least one of the flattened objects.
columns_map = {}
for obj in flat:
    for key in obj.keys():
        value = obj[key]
        if isinstance(value, (str, bool)):
            columns_map[key] = True
columns = columns_map.keys()
eprint("columns: {}".format(len(columns)))

# Find the columns whose value is identical across all rows; they carry no
# information and are removed below.
columns_to_remove = []
for column in columns:
    values = []
    for obj in flat:
        value = obj.get(column)
        values.append(value)
    if len(set(values)) == 1:
        columns_to_remove.append(column)
# eprint("columns with all-identical values: {}: '{}'".format(len(columns_to_remove), columns_to_remove))
# Now actually filter columns, removing columns_to_remove
columns = [column for column in columns if column not in columns_to_remove]
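# Illustrative example (the key name is assumed, not from real data): if every
# target reported the same value for e.g. "config_ok", that column was just
# removed, so only columns that vary between rows reach the CSV output.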

# extrasaction='ignore' makes DictWriter skip row keys that are not in the
# final column list (e.g. the all-identical columns removed above).
writer = csv.DictWriter(sys.stdout, fieldnames=columns, extrasaction='ignore')
writer.writeheader()
for obj in flat:
    writer.writerow(obj)
eprint("Done writing CSV to stdout.")