#!/usr/bin/python2

"""
This script reads all the spec files in the folders specified and extracts
the date of the most recent changelog entry that wasn't created by rel-eng
or Dennis Gilmore.
"""

import argparse
import collections
import re
import os
import sys


# Changelog date such as "Mon Jan 01 2018".  Raw strings keep the regex
# escapes (\s, \d) from being read as (invalid) string escapes.
_date_regex = (
    r'(?:mon|tue|wed|thu|fri|sat|sun)\s+'
    r'(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+'
    r'\d\d?\s+'
    r'\d\d\d\d'
)

# Compiled once; spec files write the day/month names in mixed case.
_date_pattern = re.compile(_date_regex, flags=re.I)

_months = {
    'jan': 1,
    'feb': 2,
    'mar': 3,
    'apr': 4,
    'may': 5,
    'jun': 6,
    'jul': 7,
    'aug': 8,
    'sep': 9,
    'oct': 10,
    'nov': 11,
    'dec': 12,
}

# Lower-cased markers of the changelog entries that are to be ignored.
_ignored_authors = ('fedora release engineering', 'dennis gilmore')

_spec_ext = '.spec'


def _get_retired_pkgs():
    """ Query PDC for the inactive ``master`` branches of rpm components
    and return the names of the corresponding components.

    The paginated results are followed until the ``next`` link is empty;
    every URL queried is printed, as well as the final number of packages.

    :return: a set of the ``global_component`` values found.
    :raises requests.HTTPError: if PDC answers with an error status.
    """
    # Imported here, by its only user, so that the spec parsing helpers
    # remain importable when the HTTP client is not installed.
    import requests

    pdc_url = 'https://pdc.fedoraproject.org/rest_api/v1/'\
        'component-branches/?active=false&type=rpm&name=master'
    pkgs = set()
    while pdc_url:
        print(pdc_url)
        req = requests.get(pdc_url)
        # Fail on the HTTP error itself rather than on a confusing
        # KeyError/JSON error while reading an error page.
        req.raise_for_status()
        data = req.json()
        pkgs.update(res['global_component'] for res in data['results'])
        pdc_url = data['next']
    print('%s packages retired found' % len(pkgs))
    return pkgs


def parse_args():
    """ Parse the command line arguments.

    :return: the argparse namespace, whose ``target`` attribute is the
        folder to scan for spec files.
    """
    parser = argparse.ArgumentParser(
        description='Get stats about most recent human-action')
    parser.add_argument(
        'target', help='Folder where are present the spec files')
    return parser.parse_args()


def get_most_recent_entry(specfile):
    """ Return the date of the most recent entry in the specified specfile
    where this entry was not created by rel-eng or Dennis Gilmore.

    Only the rows found after a row containing ``%changelog`` are
    considered, and the first dated row not attributed to one of the
    ignored authors wins.

    :arg specfile: the path to the spec file to read.
    :return: a tuple ``(month, description)`` where ``month`` has the
        format ``<year>-<month number>-1`` and ``description`` the format
        ``<specfile> -- <changelog row>``, or ``None`` if no suitable
        entry was found.
    """
    in_changelog = False
    with open(specfile, 'r') as stream:
        for row in stream:
            row = row.strip()
            if '%changelog' in row:
                in_changelog = True
                continue
            if not in_changelog:
                continue

            dates = _date_pattern.findall(row)
            if not dates:
                continue

            lowered = row.lower()
            if any(author in lowered for author in _ignored_authors):
                continue

            # The regex guarantees four whitespace-separated tokens:
            # day name, month name, day of the month and year.
            _, month_name, _, year = dates[0].split()
            month = '%s-%s-1' % (year, _months[month_name.lower()])
            return (month, '%s -- %s' % (specfile, row))
    return None


def _date_sort_key(date):
    """ Return a key sorting the ``<year>-<month>-1`` labels
    chronologically.

    The month is not zero-padded in the labels, so sorting them as plain
    strings would place ``2018-10-1`` before ``2018-2-1``.  The labels that
    are not dates (``unknown``) are sorted after all the dates, which is
    where the plain string ordering placed them as well.
    """
    parts = date.split('-')
    try:
        return (0, int(parts[0]), int(parts[1]), date)
    except (ValueError, IndexError):
        return (1, 0, 0, date)


def main():
    """ Gather the most recent human changelog entry of every spec file
    found under the target folder and write the statistics.

    The spec files of retired packages are skipped and the spec files
    without a suitable entry are grouped under ``unknown``.  The number of
    entries per month is printed and written to ``spec_stats.csv``; the
    entries themselves are written to ``spec_stats_details.csv``.
    """
    args = parse_args()
    retired_pkgs = _get_retired_pkgs()

    stats = collections.defaultdict(list)
    for dirpath, _, filenames in os.walk(args.target):
        for filename in filenames:
            if not filename.endswith(_spec_ext):
                continue
            # Strip only the trailing extension: splitting on '.spec'
            # would truncate a name like 'foo.specs.spec' to 'foo'.
            if filename[:-len(_spec_ext)] in retired_pkgs:
                print('  - retired: %s' % filename)
                continue

            specfile = os.path.join(dirpath, filename)
            entry = get_most_recent_entry(specfile)
            if entry is None:
                stats['unknown'].append(specfile)
            else:
                simple_date, row = entry
                stats[simple_date].append(row)

    ordered_dates = sorted(stats, key=_date_sort_key)

    for date in ordered_dates:
        # A single formatted string prints the same way with python 2 and 3
        # (python 2 would print a tuple for ``print(a, b)``).
        print('%s %s' % (date, len(stats[date])))

    with open('spec_stats.csv', 'w') as stream:
        stream.write('Date, entry\n')
        for date in ordered_dates:
            stream.write('%s,%s\n' % (date, len(stats[date])))

    with open('spec_stats_details.csv', 'w') as stream:
        stream.write('Date, entry\n')
        for date in ordered_dates:
            for entry in stats[date]:
                stream.write('%s,%s\n' % (date, entry))


if __name__ == '__main__':
    sys.exit(main())