tdf#130911: convert image-sort from Perl to Python

Tested with diff that the sorting output file is identical to the one
the Perl script produced.

Change-Id: I22eb28e71f51315609957e84c6204f1beb5dccaa
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/90348
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/postprocess/CustomTarget_images.mk b/postprocess/CustomTarget_images.mk
index 691db4b..0d2db4f 100644
--- a/postprocess/CustomTarget_images.mk
+++ b/postprocess/CustomTarget_images.mk
@@ -52,7 +52,8 @@ $(packimages_DIR)/images_%.zip : \
	$(call gb_Trace_StartRange,$(subst $(WORKDIR)/,,$@),PRL)
	$(call gb_Helper_abbreviate_dirs, \
		ILSTFILE=$(call var2file,$(shell $(gb_MKTEMP)),100,$(filter %.ilst,$^)) && \
		$(call gb_ExternalExecutable_get_command,python) $(SRCDIR)/solenv/bin/pack_images.py \
		$(call gb_ExternalExecutable_get_command,python) \
			$(SRCDIR)/solenv/bin/pack_images.py \
			$(if $(DEFAULT_THEME),\
				-g $(packimages_DIR) -m $(packimages_DIR) -c $(packimages_DIR),\
				-g $(SRCDIR)/icon-themes/$* -m $(SRCDIR)/icon-themes/$* -c $(SRCDIR)/icon-themes/$* \
@@ -107,11 +108,13 @@ $(packimages_DIR)/commandimagelist.ilst :

$(packimages_DIR)/sorted.lst : \
		$(SRCDIR)/postprocess/packimages/image-sort.lst \
		$(call gb_Postprocess_get_target,AllUIConfigs)
		$(call gb_Postprocess_get_target,AllUIConfigs) \
		$(call gb_ExternalExecutable_get_dependencies,python)
	$(call gb_Output_announce,$(subst $(WORKDIR)/,,$@),$(true),PRL,1)
	$(call gb_Trace_StartRange,$(subst $(WORKDIR)/,,$@),PRL)
	$(call gb_Helper_abbreviate_dirs, \
		$(PERL) $(SRCDIR)/solenv/bin/image-sort.pl \
		$(call gb_ExternalExecutable_get_command,python) \
			$(SRCDIR)/solenv/bin/image-sort.py \
			$< $(INSTROOT)/$(gb_UIConfig_INSTDIR) $@)
	$(call gb_Trace_EndRange,$(subst $(WORKDIR)/,,$@),PRL)

diff --git a/solenv/bin/image-sort.pl b/solenv/bin/image-sort.pl
deleted file mode 100755
index a59c7bc..0000000
--- a/solenv/bin/image-sort.pl
+++ /dev/null
@@ -1,179 +0,0 @@
#!/usr/bin/env perl -w
# -*- Mode: Perl; tab-width: 4; indent-tabs-mode: nil -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file incorporates work covered by the following license notice:
#
#   Licensed to the Apache Software Foundation (ASF) under one or more
#   contributor license agreements. See the NOTICE file distributed
#   with this work for additional information regarding copyright
#   ownership. The ASF licenses this file to you under the Apache
#   License, Version 2.0 (the "License"); you may not use this file
#   except in compliance with the License. You may obtain a copy of
#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
#

my @global_list = ();
my %global_hash = ();
my $base_path;

# Collect the .uno: command names referenced by one UI config file.
#
# $fname is resolved relative to the file-level $base_path. A missing file is
# reported on stdout and yields an empty list; a file that exists but cannot
# be opened is fatal. Returns the lower-cased command names in file order, at
# most one per input line, duplicates included.
sub read_icons($)
{
    my $fname = shift;
    my $path = "$base_path/$fname";
    my @images;
    if (! -e $path) {
        print "Skipping non-existent $path\n";
        return @images;
    }
    # Three-argument open: the file name originates from the control file, so
    # it must never be interpreted as an open mode or a pipe specification.
    open (my $fileh, '<', $path) or die "Can't open $path: $!";
    while (my $line = <$fileh>) {
        # Only an href that is followed by whitespace counts, and only the
        # first such href on a line is picked up.
        $line =~ m/xlink:href=\"\.uno:(\S+)\"\s+/ or next;
        push @images, lc($1);
    }
    close ($fileh);

    return @images;
}

# Turn the commands found in $fname into "cmd/<prefix><name>.png" entries,
# dropping every entry that is already recorded in %global_hash or that
# occurred earlier in the same file. First-occurrence order is preserved.
sub read_new_icons($$)
{
    my ($fname, $prefix) = @_;
    my %seen_here;
    my @fresh;
    foreach my $command (read_icons ($fname)) {
        my $entry = "cmd/${prefix}${command}.png";
        next if defined $global_hash{$entry};   # emitted by an earlier step
        next if defined $seen_here{$entry};     # repeated within this file
        $seen_here{$entry} = 1;
        push @fresh, $entry;
    }
    return @fresh;
}

# Append the not-yet-listed icons of a group of UI config files to
# @global_list, ordered by a numeric rank assigned below, and record each of
# them in %global_hash.
sub process_group($@)
{
    my ($prefix, @uiconfigs) = @_;
    my %rank_of;
    my $next_rank = 1.0;

    # a very noddy sorting algorithm
    foreach my $uiconfig (@uiconfigs) {
        # NOTE(review): $anchor is never reassigned inside the loop, so the
        # "rank relative to the anchor" branch is only taken if the empty
        # string is itself a key of %rank_of -- confirm whether it was meant
        # to track the preceding icon.
        my $anchor = '';
        foreach my $icon (read_new_icons ($uiconfig, $prefix)) {
            next if defined $rank_of{$icon};    # a duplicate
            if (defined $rank_of{$anchor}) {
                $rank_of{$icon} = $rank_of{$anchor} + (1.0 - 0.5 / $next_rank);
            } else {
                $rank_of{$icon} = $next_rank;
                $next_rank += 1.0;
            }
        }
    }

    my @ordered = sort { $rank_of{$a} <=> $rank_of{$b} } keys %rank_of;
    push @global_list, @ordered;
    @global_hash{@ordered} = (1) x @ordered;
}

# Append every not-yet-listed icon of a single UI config file to
# @global_list in file order, recording each one in %global_hash.
sub process_file($$)
{
    my ($fname, $prefix) = @_;

    foreach my $icon (read_new_icons ($fname, $prefix)) {
        $global_hash{$icon} = 1;
        push @global_list, $icon;
    }
}

# Read the control file $fname and act on each batch of entries it lists.
#
# Lines starting with '#' and blank lines are skipped. Every other line is
# collected as an entry until a "-- <code>" line is reached, which processes
# the collected entries and then resets the batch:
#   group   - entries are UI config files handled together by process_group
#   ordered - entries are UI config files handled one by one by process_file
#   literal - entries are appended verbatim unless already in %global_hash
# Codes are case-insensitive. For group/ordered the "lc_" pass is skipped
# when the text after the code is exactly "small" (case-insensitive); the
# "sc_" pass always runs. Any other code is fatal, as is an unreadable file.
sub chew_controlfile($)
{
    my $fname = shift;
    my @list;
    # Three-argument open so the name given on the command line can never be
    # taken as an open mode or a pipe specification.
    open (my $fileh, '<', $fname) or die "Can't open $fname: $!";
    # A named lexical keeps the current line independent of the global $_,
    # which callees are free to overwrite.
    while (my $line = <$fileh>) {
        next if $line =~ /^\#/;
        $line =~ s/[\r\n]*$//;
        next if $line =~ /^\s*$/;

        if ($line =~ s/^-- (\S+)\s*//) {
            # control code; $line now holds whatever followed it
            my $code = $1;
            my $small = (lc ($line) eq 'small');
            if (lc ($code) eq 'group') {
                if (!$small) {
                    process_group ("lc_", @list);
                }
                process_group ("sc_", @list);
            } elsif (lc ($code) eq 'ordered') {
                if (!$small) {
                    for my $file (@list) {
                        process_file ($file, "lc_");
                    }
                }
                for my $file (@list) {
                    process_file ($file, "sc_");
                }
            } elsif (lc ($code) eq 'literal') {
                for my $file (@list) {
                    if (!defined $global_hash{$file}) {
                        push @global_list, $file;
                        $global_hash{$file} = 1;
                    }
                }
            } else {
                die ("Unknown code '$code'");
            }
            @list = ();
        } else {
            push @list, $line;
        }
    }
    close ($fileh);
}

# Command-line driver.
#
# Arguments: the control file, the directory the UI config paths are relative
# to, and optionally an output file. Without an output file the list is
# written to standard output.
if (!@ARGV) {
    print "image-sort <image-sort.lst> /path/to/OOOo/source/root\n";
    exit 1;
}

# where the control file lives
my $control = shift @ARGV;
# where the uiconfigs live
$base_path = shift @ARGV;
# output; declared lexically instead of leaking into package globals
my $output;
my $outf;
my $stdout_out;
if (@ARGV) {
    $outf = shift @ARGV;
    # Three-argument open keeps the file name from being parsed as a mode.
    open ($output, '>', $outf) or die "Can't open $outf: $!";
    $stdout_out = 0;
} else {
    $output = \*STDOUT;
    $stdout_out = 1;
}

chew_controlfile ($control);

# Two passes: entries not starting with "sc_" first, then those that do.
# NOTE(review): entries built by read_new_icons start with "cmd/", so /^sc_/
# can only ever match literal entries -- confirm whether "cmd/sc_" was meant.
for my $icon (@global_list) {
    print {$output} $icon . "\n" if (!($icon =~ /^sc_/));
}
for my $icon (@global_list) {
    print {$output} $icon . "\n" if ($icon =~ /^sc_/);
}

# Buffered write errors only surface at close, so check it.
if (!$stdout_out) {
    close ($output) or die "Can't close $outf: $!";
}

# dnl vim:set shiftwidth=4 softtabstop=4 expandtab:
diff --git a/solenv/bin/image-sort.py b/solenv/bin/image-sort.py
new file mode 100644
index 0000000..1055862
--- /dev/null
+++ b/solenv/bin/image-sort.py
@@ -0,0 +1,142 @@
# -*- Mode: Python; tab-width: 4; indent-tabs-mode: nil -*-
#
# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
# This file incorporates work covered by the following license notice:
#
#   Licensed to the Apache Software Foundation (ASF) under one or more
#   contributor license agreements. See the NOTICE file distributed
#   with this work for additional information regarding copyright
#   ownership. The ASF licenses this file to you under the Apache
#   License, Version 2.0 (the "License"); you may not use this file
#   except in compliance with the License. You may obtain a copy of
#   the License at http://www.apache.org/licenses/LICENSE-2.0 .
#

import sys, os, re

global_list = []
global_hash = {}
base_path = None

def read_icons(fname):
    """Return the .uno: command names referenced by one UI config file.

    fname is resolved relative to the module-level base_path. A missing file
    is reported on stdout and yields an empty list. The result holds the
    lower-cased command names in file order, at most one per input line,
    duplicates included.
    """
    global base_path
    images = []
    full_path = os.path.join(base_path, fname)
    if not os.path.exists(full_path):
        print("Skipping non-existent {}\n".format(full_path))
        return images
    # The with block closes the file as soon as parsing is done instead of
    # leaving the handle open until the object happens to be collected.
    with open(full_path) as uiconfig:
        for line in uiconfig:
            # Only an href that is followed by whitespace counts, and only
            # the first such href on a line is picked up.
            m = re.search(r'xlink:href="\.uno:(\S+)"\s+', line)
            if m:
                images.append(m.group(1).lower())
    return images

# prefix the commands of one UI config file and keep only the unseen ones
def read_new_icons(fname, prefix):
    """Return the new "cmd/<prefix><name>.png" entries found in fname.

    Entries already present in global_hash, or already produced earlier in
    the same call, are dropped; first-occurrence order is preserved.
    """
    fresh = []
    seen_here = set()
    for command in read_icons(fname):
        entry = "".join(("cmd/", prefix, command, ".png"))
        if entry in global_hash or entry in seen_here:
            continue
        seen_here.add(entry)
        fresh.append(entry)
    return fresh

def process_group(prefix, uiconfigs):
    """Append the new icons of a group of UI config files to global_list.

    Each icon returned by read_new_icons gets a numeric rank the first time
    it is seen; the icons are then appended to global_list in ascending rank
    order and recorded in global_hash.
    """
    rank = {}
    next_rank = 1.0

    # a very noddy sorting algorithm
    for uiconfig in uiconfigs:
        # NOTE(review): anchor is never reassigned inside the loop, so the
        # "rank relative to the anchor" branch is only taken if the empty
        # string is itself a key of rank -- confirm whether it was meant to
        # track the preceding icon.
        anchor = ''
        for icon in read_new_icons(uiconfig, prefix):
            if icon in rank:
                continue  # a duplicate
            if anchor in rank:
                rank[icon] = rank[anchor] + (1.0 - 0.5 / next_rank)
            else:
                rank[icon] = next_rank
                next_rank += 1.0

    ordered = sorted(rank, key=rank.get)
    global_list.extend(ordered)
    global_hash.update((icon, 1) for icon in ordered)

def process_file(fname, prefix):
    """Append the new icons of one UI config file to global_list.

    The icons keep the order in which read_new_icons returns them and are
    each recorded in global_hash.
    """
    fresh = read_new_icons(fname, prefix)
    global_list.extend(fresh)
    global_hash.update(dict.fromkeys(fresh, 1))

def chew_controlfile(fname):
    """Read the control file fname and act on each batch of entries.

    Every line is stripped; blank lines and lines starting with '#' are
    skipped. Other lines are collected as entries until a "-- <code>" line is
    reached, which processes the collected entries and resets the batch:

      group   - entries are UI config files handled together by process_group
      ordered - entries are UI config files handled one by one by process_file
      literal - entries are appended verbatim unless already in global_hash

    Codes are case-insensitive. For group/ordered the "lc_" pass is skipped
    when the text following the code is exactly "small" (case-insensitive);
    the "sc_" pass always runs. An unknown code terminates the program via
    sys.exit.
    """
    global global_list, global_hash
    filelist = []
    # The with block guarantees the control file is closed, including when an
    # unknown code aborts processing.
    with open(fname) as control:
        for line in control:
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            m = re.match(r'-- (\S+)\s*', line)
            if not m:
                filelist.append(line)
                continue

            # control code
            code = m.group(1)
            # Compare what follows the code (and its trailing whitespace)
            # against "small", the way the Perl original did; a suffix test
            # on the whole line misses tab-separated flags and accepts
            # unrelated text that merely ends in " small".
            small = line[m.end():].lower() == 'small'
            if code.lower() == 'group':
                if not small:
                    process_group("lc_", filelist)
                process_group("sc_", filelist)
            elif code.lower() == 'ordered':
                if not small:
                    for f in filelist:
                        process_file(f, "lc_")
                for f in filelist:
                    process_file(f, "sc_")
            elif code.lower() == 'literal':
                for f in filelist:
                    if f not in global_hash:
                        global_list.append(f)
                        global_hash[f] = 1
            else:
                sys.exit("Unknown code '{}'".format(code))
            filelist = []

# Command-line driver.
#
# Arguments: the control file, the directory the UI config paths are relative
# to, and optionally an output file. Without an output file the list is
# written to standard output.

# Both positional arguments are read unconditionally below, so a call with
# fewer than two gets the usage text instead of an IndexError.
if len(sys.argv) < 3:
    print("image-sort <image-sort.lst> /path/to/OOOo/source/root\n")
    sys.exit(1)

# where the control file lives
control = sys.argv[1]
# where the uiconfigs live
base_path = sys.argv[2]
# output
to_file = len(sys.argv) > 3
if to_file:
    output = open(sys.argv[3], 'w')
else:
    output = sys.stdout

try:
    chew_controlfile(control)

    # Two passes: entries not starting with "sc_" first, then those that do.
    # NOTE(review): entries built by read_new_icons start with "cmd/", so
    # startswith('sc_') can only ever match literal entries -- confirm
    # whether "cmd/sc_" was meant.
    for icon in global_list:
        if not icon.startswith('sc_'):
            output.write(icon + "\n")

    for icon in global_list:
        if icon.startswith('sc_'):
            output.write(icon + "\n")
finally:
    # Close (and thereby flush) a file we opened ourselves; stdout is left
    # alone.
    if to_file:
        output.close()

# dnl vim:set shiftwidth=4 softtabstop=4 expandtab: