#!/bin/sh
#******************************* License GPLv3 ********************************
# Factory_paquets_de_CRIs_gen : Factory to build one paquet-de-CRIs from the website www.les-cris.com, with a timestamp, and a SHA256 signature of the package.
# Copyright (C) 2022  Antoine Herzog <info -!at-arobase=! les-cris -!dot-point=! com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
#******************************************************************************

# This script removes the additions that HTTrack inserted into the mirrored files.
# NOTE : this script needs the following variables to be set already
# ("TCHA" is the abbreviation of "TO_CLEAN_HTTRACK_ADDONS") :
# DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN
# LOG_TCHA_MAINFOLDER
# LOG_TCHA_GEN_PREFIX_FILE_NAME
# LOG_TCHA_ERR_PREFIX_FILE_NAME
# LOG_TCHA_DATETIMESTAMP

# This script removes these marks :
# <!-- Mirrored from www.mywebsite.com/mywebpage.php by HTTrack Website Copier/3.x [XR&CO'2014], Fri, 24 Jul 2020 20:43:37 GMT -->
# <!-- Added by HTTrack --><meta http-equiv="content-type" content="text/html;charset=UTF-8" /><!-- /Added by HTTrack -->
# See :
# The best solution : https://stackoverflow.com/questions/7995205/how-to-delete-html-meta-tag-using-sed
#  https://quantumwarp.com/kb/articles/29-general/600-remove-added-by-httrack
# https://forum.httrack.com/readmsg/29619/index.html
# http://forum.httrack.com/readmsg/19565/index.html

# This uses these commands.
# grep -lr -e 'index' *.html | xargs sed -i .bak -e 's#<!-- Added by HTTrack --><meta http-equiv="content-type" content="text/html;charset=UTF-8"><!-- /Added by HTTrack -->##g'
# 
# However, this only works if the files you want to modify contain the word "index". If you want to modify all index.html files under the current directory and its subdirectories, use this:
# find . -name 'index.html' | xargs sed -i .bak -e 's#<!-- Added by HTTrack --><meta http-equiv="content-type" content="text/html;charset=UTF-8"><!-- /Added by HTTrack -->##g'
# 
# Either way, the important thing was to replace / with # in the sed's s command. 

# Remember the directory the script was started from; the script changes
# directory later on and comes back to this one at the end.
SCRIPTCTXT_DIRNAME_ORIGINAL=$(pwd)

# Append one line of text to the general log file.
# Globals:   LOG_TCHA_FOLDER_AND_FILENAME_TO_USE (read) - path of the log file.
# Arguments: $1 - text to log; when omitted, an empty line is written.
# printf is used instead of echo so that the text is written verbatim:
# echo may treat a text such as "-n" as an option, and some shells make it
# interpret backslash sequences.
logging_string_to_logfile()
{
	printf '%s\n' "${1-}" >> "${LOG_TCHA_FOLDER_AND_FILENAME_TO_USE}"
}
# Append one line of text to the error log file.
# Globals:   LOG_TCHA_FOLDER_AND_FILENAME_ERR_TO_USE (read) - path of the
#            error log file.
# Arguments: $1 - text to log; when omitted, an empty line is written.
# printf is used instead of echo so that the text is written verbatim:
# echo may treat a text such as "-n" as an option, and some shells make it
# interpret backslash sequences.
logging_string_to_err_logfile()
{
	printf '%s\n' "${1-}" >> "${LOG_TCHA_FOLDER_AND_FILENAME_ERR_TO_USE}"
}


# Check that the log folder variable is set.
# Without it there is no place to write the log files, so the problem is
# reported on standard output and the script stops.
if [ -z "$LOG_TCHA_MAINFOLDER" ]
then
	printf '%s\n' \
		'' \
		'!!!!!!!!!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!!!!!' \
		'Cannot clean the files in the folder ! (for Httrack addons in files)' \
		'The variable LOG_TCHA_MAINFOLDER is null or empty.' \
		'It should contain the folder path to log into.' \
		'!!!!!!!!!!!!!!!!!!!! END ERROR !!!!!!!!!!!!!!!!!!!!' \
		'' \
		''
	# The status was formerly written "exit -12". A negative status is not
	# valid for a POSIX sh (dash rejects it as an illegal number); 244 is the
	# value bash reports for -12 (status modulo 256), so callers that test
	# the status under bash keep seeing the same value.
	exit 244
fi

# Full paths of the two log files (general log and error log), built as :
#   <main folder>/<prefix>-<date time stamp>.log
LOG_TCHA_FOLDER_AND_FILENAME_TO_USE="${LOG_TCHA_MAINFOLDER}/${LOG_TCHA_GEN_PREFIX_FILE_NAME}-${LOG_TCHA_DATETIMESTAMP}.log"
LOG_TCHA_FOLDER_AND_FILENAME_ERR_TO_USE="${LOG_TCHA_MAINFOLDER}/${LOG_TCHA_ERR_PREFIX_FILE_NAME}-${LOG_TCHA_DATETIMESTAMP}.log"

# Create the log directory when nothing exists yet at that path, then give
# the owner full access and remove every access for the group and the others.
[ -e "${LOG_TCHA_MAINFOLDER}" ] || {
	mkdir -p "${LOG_TCHA_MAINFOLDER}"
	chmod u+rwx "${LOG_TCHA_MAINFOLDER}"
	chmod g-rwx "${LOG_TCHA_MAINFOLDER}"
	chmod o-rwx "${LOG_TCHA_MAINFOLDER}"
}

# Redirection of stdout and stderr to the logging file : stdout is appended to
# the general log file, and stderr is sent to the same place with 2>&1.
# Logging within script, see : https://www.baeldung.com/linux/exec-command-in-shell-script#1-logging-within-scripts
# The file name is quoted so that a log path containing spaces or glob
# characters is always taken as one single redirection target, whatever
# shell runs the script.
exec 1>>"${LOG_TCHA_FOLDER_AND_FILENAME_TO_USE}"
exec 2>&1

# Check that the variable naming the folder to clean is set.
# The problem is written to the error log file, then the script stops.
if [ -z "$DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN" ]
then
	logging_string_to_err_logfile ""
	logging_string_to_err_logfile "!!!!!!!!!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!!!!!"
	logging_string_to_err_logfile "Cannot clean the files in the folder ! (for Httrack addons in files)"
	logging_string_to_err_logfile "The variable DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN is null or empty."
	logging_string_to_err_logfile "It should contain the folder path to be clean httrack addons in."
	logging_string_to_err_logfile "!!!!!!!!!!!!!!!!!!!! END ERROR !!!!!!!!!!!!!!!!!!!!"
	logging_string_to_err_logfile ""
	logging_string_to_err_logfile ""
	# The status was formerly written "exit -12". A negative status is not
	# valid for a POSIX sh (dash rejects it as an illegal number); 244 is the
	# value bash reports for -12 (status modulo 256), so callers that test
	# the status under bash keep seeing the same value.
	exit 244
fi


# Move into the folder to clean : the find commands built just after this
# operate on the current directory (".").
# The path is quoted (it may contain spaces), and the script stops when the
# folder cannot be entered : otherwise the sed -i commands would rewrite the
# *.html files of whatever directory happens to be the current one.
if ! cd -- "${DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN}"
then
	logging_string_to_err_logfile ""
	logging_string_to_err_logfile "!!!!!!!!!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!!!!!"
	logging_string_to_err_logfile "Cannot clean the files in the folder ! (for Httrack addons in files)"
	logging_string_to_err_logfile "Cannot change directory to : ${DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN}"
	logging_string_to_err_logfile "!!!!!!!!!!!!!!!!!!!! END ERROR !!!!!!!!!!!!!!!!!!!!"
	logging_string_to_err_logfile ""
	exit 1
fi

# With grep, but it does not work and seems difficult to debug.
# COMMAND_TCHA_20_META_ADDED_GEN="grep -lr -e 'Added by HTTrack' *  | xargs sed -i.bak -e 's#<!-- Added by HTTrack --><meta http-equiv=""content-type"" content=""text/html;charset=UTF-8"" /><!-- /Added by HTTrack -->##g'"
# COMMAND_TCHA_10_COMMENT_MIRRORED_BY_GEN="grep -lr -e 'Mirrored' *  | xargs sed -i.bak -e 's#<!-- Mirrored from*GMT -->##g'"

# With find.
# The two command strings are kept in variables so that they can be written to
# the log before being run with eval.
# find hands the file names to sed itself (-exec ... {} +) instead of piping
# them to xargs : xargs splits its input on blanks and quotes, so a file name
# such as "my page.html" was passed to sed as two wrong names, and sed was
# also started without any file when no *.html file was found.
# NOTE : "sed -i" without a backup suffix is the GNU sed form of in-place editing.
# 20 : removes the <meta> tag that HTTrack adds between its "Added by HTTrack" comments.
COMMAND_TCHA_20_META_ADDED_GEN="find . -name '*.html' -exec sed -i -e 's#<!-- Added by HTTrack --><meta http-equiv=\"content-type\" content=\"text/html;charset=UTF-8\" /><!-- /Added by HTTrack -->##g' {} +"
# 10 : removes the "Mirrored from ... GMT" comment.
COMMAND_TCHA_10_COMMENT_MIRRORED_BY_GEN="find . -name '*.html' -exec sed -i -e 's#<!-- Mirrored from.*GMT -->##g' {} +"


# Add the log file options
# COMMAND_GEN="$COMMAND_GEN --file-log=""$LOG_HTTRACK_ERR_FULLPATH_AND_FILE_NAME"""

# Write the start banner to the general log file : the folder to clean, the
# logging settings, and the two commands that are about to be run.
logging_string_to_logfile ""
logging_string_to_logfile "========================================================================================"
logging_string_to_logfile "Start cleaning the HTTrack addons (meta, comments) :"
logging_string_to_logfile ""
logging_string_to_logfile "OutPut Folder :"
logging_string_to_logfile "DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN              = ${DEST_OUTPUT_FOLDER_TO_CLEAN_HTTRACK_ADDONS_NAME_GEN}"
logging_string_to_logfile "Logs :"
logging_string_to_logfile "LOG_TCHA_MAINFOLDER              = ${LOG_TCHA_MAINFOLDER}"
logging_string_to_logfile "LOG_TCHA_GEN_PREFIX_FILE_NAME    = ${LOG_TCHA_GEN_PREFIX_FILE_NAME}"
logging_string_to_logfile "LOG_TCHA_ERR_PREFIX_FILE_NAME    = ${LOG_TCHA_ERR_PREFIX_FILE_NAME}"
logging_string_to_logfile "LOG_TCHA_DATETIMESTAMP           = ${LOG_TCHA_DATETIMESTAMP}"

# The commands, logged exactly as they will be given to eval.
logging_string_to_logfile ""
logging_string_to_logfile "COMMAND_TCHA_10_COMMENT_MIRRORED_BY_GEN                = ${COMMAND_TCHA_10_COMMENT_MIRRORED_BY_GEN}"
logging_string_to_logfile ""
logging_string_to_logfile "COMMAND_TCHA_20_META_ADDED_GEN                = ${COMMAND_TCHA_20_META_ADDED_GEN}"
logging_string_to_logfile "========================================================================================"
logging_string_to_logfile ""

# Run the two cleaning commands : first the "Mirrored from" comment, then the
# added <meta> tag.
# Use eval, here, ... and not exec.
# This is because the command strings carry their own quoting (the find
# pattern and the sed expression), which only a new parsing pass by the shell
# can interpret.
# See : https://ma.ttias.be/find-in-bash-scripting-paths-must-precede-expression/
# The variables are double-quoted so that eval receives each string untouched :
# unquoted, the shell would first split it into words and try to expand words
# containing glob characters (for instance "from.*GMT") against the names of
# the files in the current directory.
eval "${COMMAND_TCHA_10_COMMENT_MIRRORED_BY_GEN}"
eval "${COMMAND_TCHA_20_META_ADDED_GEN}"

# Write the end banner to the general log file.
logging_string_to_logfile ""
logging_string_to_logfile "========================================================================================"
logging_string_to_logfile "End of cleaning the HTTrack addons (meta, comments)."
logging_string_to_logfile "========================================================================================"
logging_string_to_logfile ""


# Go back to the directory that was current when the script started.
# The path is quoted so that a directory name containing spaces is passed to
# cd as one single argument.
cd -- "${SCRIPTCTXT_DIRNAME_ORIGINAL}"

