#!/bin/bash
# There are mainly three reasons the Synapse database tends to get large over time:
#
# - Stuff that no longer needs to be kept around and should be deleted
# - Synapse is extremely cache-happy, and this takes a lot of space
# - Table bloat & index bloat in PostgreSQL
#
# See also:
# https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats

# Exit on first failure
set -e

# Adjust as needed to your own setup
TOKEN=""
export PGPASSWORD=""
PGUSER="synapse_user"
PGDB="synapse"
PGHOST="localhost"
FQDN=""
WORKDIR=/root/matrix_cleanup
SUJET="Matrix server cleanup - $(hostname -f)"
POSTMASTER="root"
RESULTAT="KO"
LOGFILE=/var/log/matrix/cleanup.log

# Ensure dirs exist
[[ -d $(dirname ${LOGFILE}) ]] || mkdir -p "$(dirname ${LOGFILE})"
[[ -d ${WORKDIR} ]] || mkdir -p "${WORKDIR}"

# Log a message to stdout and to the logfile
jnl() {
    echo "$(date +%c) : $1" | tee -a "${LOGFILE}"
}

# Milliseconds since epoch, two months ago.
# Note: the admin API's purge_up_to_ts expects milliseconds, not seconds.
two_months_ago() {
    date +"%s%N" -d '2 months ago' | cut -b1-13
}

# Milliseconds since epoch, one year ago
one_year_ago() {
    date +"%s%N" -d '1 year ago' | cut -b1-13
}

# Mail the log to the admin and exit accordingly
notify() {
    jnl "End of programme."
    mail -s "${SUJET} : ${RESULTAT}" ${POSTMASTER} < "${LOGFILE}"
    [ "${RESULTAT}" = "OK" ] && exit 0 || exit 1
}

#####################################
# Main
#####################################

# Pre-cleanup
jnl "Remove temporary files..."
rm -f /opt/synapse-compress/*.sql
rm -f ${WORKDIR}/*.txt

jnl "Dumping current status on database"
biggestTables=$(echo "SELECT nspname || '.' || relname AS \"relation\",
    pg_size_pretty(pg_total_relation_size(C.oid)) AS \"total_size\"
  FROM pg_class C
  LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
  WHERE nspname NOT IN ('pg_catalog', 'information_schema')
    AND C.relkind <> 'i'
    AND nspname !~ '^pg_toast'
  ORDER BY pg_total_relation_size(C.oid) DESC
  LIMIT 20;" | psql -h ${PGHOST} -d ${PGDB} -U ${PGUSER})
jnl "${biggestTables}"

# Removing empty rooms
jnl "Getting empty rooms..."
curl -H "HOST: ${FQDN}" -H "Authorization: Bearer $TOKEN" 'http://127.0.0.1:8008/_synapse/admin/v1/rooms?limit=300' \
    | jq -r '.rooms[] | select(.joined_local_members == 0) | .room_id' > ${WORKDIR}/to_purge.txt
jnl "$(cat ${WORKDIR}/to_purge.txt)"

while read room_id; do
    jnl "purging room ${room_id}..."
    # Older v1 purge_room call, kept for reference; superseded by the v2 delete below:
    #curl -w "\n" --header "Authorization: Bearer $TOKEN" -X POST -H "Content-Type: application/json" -d "{ \"room_id\": $room_id }" -H "HOST: ${FQDN}" 'http://127.0.0.1:8008/_synapse/admin/v1/purge_room'
    curl -w "\n" --header "Authorization: Bearer $TOKEN" -X DELETE -H "Content-Type: application/json" -d "{ \"purge\": true }" -H "HOST: ${FQDN}" "http://127.0.0.1:8008/_synapse/admin/v2/rooms/${room_id}"
    # The v2 delete is asynchronous; see the polling sketch after this loop.
    sleep 0.5
done < ${WORKDIR}/to_purge.txt
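# The v2 room deletion above is asynchronous: Synapse answers immediately with
# a delete_id and keeps purging in the background, so the sleep only
# rate-limits the requests. If you want to wait for each purge to finish, a
# minimal polling sketch (kept commented out; an assumption layered on top of
# the loop above) using the documented delete_status endpoint could look like:
#
#   status="purging"
#   until [ "${status}" = "complete" ] || [ "${status}" = "failed" ]; do
#       sleep 5
#       status=$(curl -s -H "HOST: ${FQDN}" -H "Authorization: Bearer $TOKEN" \
#           "http://127.0.0.1:8008/_synapse/admin/v2/rooms/${room_id}/delete_status" \
#           | jq -r '.results[-1].status')
#   done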
jnl "Deleting history of more than 2 months in large rooms"
curl -H "HOST: ${FQDN}" -H "Authorization: Bearer $TOKEN" 'http://127.0.0.1:8008/_synapse/admin/v1/rooms?limit=300' \
    | jq -r '.rooms[] | select(.state_events > 100) | .room_id' > ${WORKDIR}/history_to_purge.txt

if [[ -s ${WORKDIR}/history_to_purge.txt ]]; then
    dateUntil=$(two_months_ago)
    while read room_id; do
        jnl "purging history for ${room_id}..."
        # Echo the command first so it shows up in the output when run from cron
        echo curl -w "\n" --header "Authorization: Bearer $TOKEN" -X POST -H "Content-Type: application/json" -d "{ \"delete_local_events\": false, \"purge_up_to_ts\": ${dateUntil} }" -H "HOST: ${FQDN}" "http://127.0.0.1:8008/_synapse/admin/v1/purge_history/$room_id"
        curl -w "\n" --header "Authorization: Bearer $TOKEN" -X POST -H "Content-Type: application/json" -d "{ \"delete_local_events\": false, \"purge_up_to_ts\": ${dateUntil} }" -H "HOST: ${FQDN}" "http://127.0.0.1:8008/_synapse/admin/v1/purge_history/$room_id"
        sleep 0.5
    done < ${WORKDIR}/history_to_purge.txt
fi

# Purge cached remote media older than one year
echo curl -X POST -H "HOST: ${FQDN}" -H "Authorization: Bearer $TOKEN" "http://127.0.0.1:8008/_synapse/admin/v1/purge_media_cache?before_ts=$(one_year_ago)"
curl -X POST -H "HOST: ${FQDN}" -H "Authorization: Bearer $TOKEN" "http://127.0.0.1:8008/_synapse/admin/v1/purge_media_cache?before_ts=$(one_year_ago)"

jnl "Optimizing synapse cache for rooms with more than 100 000 state changes"
# Get the list of rooms with more than 99 999 state_groups_state rows
jnl "Gathering list"
psql -h "${PGHOST}" -d "${PGDB}" -U "${PGUSER}" -t -c "SELECT room_id, count(*) AS count FROM state_groups_state GROUP BY room_id HAVING count(*) > 99999 ORDER BY count DESC;" \
    | sed -r 's/\s//g' | grep -Ev '^$' | awk -F "|" '{print $1}' > ${WORKDIR}/to_compress.txt

jnl "Prepare compression files with state-compressor"
cpt=1
while read room_id; do
    # Could we remove the password / is it using .pgpass?
    echo room_id ${room_id}
    /usr/local/bin/synapse-compress-state -t -o /opt/synapse-compress/state-compressor_${cpt}.sql -p "host=${PGHOST} user=${PGUSER} password=${PGPASSWORD} dbname=${PGDB}" -r "$room_id"
    cpt=$((cpt + 1))   # avoid ((cpt++)), whose exit status can trip set -e
    sleep 0.5
done < ${WORKDIR}/to_compress.txt

jnl "Running the compressions..."
for file in /opt/synapse-compress/state-compressor*.sql; do
    [[ -e ${file} ]] || continue   # nothing to do if the glob did not match
    jnl "Compressing ${file}"
    psql -h ${PGHOST} -d ${PGDB} -U ${PGUSER} < ${file}
    sleep 0.5
done

# If needed, or to do once in a while:
# REINDEX (VERBOSE) DATABASE matrix_prod;
# VACUUM FULL VERBOSE state_groups_state;

RESULTAT="OK"
notify
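# A heavier offline pass, expanding on the commented suggestions above; a
# sketch assuming the connection settings configured at the top of this
# script. VACUUM FULL and REINDEX take exclusive locks, so stop Synapse and
# run these during a maintenance window, not from this cron-style cleanup:
#
#   psql -h ${PGHOST} -d ${PGDB} -U ${PGUSER} -c 'VACUUM FULL VERBOSE state_groups_state;'
#   psql -h ${PGHOST} -d ${PGDB} -U ${PGUSER} -c 'REINDEX (VERBOSE) DATABASE synapse;'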