I wrote a complete solution that uses recursion and works from the space key. Even though the original question asked how to do it from the title alone, I thought I'd show how it all works in a single, compact script.
This script will iterate through an entire space, given its space key, and then print out the title of every page and child page.
import json
import requests
import builtins
class list(list):
    """A ``list`` subclass with a line-per-item printer and unique appends.

    Unlike the builtin, the constructor takes the elements themselves as
    positional arguments: ``list(1, 2)`` produces ``[1, 2]``, and passing a
    single iterable stores that iterable as the only element.
    """

    def __init__(self, *args):
        # The tuple of positional arguments is the initial content.
        super().__init__(args)

    def print(self):
        """Write every element to standard output, one per line."""
        # Inside a method body the bare name ``print`` is the builtin
        # function, not this method.
        for element in self:
            print(f"{element}")

    def append_unique(self, item):
        """Append *item* unless an equal element is already stored."""
        if item in self:
            return
        self.append(item)
class Requests:
    """Authenticated HTTP session bound to one Confluence base URL.

    Attributes:
        session: ``requests.Session`` carrying the basic-auth credentials.
        url_root: Base URL that the REST paths are appended to.
    """

    def __init__(self, requests_username, requests_secret_file_name, requests_url_root):
        """Create the session and load the secret used for authentication.

        Args:
            requests_username: User name sent with every request.
            requests_secret_file_name: Path of the file holding the
                password or access token.
            requests_url_root: Base URL of the wiki, without a trailing
                REST path.
        """
        self.session = requests.Session()
        self.session.auth = (requests_username, self.load_password(requests_secret_file_name))
        self.url_root = requests_url_root

    @staticmethod
    def load_password(file_name):
        """Return the secret stored in *file_name*.

        Trailing line terminators are removed: most editors end a file with
        a newline, and sending it as part of the password makes
        authentication fail.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(f"{file_name}") as f:
            return f.read().rstrip("\r\n")

    def get_top_level_space_content(self, space_key):
        """Fetch the content listing of a space and return the response body.

        Args:
            space_key: Key of the space to list.

        Returns:
            The body of the HTTP response as text.
        """
        # Passing the key through ``params`` lets the HTTP library
        # URL-encode it instead of splicing it into the URL verbatim.
        response = self.session.get(
            f"{self.url_root}/rest/api/content",
            params={"spaceKey": space_key},
        )
        return response.text
class Parser:
    """Walks content listings recursively and records every page found.

    Attributes:
        page_names: Titles of the pages seen so far, without duplicates.
        page_ids: IDs of the pages seen so far, without duplicates.
        parser_requests: Client object providing ``session`` and
            ``url_root`` for the child-page lookups.
    """

    def __init__(self, parser_requests):
        # ``list`` is this file's own list subclass, which supplies
        # ``append_unique`` and ``print``.
        self.page_names = list()
        self.page_ids = list()
        self.parser_requests = parser_requests

    def extract_list_of_page_ids(self, content):
        """Record the pages in *content* and, recursively, their children.

        Args:
            content: JSON text of a content listing. Entries are read from
                its ``results`` key; a listing without that key is ignored.

        Raises:
            json.JSONDecodeError: If *content* is not valid JSON.
        """
        as_json = json.loads(content)
        content_list = dict(as_json).get('results')
        if content_list is None:
            return
        # NOTE(review): only the entries present in this response are
        # processed; if the server paginates its results, later pages are
        # not followed -- confirm against the API in use.
        for c_l in content_list:
            if c_l.get('type') != 'page':
                continue
            page_id = c_l.get("id")
            # Titles go to ``page_names``, the attribute created in
            # ``__init__`` (the former ``page_titles`` never existed).
            self.page_names.append_unique(c_l.get("title"))
            self.page_ids.append_unique(page_id)
            # Use the injected client's base URL instead of relying on a
            # module-level name happening to be bound to a client instance.
            children = self.parser_requests.session.get(
                f'{self.parser_requests.url_root}/rest/api/content/search?cql=parent='
                f'{page_id}').text
            self.extract_list_of_page_ids(children)
if __name__ == "__main__":
    # Entry point: list the title of every page in one space.
    # I wrote this with a .gitignore that ignores *.secret files
    # I recommend using an access token and not a password
    username = "username"
    secret_file_name = "file_containing_password.secret"
    url_root = "https://confluence-wiki-root.com"

    # Deliberately not called ``requests``: that would rebind the imported
    # HTTP library module at module scope.
    confluence_requests = Requests(username, secret_file_name, url_root)
    parser = Parser(confluence_requests)

    # Fetch the top-level listing, walk it recursively, then print titles.
    space_content = parser.parser_requests.get_top_level_space_content("SPACE-KEY")
    parser.extract_list_of_page_ids(space_content)
    parser.page_names.print()
- NOTE 1: Works with on-prem Confluence 7.7.4
- NOTE 2: I will be the first to admit, it's not the fastest thing I've ever written, but it was a first pass. Maybe I can optimize later.