summaryrefslogtreecommitdiff
path: root/TODO
blob: 31da25b5f70c712d2b8910387ecab51d25a72e48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
- handle the case where objects are just '/'. Currently they fail in invoke
  with "failed to find object ''"

nul_dbus_service_start() should likely take a data parameter to
distinguish multiple instances of the same service. Method callbacks
will need to know this information. How does this work given the
"callback, data" religion?

Maybe callbacks for methods should be hooked after the service has
been started? That way they can get data pointers that may not be
available when the service starts.

Alternatively, maybe it should be possible to check whether a service
can be started without actually starting it. Ie., 

    if (!service_available())
    {
	App *...

	start ();
    }
    else
    {
	invoke();
    }

This is racy though.

Another possibility would be to say "start_service()" and if that is
successful, then add the objects afterwards. For now, this is what we
are doing.

Callbacks should religiously be on the "callback, data," form to make
them bindable in languages with closures. (Broken at the moment for
objects).

Is this actually sufficient? Destroy notifiers would also be
necessary. Maybe languages just need to suck it up and generate
trampolines. We still need to know about "data" arguments so that we
can ignore them both in the trampoline and in the bound API.

Would it be possible to also use trampolines in C? Ie., have a macro

	NUL_CALLBACK (function, data)

that would return a single function to call? Event code would look like this then

	nuuk_connect (button, "clicked", NUL_CALLBACK (function, button, x, y, z));

and

	nuuk_connect (button, "clicked", NUL_CALLBACK (function, NUL_INT (x), NUL_INT (y)));

But there would need to be a way to specify where extra information
passed to the function would go. Maybe that would just always go first. Ie., 

	nuuk_connect (button, "clicked", NUL_CALLBACK (on_clicked));

	...;

	void
	on_clicked (void)
	{
	    printf ("clicked\n");
	}	

Would require a dependency on macro varargs and on libffi closures.

libffi closures are not a problem. They are supported for all
relevant, and several irrelevant, architectures. Macro varargs - the
main issue might be msvc. Actually, it doesn't have to be a macro, it
could be a vararg function. It would require some way to specify the
end of the argument list though. Or just give the number of arguments:

    NUL_CALLBACK (function, 2, NUL_INT (x), NUL_INT (y));

If the NUL_INT constructors return a pointer, it would be easy to
check at runtime that the number of args is not an insanely big number
(say > 100 or < 0).

If all callbacks are defined as trampolines, then it is possible to
have them be a struct with an optional destroy notifier embedded in
it. Ie., the NUL_CALLBACK would do something like this:

    nul_callback_info_t *info = malloc (sizeof (...));
    info->f = make_closure (f, ...);
    info->destroy = free_callback_info

where free_callback_info would free the closure and the callback info
plus any other house keeping.

Actually, we probably don't even need libffi closures. Instead
NUL_CLOSURE() creates a nul_closure_t that contains the function, the
types and the arguments. The caller of the closure is then responsible
for supplying extra arguments and for destroying it.

There will need to be functions

      nul_create_closure (function, n_args, ...);

      /* These functions would take care of storing/overwriting/destroying */
      nul_closure_own (closure **closure, closure *closure);
      nul_closure_disown (closure **closure);

      /* tie the lifetime of the closure to the object */
      nul_object_own_closure (object, closure **closure);

      /* Takes any additional arguments. These are added to the front
       * of the parameter list, followed by all the other arguments.
       */
      nul_closure_call (closure, ...);

Could the "own" functions take additional types and then return a
function that could be called directly? 


Type systems:

- sfile		array, record, union, double, names,

- invoke	basic types, no names

- dbus		array, record, double, names 

- introspection 

Would be nice to unify some of these.

For sfile, the names for records, unions and lists serve two purposes:
(1) it is the name of the corresponding variable, and (2) it is the
name of the type for the purposes of pointer types.

dbus types are used directly with invoke, but for now arrays and
records are not supported. These will need to be converted to pointers
for invoke.


Dbus:

Note that proxy_invoke() will have to contain a description of the
types, because the alternative is to believe the XML from the service,
which would mean a malicious service could make us send random stuff
from the stack.

Write tool that reads the introspection XML and generates service
code.

Can we use the same type system for dbus and sfile? The sfile type
system is basically name/type pairs, which is approximately what dbus
needs as well.

For dbus we will need structs of basic types, arrays of basic types,
and probably arrays of structs of basic types. Arrays will be
represented in C as "int n; const type *array". I suppose it would be
possible to generate the fully nested types, but it's pretty crackful
that that's even supported in dbus.

For the introspection types, we don't need the names, so those types
will probably have to be specified in some other ways. One question is
if it is only the specification or also the actual types that are
different.

Or possibly make sfile use anonymous types.

Why do we not need names for the introspection types? The parameter
names would be quite useful actually.


Things from glib:

  - Consistent API between things like GCheckSum and GBase64.
    GCheckSum looks better to me offhand. Note, these should also be
    able to take nul_byte_queue_t's (or nul_strings())

       nul_checksum_t
       nul_base64_t
       nul_iconv_t (iconv wrapper)

    Consider making them objects inheriting some sort of stream
    interface. This could also be done separately, and in a way that
    would allow errors to propagate. Ie., build a stream out of
    smaller streams.

    	nul_compound_stream (
            nul_convert_stream ("LATIN-1", "UTF-8"),
            nul_checksum_new (G_CHECK_SUM_SHA1),
	    NULL);

	nul_object_connect (stream, "event", on_event, NULL);

	nul_stream_feed (stream, "asdfasdf");
	nul_stream_end (stream);

    The event system here could really benefit from extensible unions
    so that you can have inherited streams with more complex events.

  - Note GData is used in GObject, and by GScanner, which is also used
    for various things. GHook is also used in GObject (signals), plus,
    weirdly, an enum from ghook.h is used in gmain.c. Also, gpattern
    is used in gtestutils.c

  - These things should be improved:

	  - ghash.h		open addressing, no primes
	  - gmain.h		simpler API, better threads
	    			Better child watch
	  - gmarkup.h		cut and paste expat
	  - grand.h		Delete the context version
	  - gsequence.h		Should gain aggregators
	  - gstring.h		Should probably be replaced with byte array 
	    			(which in turn must get equivalent API to
				 gstring)
				Is GStringChunk useful? probably not
          - gthreadpool.h	maybe (replace with executor?)
          - gasyncqueue.h	probably
	  - G*Array		Direct pointers, byte queue

  - New things:

    	  - sparse table	maybe
	  - byte queue
	  - sfile
	  - block sequence	maybe, or could become default for gsequence.
	  - FreeList		maybe
	  - watch API

  - GObject should go away, replaced by new, simpler object system
    (See below)

  - Note: GIO depends on GObject, so the right order is:
    - Add new object system
    - Port GIO to it
    - delete GObject

  - GIO in separate library?

  - GThread integrated into main library, and simplified

  - Careful API review of what remains

  - These things are crack:

      - gbacktrace.h	though G_BREAKPOINT is useful
      - gdataset.h
      - ghook.h
      - giochannel.h
      - gnode.h
      - gpattern.h
      - gprimes.h
      - gquark.h		possibly
      - gslice.h
      - gscanner.h
      - galloca.h
      - gmem.h		the malloc wrappers are useful, chunks must go
      - gcache.h
      - gcompletion.h
      - gtree.h
      - grel.h

-=-=- Notes on main loop

New, even simpler design

Basically, having one main thread is good enough. JobQueues/MainContexts
can be implemented on top of that. Executor could be called ThreadPool.
JobQueues should have a reference to an executor. 

	  job_queue_set_read (job_queue_t *job, int fd, read_func, data);
	  job_queue_set_write (job_queue_t *job, ...);
	  job_queue_add_idle/timeout().

Whenever something is queued up for a job queue, the jobqueue's
run-next time is computed, and if it needs to run at some point, it is
added to the executor along with the timeout. The executor will
maintain a sorted list of things that need to run (sorted by when they
need to run). When there is nothing to do it waits for that timeout to
expire (or for something to happen), otherwise it schedules things in
order.

When something is added that should run "as soon as possible", it is
added with the current time. This ensures that timeouts etc. will
eventually run (because their deadline will eventually be in the
past).

So when an fd fires for a job queue, this will happen:

   1. callback is added to the job queue's queue. (Insert sorted with
      current time as deadline).

   2. Current run-time is computed, and the job queue is added to the
      executor.

The job queue queue probably needs its own mutex, separate from the
job queue's.


New, simpler design:

There is an 'Engine' which is both Executor and Mainloop. It uses an
Epoll object to handle the polling. This epoll object does not do any
locking by itself.

Contexts can be created; they are like jobqueues, except they also
allow you to add fd's and idles and timeouts. They can do that since
they have access to the engine. These are completely unlocked; if you
want to access a context from more than one thread, you must lock them
yourself. It is guaranteed that only one callback will be active for a
context at the same time. From within that callback it is then safe to
remove other callbacks etc. In general such a context can act pretty
much like one thread.

A web server will consist of a listening context, which will create
other contexts as necessary and add the fd's to them with the
appropriate callback. Or maybe just call back with the new fd. I guess
there should be a generic listener object that will create its own
context and call back. Then an http module can just create a listener
and do whatever it wants in response.

One possible problem. The thread that creates a context and adds the
fd is not necessarily the one that will execute the first callback. Can
these interfere? Maybe the solution is just that the creating thread
should not do anything with the context after it adds the fd.  A
better idea may be to just have a context_dispatch() function that you
must call after creating the context. Until this is called, the
context is not active.

Beginning of this new design is implemented and can be built with
build2.sh.



Old design (referenced above)

Highlevel design for a web server:

        - Executor
                takes callbacks and executes them, possibly in parallel
                or out of order. The callbacks are likely to be called in
                a different thread than the one that queued them.

		Should support both push_back() and push_front(). Push_front()
		is needed to do parallel requests with good latency. If 
		a request can be parallelized, the parallel tasks should
		be put at the front of the queue.

        - EPoll
                Simple class that wraps epoll()/poll(). Note: must support
		oneshot polling and rearming.

        - JobQueue
                Maintain a queue of jobs, uses executor to execute them
                in order. Two jobs in the same JobQueue will never run
                at the same time.

        - Mainloop
                Uses executor, epoll.
                - Takes filedescriptors and callbacks. calls
                        back when descriptor is readable/writable/etc.
		- The callbacks are put on a job queue, which is passed in
		- Each callback is a oneshot - ie., after calling, the
		  filedescriptor is not polled again until it is rearmed.
		  The mainloop will have a method to rearm filedescriptors.
		  This ensures that the mainloop can start a new poll()
		  whenever it wants without waiting for all the callbacks
		  to finish.
                - Also takes timeouts that can be canceled. Timeouts also
		  need to be put on a queue.

		Sketch of implementation:

			polljob()
			{
				timeout = compute min_timeout ();
				if (timeout > 0)
					poll_armed_descriptors(timeout);
				call all timeouts (ie., put them on queues);
				call all callbacks (ie., put them on queues);
				schedule (polljob);
			}

	- MainContext

		- The thing clients will deal with

		- Uses the main loop

		- Filedescriptors
			maincontext is responsible for rearming the
			descriptor after the client callback has been
			called.

		- Timeouts

		- Idle handlers

		- Everything associated with a main context happens 
		  serialized - ie., as if only one thread executed it.

		- This means a client structure doesn't need to be locked.

		- Has a get_executor() method so that clients can parallelize
		  if they want to.

		- Current thinking is that if we have main contexts, who
		  really needs a main loop? The only thing you could do
		  with it is to pass it to main contexts.

		  OTOH that's true of several of the objects here.
		  see notes at top of maincontext.c

		- Worth noting that stuff that has to be passed in to
		  create a client object must be available to the 
		  listener callback. But see http_server.c for an example

		- Err, the epoll in itself is not enough for maincontext,
		  since it needs to be shared between maincontexts. Ie., 
		  who would call epoll_wait(). We do really need a main
		  loop that will call back.

        - Listener
                Listens on a port. Calls back with a file descriptor when
		someone connects.

	- Connection
		initialized with a MainContext and a filedescriptor.
		Creates events when something happens

	- Http:
		Has a Connection
		parses http, emits events such as
			"get hostname pagename query"
			"post etc etc etc"
	
	- ContentProvider




-=-=- Notes on object/component system:

/*
  signals	(no class handlers, no return values).

  nul_object_emit (object, "birnan", x, y, z);
  nul_object_block (object, "birnan");
  nul_object_unblock (object, "birnan");

/  nul_object_connect (object, "birnan", on_birnan, DATA);
  
*/

const nul_type_t *
nul_toggle_button_get_type(void)
{
    return nul_define_class (
	"nul_toggle_button_t",
	nul_extends (nul_fish_get_type()),
	nul_implements (nul_scrollable_get_type()),
	nul_doc (
	    nul_doc_since (2, 14, 0),
	    nul_doc_class_blurb (
		"A type of button that retains its state"),
	    nul_doc_class_long ("				\
Yeah, it's going to be quite annoying to type this stuff asd	\
But then, it's hopefully not that much documentation that must  \
be written this way.						\
								\
<p>A better scheme may be to have ToggleButton.txt that are run	\
through a preprocesser which generates a header file which	\
defines a macro TOGGLE_BUTTON_LONG_DOC_STRING, which we then	\
just use here.							\
								\
<p>Or maybe writing documentation as C strings is not		\
<em>that</em> bad -- the main issue is likely going to be	\
changing existing documentation and manually putting in		\
newlines. Maybe we don't actually need newlines. We can just	\
require &lt;p&gt; tags.						\
								\
<p>Yeah, not that bad, actually.				\
								\
<example>							\
<title>Creating two <structname>GtkToggleButton</structname>	\
widgets.</title>						\
<programlisting>						\
void make_toggles (void) {					\
   GtkWidget *dialog, *toggle1, *toggle2;			\

   dialog = gtk_dialog_new (<!-- -->);				\
   toggle1 = gtk_toggle_button_new_with_label (\"Hi, i'm a toggle button.\"); \
								\
   /* Makes this toggle button invisible */				\
   gtk_toggle_button_set_mode (GTK_TOGGLE_BUTTON (toggle1), TRUE);	\
									\
   g_signal_connect (toggle1, \"toggled\",				\
                     G_CALLBACK (output_state), NULL);			\
   gtk_box_pack_start (GTK_BOX (GTK_DIALOG (dialog)->action_area),	\
                       toggle1, FALSE, FALSE, 2);			\
									\
   toggle2 = gtk_toggle_button_new_with_label (\"Hi, i'm another toggle button.\"); \
   gtk_toggle_button_set_mode (GTK_TOGGLE_BUTTON (toggle2), FALSE);	\
   g_signal_connect (toggle2, \"toggled\",				\
                     G_CALLBACK (output_state), NULL);			\
   gtk_box_pack_start (GTK_BOX (GTK_DIALOG (dialog)->action_area),	\
                       toggle2, FALSE, FALSE, 2);			\
									\
   gtk_widget_show_all (dialog);					\
}									\
									\
</programlisting>							\
</example>								\
									\
<p>Ok, that's more painful, especially since you don't get any		\
syntax highlighting. It also distracts from the rest of the class	\
definition.								\
									\
Also, MSVC does not support longer literals than 2048; an ANSI C	\
is only required to support 509 characters.				\
									\
So a preprocessor that generates a macro that expands to a comma	\
separate list of literals.						\
"),
	    NULL),
	    NULL);
	nul_define_constructor (
	    ...);
	nul_define_property_rw ("fish", nul_type_int32_limits (0, 0, 200)),
	nul_define_property_r ("moose", nul_type_int32()),
	nul_define_property_cr ("fwr", nul_type_string()),
	nul_define_method (
	    "set_label",
	    nul_return_type (NUL_TYPE_VOID),
	    nul_arg_in ("label", NUL_TYPE_LABEL),
	    nul_arg_out ("size", NUL_TYPE_INT),
	    nul_arg_in ("fisH", NUL_TYPE_FISH),
	    NULL);
	nul_define_method (
	    "birnan",
	    NUL_TYPE_INT,
	    nul_type_closure (
		NUL_TYPE_VOID,
		NUL_TYPE_INT,
		NUL_TYPE_DOUBLE,
		NULL)),
	nul_define_virtual_method (
	    "..."),
	nul_define_signal (
	    "toggled",
	    NULL);
	NULL);
}

nul_toggle_button_t *
nul_toggle_button_new_with_label (const char *label)
{
    return nul_object_new (nul_toggle_button_get_type(), NULL);
}
	
/* ... */
const nul_introspect_info_t *
nul_introspect (void)
{
    /* This should be updated every time
     * new types are added
     */
    return nul_introspection_info (
	nul_introspect_types (
	    nul_object_get_type,
	    nul_toggle_button_get_type,
	    nul_int_get_type,
	    nul_rectangle_get_type,
	    NULL),
	nul_introspect_functions (
	    ...));
}


types:
	objects
	structs		(represented as pointers)
	unions?		(tagged unions, that can extend other unions,
			 useful for extensible events, represented as
			 pointers, but contents is a full type. Ie.,
			 a union of structs will be a pointer to
			 a (tag + struct)).
			 If we add them, make sure they can represent a
			 dbus variant.
        basic types
	arrays		(represented in C as an integer and pointer to 
			 contents; or as a pointer to nul_queue? with
			 similar semantics as boxed types)

	You only get arrays of basic types probably.