summaryrefslogtreecommitdiff
path: root/emacs.d/lisp/guess-offset.el
blob: 007848f116c2fa0f9f406280fbd579d5291159ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
;;; guess-offset.el --- Automatically determine c-basic-offset

;; Copyright (C) 2003 Julian Scheid

;; Version: 0.1.1
;; Author: Julian Scheid <julian@sektor37.de>
;; Keywords: indent c-mode c-basic-offset

;; This file is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; A hook for C, C++ and Java modes that guesses the indentation
;; offset used in an existing C-like source code.

;; To install, add this file to one of your load directories,
;; byte-compile it and put the following at the very end of your
;; .emacs file:

;;    (require 'guess-offset)

;; When installed, guess-offset will briefly analyze every visited C
;; or C-like source file (currently C, C++ and Java) and make a
;; "guess" as to what indentation offset was used to create it.
;; If the guess is considered reliable, c-basic-offset is overridden
;; to reflect the proposed offset.

;; Guess-offset doesn't add anything to your Emacs environment, but it
;; removes something you won't miss - the hassle to manually deal with
;; source codes that are formatted with a different indentation
;; offset.  With guess-offset, Emacs will transparently adapt to
;; foreign indentation offsets.

;; It doesn't work perfectly for every possible source file.  In
;; particular, it won't work with source codes that use a single space
;; per level, and not with those that use varying indentation
;; depending on the outer construct.

;; Still, it will make dealing with a heap of source codes much
;; easier, and leave you no worse off with the remainder than before.
;; Remember, the c-basic-offset setting will only be tinkered with if
;; there is enough evidence that the guessed offset is the right one.

;; GuessOffset relies on heuristics and has a couple of variables you
;; can play with.  Please tune these settings if you are not satisfied
;; with the current behaviour.  If you think your settings are better
;; than the defaults, please send me a copy.

;; Note to users of other languages: the only reason guess-offset is
;; currently limited to C-like languages is that it can only deal with
;; C comments and expressions ("/*...*/" and "(...)", resp.).  It
;; shouldn't be too hard to extend this to work with other languages.
;; Please send a patch to the author if you do.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;; Code:

;;;-----------------------------------------------------------------
;;; Dependencies

(require 'cc-vars)

;;;-----------------------------------------------------------------
;;; Customization Definitions:

(defgroup guess-offset nil
  "Automatic detection of the indentation offset used in a buffer."
  :group 'tools
  :version "21.1")

(defcustom guess-offset-minimum-tab-width 2
  "The smallest offset that can be guessed for a buffer.
This must be an integer: candidate offsets are compared against
indentation columns with integer arithmetic."
  :type 'integer
  :group 'guess-offset)

(defcustom guess-offset-maximum-tab-width 8
  "The largest offset that can be guessed for a buffer.
This must be an integer: candidate offsets are compared against
indentation columns with integer arithmetic."
  :type 'integer
  :group 'guess-offset)

(defcustom guess-offset-minimum-line-count 8
  "Minimum number of indented lines needed to readjust the offset.
Only non-empty, indented lines are counted.  If the buffer contains
fewer such lines, guess-offset will leave the buffer alone."
  :type 'integer
  :group 'guess-offset)

(defcustom guess-offset-minimum-probability 0.5
  "Minimum probability a guess needs for the offset to be readjusted.
If the best guess is less probable than this number, guess-offset
will leave the buffer alone."
  :group 'guess-offset
  :type 'number)

(defcustom guess-offset-minimum-superiority 2.0
  "Factor by which the best guess must beat the second best guess.
If the best guess is not better than the second best guess by at
least this factor, guess-offset will leave the buffer alone.  Note
that guesses with equal (or similar) probability are treated as one
guess in this context.  This avoids problems with offsets that are
multiples of other valid offsets (like 2, 4 and 8)."
  :group 'guess-offset
  :type 'number)

(defcustom guess-offset-probability-gravity 0.01
  "Difference below which two probabilities are considered equal.
This tolerance is necessary to avoid a fraction of the real offset
being preferred over the offset actually used in the buffer.  For
example, if an offset of two has the probability 64.2% and an offset
of four has the probability 64.9%, then guess-offset will assume an
offset of four instead of two when the gravity is larger than 0.007
\(0.7 percent)."
  :group 'guess-offset
  :type 'number)

(defcustom guess-offset-quiet-p nil
  "Non-nil means do not announce that `c-basic-offset' was adjusted."
  :group 'guess-offset
  :type 'boolean)

(defcustom guess-offset-debug-p nil
  "Non-nil means output lots of debugging information."
  :group 'guess-offset
  :type 'boolean)

(defvar guess-offset-skip-comment-start
  'guess-offset-skip-c-comment-start
  "Function used to detect the start of a multi-line comment.
It is called with one argument, the buffer position of the end of
the line being examined, and must return non-nil if a comment starts
between point and that position.")

(defvar guess-offset-skip-comment-end
  'guess-offset-skip-c-comment-end
  "Function used to detect the end of a multi-line comment.
It is called with one argument, the buffer position of the end of
the line being examined, and must return non-nil if a comment ends
between point and that position.")

;;;-----------------------------------------------------------------
;;; Function Definitions:

(defun guess-offset-skip-c-comment-start (eol)
  "Move point past the next C/C++/Java comment opener before EOL.
Return the new value of point, or nil (leaving point where it was)
if no comment opener is found between point and EOL."
  (let ((comment-opener "/[*]"))
    (re-search-forward comment-opener eol t)))

(defun guess-offset-skip-c-comment-end (eol)
  "Move point past the next C/C++/Java comment closer before EOL.
Return the new value of point, or nil (leaving point where it was)
if no comment closer is found between point and EOL."
  (let ((comment-closer "[*]/"))
    (re-search-forward comment-closer eol t)))

(defun guess-offset-collect-indent-widths ()
  "Return an indentation histogram for the current buffer.
The value is a cons cell (LINE-COUNT . HISTOGRAM).  HISTOGRAM is an
alist of (COLUMN . COUNT) pairs, COUNT being the number of counted
lines whose first non-whitespace character is in COLUMN, and
LINE-COUNT is the total number of counted lines.

Lines that start in column 0 or while inside a multi-line comment are
not counted.  When a line opens a parenthesized expression, scanning
resumes on the line after the matching close parenthesis, so the
continuation lines of the expression are not counted either.

Point is preserved."
  (let ((tab-positions nil)
        (line-count 0)
        go-in-comment)
    (save-excursion
      ;; `beginning-of-buffer' is an interactive command that also sets
      ;; the mark; plain motion is all that is needed here.
      (goto-char (point-min))
      ;; Each iteration starts at a line beginning and jumps to the
      ;; next non-whitespace character, skipping blank lines.
      (while (re-search-forward "[^\t\r\n ]" nil t)
        ;; Point is just after the matched character, hence the 1-.
        (let ((column (1- (current-column))))
          (when (and (> column 0)
                     (not go-in-comment))
            (let ((tabinfo (assoc column tab-positions)))
              (if (null tabinfo)
                  (setq tab-positions (cons (cons column 1)
                                            tab-positions))
                (setcdr tabinfo (1+ (cdr tabinfo)))))
            (setq line-count (1+ line-count))))
        ;; Check whether a comment begins or ends on this line, and
        ;; skip over a parenthesized expression opened on it.
        (let ((eol (line-end-position)))
          (beginning-of-line)
          (while
              (or
               (and (not go-in-comment)
                    (funcall guess-offset-skip-comment-start eol)
                    (setq go-in-comment t))
               (and (not go-in-comment)
                    (re-search-forward "(" eol t)
                    ;; Walk to the matching close parenthesis, which
                    ;; may be on a later line.  The inner `while'
                    ;; yields nil, so this branch also ends the
                    ;; enclosing loop.
                    (let ((bracket-level 1))
                      (while (and (/= bracket-level 0)
                                  (re-search-forward "[()]" nil t))
                        (setq bracket-level
                              (if (eq ?\( (char-before))
                                  (1+ bracket-level)
                                (1- bracket-level))))))
               (and go-in-comment
                    (funcall guess-offset-skip-comment-end eol)
                    (not (setq go-in-comment nil))))))
        (beginning-of-line 2)))
    (cons line-count tab-positions)))

(defun guess-offset-get-tab-width-linecount (tab-positions try-offset)
  "Return the number of lines whose indentation fits TRY-OFFSET.
TAB-POSITIONS is an alist of (COLUMN . LINE-COUNT) pairs.  The value
is the sum of LINE-COUNT over all pairs whose COLUMN is an exact
multiple of TRY-OFFSET."
  (let ((total 0))
    (dolist (entry tab-positions total)
      (when (eq 0 (mod (car entry) try-offset))
        (setq total (+ total (cdr entry)))))))


(defun guess-offset-purge-suboffsets (offset-probability-list)
  "Merge offsets in OFFSET-PROBABILITY-LIST with similar divisors.
OFFSET-PROBABILITY-LIST is an alist of (OFFSET . PROBABILITY) pairs.
Whenever the list contains both an offset and one of its divisors
\(no smaller than `guess-offset-minimum-tab-width') and their
probabilities differ by less than `guess-offset-probability-gravity',
the divisor's entry is removed and the offset's probability is
replaced by the mean of the two.

Return the resulting list, sorted by offset in descending order.
The argument is modified destructively."
  ;; Largest offset first, so that every possible divisor of an entry
  ;; comes after that entry in the list.
  (setq offset-probability-list
        (sort offset-probability-list
              (lambda (pair1 pair2)
                (> (car pair1) (car pair2)))))

  (let ((remaining offset-probability-list))
    (while remaining
      (let* ((entry (car remaining))
             (offset (car entry))
             (sub-offset (1- offset)))
        ;; Try every candidate divisor below OFFSET, largest first.
        (while (>= sub-offset guess-offset-minimum-tab-width)
          (let ((sub-entry (assoc sub-offset offset-probability-list)))
            (when (and sub-entry
                       (= 0 (mod offset sub-offset))
                       ;; Only merge entries whose probabilities are
                       ;; close to each other.
                       (< (abs (- (cdr entry) (cdr sub-entry)))
                          guess-offset-probability-gravity))
              ;; Keep the larger offset with the mean probability ...
              (setcdr entry (/ (+ (cdr entry) (cdr sub-entry)) 2))
              ;; ... and drop the divisor.  It is located after
              ;; REMAINING's head, so the iteration is unaffected.
              (setq offset-probability-list
                    (delq sub-entry offset-probability-list))))
          (setq sub-offset (1- sub-offset))))
      (setq remaining (cdr remaining))))

  offset-probability-list)

(defun guess-offset-get-guess ()
  "Guess the indentation offset used by the current buffer.
Return the offset, or nil if it could not be guessed reliably.

Every candidate offset from `guess-offset-minimum-tab-width' to
`guess-offset-maximum-tab-width' is rated by the fraction of counted
lines whose indentation is a multiple of it.  The best candidate is
returned only if the buffer has at least
`guess-offset-minimum-line-count' counted lines, the candidate's
rating is at least `guess-offset-minimum-probability', and it beats
the second best candidate by a factor of at least
`guess-offset-minimum-superiority'."
  (let* ((collect-result (guess-offset-collect-indent-widths))
         (line-count (car collect-result))
         ;; Most frequent indentation column first.
         (tab-positions
          (sort (cdr collect-result)
                (lambda (pair1 pair2)
                  (> (cdr pair1) (cdr pair2)))))
         (try-tab-width guess-offset-minimum-tab-width)
         offset-probabilities)
    (when guess-offset-debug-p
      (message "%s"
               (concat "Indentation histogram:\n"
                       (mapconcat
                        (lambda (pair)
                          (concat "  "
                                  (number-to-string (car pair))
                                  "->"
                                  (number-to-string (cdr pair))))
                        tab-positions
                        "\n"))))
    ;; The line count option is documented as a minimum, so a buffer
    ;; with exactly that many lines qualifies.  The explicit check for
    ;; zero keeps the division below well-defined.
    (when (and (> line-count 0)
               (>= line-count guess-offset-minimum-line-count))
      (while (<= try-tab-width guess-offset-maximum-tab-width)
        (let ((probability (/ (float
                               (guess-offset-get-tab-width-linecount
                                tab-positions try-tab-width))
                              line-count)))
          (setq offset-probabilities
                (cons (cons try-tab-width probability)
                      offset-probabilities))
          (setq try-tab-width (1+ try-tab-width))))
      (let ((sorted-probabilities
             (sort (guess-offset-purge-suboffsets offset-probabilities)
                   (lambda (pair1 pair2)
                     ;; Most probable offset first; on an exact tie
                     ;; the larger offset wins.  Offsets whose
                     ;; probability is merely close to that of one of
                     ;; their multiples have already been merged by
                     ;; `guess-offset-purge-suboffsets'.
                     (or (> (cdr pair1) (cdr pair2))
                         (and (= (cdr pair1) (cdr pair2))
                              (> (car pair1) (car pair2))))))))

        (when guess-offset-debug-p
          (message "%s"
                   (concat "Probable offsets:\n"
                           (mapconcat
                            (lambda (pair)
                              (concat "  Offset "
                                      (number-to-string (car pair))
                                      " has probability "
                                      (format "%f" (cdr pair))))
                            sorted-probabilities
                            "\n"))))

        (let ((gold-offset (car (nth 0 sorted-probabilities)))
              (gold-probability (cdr (nth 0 sorted-probabilities)))
              (silver-probability (cdr (nth 1 sorted-probabilities))))
          ;; GOLD-PROBABILITY is nil when there were no candidates at
          ;; all (maximum tab width below the minimum).
          (when (and gold-probability
                     (>= gold-probability
                         guess-offset-minimum-probability)
                     (or (null silver-probability)
                         (= 0 silver-probability)
                         (>= (/ (float gold-probability)
                                (float silver-probability))
                             guess-offset-minimum-superiority)))
            gold-offset))))))

(defun guess-offset-set-c-basic-offset ()
  "Set `c-basic-offset' to the offset guessed for the current buffer.
Do nothing if `guess-offset-get-guess' returns nil or if the guess is
already the value of `c-basic-offset'.  Unless `guess-offset-quiet-p'
is non-nil, announce the adjustment with `message'."
  (let ((guessed-tab-width (guess-offset-get-guess)))
    (when (and guessed-tab-width
               (not (eq guessed-tab-width c-basic-offset)))
      (setq c-basic-offset guessed-tab-width)
      (unless guess-offset-quiet-p
        ;; Pass the buffer name as an argument instead of splicing it
        ;; into the format string: a name containing `%' would
        ;; otherwise be treated as a format directive.
        (message "Note: c-basic-offset adjusted to %s for buffer %s."
                 guessed-tab-width
                 (buffer-name))))))

;;;-----------------------------------------------------------------
;;; Installation

(defvar guess-offset-hooks
  `(c-mode-hook
    c++-mode-hook
    ,(if (featurep 'jde)
         'jde-mode-hook
       'java-mode-hook))
  "List of mode hooks for which guess-offset is installed.
The Java entry is `jde-mode-hook' if the `jde' feature is present
when this variable is defined, and `java-mode-hook' otherwise.")

;; Append (third argument t) the guessing function to each hook listed
;; in `guess-offset-hooks'.  `dolist' is used because the loop runs
;; only for its side effects.
(dolist (hook-name guess-offset-hooks)
  (add-hook hook-name
            'guess-offset-set-c-basic-offset
            t))

(provide 'guess-offset)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; guess-offset.el ends here